use warnings;
use strict;
use Mojo::DOM;
use Mojo::Util qw/trim/;

# this sub should really be in its own package for modularity

# get_data($html)
#
# Extracts the postal address and the phone entries from an HTML document.
#
# Exactly one element must match the selector '.address address'; every
# <br> found inside it is replaced by a newline text node before the
# element's text is read and trimmed. The elements matching '.phone p' are
# consumed in document order as pairs: first the label, then the value.
#
# Returns a hash reference of the form
#   { address => { Address => $text }, phone => { $label => $value, ... } }
# The 'phone' key is absent when no '.phone p' element is found.
#
# Dies when the number of address matches is not exactly one, when the
# number of phone elements is odd, or when a phone label occurs twice.
sub get_data {
    my $html = shift;
    my %data;
    my $dom = Mojo::DOM->new($html);

    my $addr = $dom->find('.address address');
    # could add some conditionals here
    # in case there are separate fields for street / city / zip etc.
    die "Didn't find exactly one address" unless @$addr == 1;
    $addr = $addr->first;

    # Turn line-break elements into real newlines so that the address
    # lines stay separated in the extracted text.
    $addr->find('br')->each(
        sub {
            my ($br) = @_;
            $br->replace("\n");
        }
    );
    $data{address} = { Address => trim( $addr->text ) };

    my $phone = $dom->find('.phone p');
    die "Didn't find an even number of elements in phone" if @$phone % 2;

    # The collection is drained two elements at a time: label, then value.
    while (@$phone) {
        my $key = trim( shift(@$phone)->text );
        die "Duplicate key '$key' in phone data"
            if exists $data{phone}{$key};
        $data{phone}{$key} = trim( shift(@$phone)->text );
    }

    return \%data;
}

use Test::More;

# The fixture markup mirrors the selectors queried by get_data():
# '.address address' for the address and '.phone p' for the label/value
# pairs. The last pair deliberately has an empty value.
is_deeply get_data(<<'HTML'),
<div class="address">
<address>Sample Street 123<br>45678 Randomcity</address>
</div>
<div class="phone">
<p>Telephone</p>
<p>0123-4 56 78 90</p>
<p>Telefax</p>
<p></p>
</div>
HTML
    {
        address => { Address => "Sample Street 123\n45678 Randomcity" },
        phone   => { Telephone => "0123-4 56 78 90", Telefax => "" },
    },
    'address and phone pairs are extracted from the sample markup';

# TODO: many more test cases here

done_testing;