use warnings;
use strict;
use HTML::TreeBuilder;
# Document to scan.  The single-quoted heredoc keeps the text literal
# (no variable interpolation).
my $html = <<'HTML';
text1.0
text1.1
text1.2
text2.0
text2.1
text2.2
text3.0
text3.1
text3.2 text3.3
text2.3
text1.3
text4.0
text4.1
text4.2 text4.3
text1.4
HTML

# Parse the whole document in a single call.
my $tree = HTML::TreeBuilder->new_from_content($html);

# For every <a> element, print its href followed by the text nodes that are
# direct children of that element, stopping at the first nested <a>.
ANCHOR:
for my $anchor ($tree->look_down('_tag', 'a')) {
    # An anchor without an href yields undef; print an empty string rather
    # than triggering an "uninitialized value" warning.
    my $href = $anchor->attr('href');
    $href = '' unless defined $href;

    my @text_segs;

    CHILD:
    for my $child ($anchor->content_list) {
        if (ref $child) {
            # Child is an element, not a text node.  A nested <a> ends the
            # text belonging to this anchor; any other element is skipped
            # (its own text is not collected).
            last CHILD if $child->tag eq 'a';
            next CHILD;
        }
        push @text_segs, $child;
    }

    # NOTE: the opening quote after "TEXT: " is never closed and every
    # segment is followed by a space; both are kept so the output format
    # stays exactly as before.
    print "A $href\n\tTEXT: '";
    print join('', map { "$_ " } @text_segs), "\n";
}

# Explicitly release the tree.  HTML::Element documents delete() for
# destroying an element and all of its descendants.
$tree->delete;
####
A URL1
TEXT: ' text1.0 text1.1 text1.2
A URL2
TEXT: ' text2.0 text2.1 text2.2
A URL3
TEXT: ' text3.0 text3.1 text3.2 text3.3
A URL4
TEXT: ' text4.0 text4.1 text4.2 text4.3