# Parse a sample HTML fragment and, for every <a> element found, print its
# href followed by the text that sits directly inside that element before its
# first nested <a> element.
use warnings;
use strict;
use HTML::TreeBuilder;

# NOTE(review): as seen here the sample contains no markup at all, yet the
# loop below searches for <a> elements and the recorded output lists
# URL1..URL4 -- presumably the original tags were lost when this snippet was
# extracted. Confirm against the original source before relying on it.
my $html = <<'HTML';
text1.0 text1.1
text1.2
text2.0 text2.1
text2.2
text3.0 text3.1
text3.2 text3.3
text2.3 text1.3 text4.0 text4.1
text4.2 text4.3
text1.4
HTML

my $tree = HTML::TreeBuilder->new_from_content($html);

for my $elt ($tree->look_down('_tag', 'a')) {
    # An anchor without an href yields undef; print it as an empty string
    # instead of triggering an "uninitialized value" warning.
    my $href = $elt->attr('href');
    $href = '' unless defined $href;

    # The output format (including the unbalanced opening quote after
    # "TEXT:") is kept exactly as before so it still matches the recorded
    # sample output.
    print "A " . $href . "\n\tTEXT: '";
    print "$_ " for leading_text_segments($elt);
    print "\n";
}

# Explicitly release the parse tree now that we are done with it.
$tree->delete;

# Return the plain-text children of $elt that occur before its first nested
# <a> child.
#
#   $elt    - an element object providing content_list() and tag()
#   returns - list of text strings, in document order
#
# Child elements other than <a> are skipped (their own text is not
# descended into); collection stops at the first child <a> element.
sub leading_text_segments {
    my ($elt) = @_;

    my @text_segs;
    for my $child ($elt->content_list()) {
        if (ref $child) {
            # Child is an element object rather than a text string.
            last if $child->tag eq 'a';
            next;
        }
        push @text_segs, $child;
    }

    return @text_segs;
}

# Sample output recorded with the original snippet (whitespace as extracted):
# A URL1 TEXT: ' text1.0 text1.1 text1.2
# A URL2 TEXT: ' text2.0 text2.1 text2.2
# A URL3 TEXT: ' text3.0 text3.1 text3.2 text3.3
# A URL4 TEXT: ' text4.0 text4.1 text4.2 text4.3