#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use HTML::TokeParser::Simple;

# Collect the links found inside each <li> element of the HTML held in the
# __DATA__ section, then print one "::"-separated line per list item.
#
# Each record in @db is a flat list of (href, text) pairs, one pair per <a>
# inside the list item, in document order:
#   [ href1, text1, href2, text2, ... ]
#
# NOTE(review): the output loop assumes every <li> holds at least two links;
# items with fewer links print empty fields instead of warning.

# Slurp the entire DATA section in one read: localising $/ leaves it undef
# for the duration of the do-block only.
my $html = do { local $/; <DATA> };

my $p = HTML::TokeParser::Simple->new(\$html);
$p->unbroken_text(1);    # deliver adjacent text as a single token

my $in_li = 0;           # true while the parser is inside an <li> element
my @record;              # (href, text) pairs gathered for the current <li>
my @db;                  # one array ref per completed <li>

TOKEN:
while (my $t = $p->get_token) {
    if ($t->is_start_tag('li')) {
        $in_li = 1;
        next TOKEN;
    }

    # Ignore everything outside list items.
    next TOKEN unless $in_li;

    if ($t->is_end_tag('li')) {
        push @db, [@record];

        # Start the next item with a clean slate; without this reset every
        # later record would still begin with the first item's links.
        @record = ();
        $in_li  = 0;
        next TOKEN;
    }

    if ($t->is_start_tag('a')) {
        # Scalar assignment keeps exactly one slot per value, so the field
        # positions stay stable even when an <a> has no href attribute.
        my $href = $t->get_attr('href');
        my $text = $p->get_trimmed_text('/a');
        push @record, $href, $text;
    }
}

#die Dumper \@db;

# Print the first link's text, the second link's URL and the second link's
# text. Missing values become empty strings so short records do not raise
# "uninitialized value" warnings.
for my $record (@db) {
    my ($first_text, $second_url, $second_text)
        = map { defined $_ ? $_ : '' } @{$record}[1 .. 3];
    print $first_text, "::", $second_url, "::", $second_text, "\n";
}

__DATA__
  • some words here - "some words here"