#!/bin/perl5
#
# Extract links from one section of an HTML file.
#
# Scanning begins after the first start tag matching
# <$s_tag $s_attrb="$s_value"> (here: <div class="menu">) and ends at the
# first <$e_tag> start tag (here: <h6>), at end of input, or as soon as
# $max distinct hrefs have been collected.  Every collected link is then
# printed as "href -> link text", sorted by href.

use strict;
use warnings;
use HTML::TokeParser;

my $file = 'map2004.html';
my $tp   = HTML::TokeParser->new($file)
    or die "Couldn't read html file: $!";

# Start tag, attribute name and attribute value that open the section.
my ( $s_tag, $s_attrb, $s_value ) = qw(div class menu);

# Tag whose *start* token closes the section.
my ($e_tag) = 'h6';

my $max   = 20;    # upper bound on the number of distinct links collected
my $start = 0;     # flag: true once the opening tag has been seen
my %data;          # href => trimmed link text (later duplicates overwrite)

# Token shapes returned by get_token():
#   ["S",  $tag, $attr, $attrseq, $text]
#   ["E",  $tag, $text]
#   ["T",  $text, $is_data]
#   ["C",  $text]
#   ["D",  $text]
#   ["PI", $token0, $text]

TOKEN:
while ( my $tag = $tp->get_token ) {
    my ( $type, $name, $attr ) = @{$tag};

    # Only start tags drive the state machine; everything else is skipped.
    next TOKEN unless $type eq 'S';

    # Opening marker of the section: raise the flag and move on.
    if (   $name eq $s_tag
        && exists $attr->{$s_attrb}
        && $attr->{$s_attrb} eq $s_value )
    {
        $start = 1;
        next TOKEN;
    }

    next TOKEN unless $start;
    last TOKEN if $name eq $e_tag;

    next TOKEN unless $name eq 'a' && exists $attr->{href};

    # get_trimmed_text consumes tokens up to the closing </a>.
    $data{ $attr->{href} } = $tp->get_trimmed_text('/a');

    # Count distinct hrefs rather than anchors seen, so repeated links do
    # not use up the budget and leave fewer than $max entries in the output.
    last TOKEN if scalar( keys %data ) >= $max;
}

for my $key ( sort keys %data ) {
    print "$key -> $data{$key}\n";
}