use HTML::Parser;
use Data::Dumper;
# Pattern to look for (case-insensitive, whole words).
# The /x flag makes Perl ignore unescaped whitespace inside the pattern, so
# the gap between the two words must be spelled out as \s+; a bare space
# would silently turn the pattern into /\bsomethinghere\b/.
my $search_term = qr/\b something \s+ here \b/ix;

# Tags the parser is asked to report (passed as report_tags below).
my @tags_to_search = qw[ title h1 h2 h3 h4 h5 h6 a li p pre img ];

# Attributes whose values are searched on every reported start tag.
my @attributes_to_search = qw[ alt href ];

# Match counters. Keys are attribute names (filled by start()) and tag
# names (filled by text()); both kinds share this one hash.
my %seen;

# Name of the most recent reported start tag; text() credits its matches
# to this tag.
my $last_seen_tag;
# Start-tag handler for HTML::Parser (registered with argspec "tagname, attr").
#
# Arguments:
#   $tagname - name of the tag that was just opened
#   $attr    - hash reference of the tag's attributes
#
# Side effects:
#   * remembers $tagname in $last_seen_tag so that text() can credit
#     following text to it;
#   * for every attribute listed in @attributes_to_search that has a
#     non-empty value, adds the number of $search_term matches found in
#     that value to $seen{<attribute name>}.
sub start {
    my ($tagname, $attr) = @_;

    $last_seen_tag = $tagname;

    for my $name (@attributes_to_search) {
        my $value = $attr->{$name};
        next unless defined $value && length $value;

        # m//g in scalar context only reports success/failure, so walk the
        # matches one by one to obtain a real occurrence count. Counting
        # this way is also unaffected by capture groups in the pattern.
        my $matches = 0;
        $matches++ while $value =~ /$search_term/g;

        $seen{$name} += $matches;
    }

    return;
}
# Text handler for HTML::Parser (registered with argspec "text").
#
# Arguments:
#   $text - the text chunk reported by the parser
#
# Side effects:
#   adds the number of $search_term matches in $text to
#   $seen{$last_seen_tag}.
#
# Text that arrives before any start tag has been recorded is ignored;
# without that guard the hits would be filed under an empty-string key.
# NOTE(review): $last_seen_tag is only ever set by start(), so text that
# follows an end tag is still credited to the most recent start tag.
sub text {
    my ($text) = @_;

    return unless defined $last_seen_tag;

    # m//g in scalar context only reports success/failure, so walk the
    # matches one by one to obtain a real occurrence count.
    my $matches = 0;
    $matches++ while $text =~ /$search_term/g;

    $seen{$last_seen_tag} += $matches;

    return;
}
# Build an event-driven parser that reports only the tags listed in
# @tags_to_search and hands text to the handler in unbroken chunks.
my $p = HTML::Parser->new(
    api_version   => 3,
    start_h       => [ \&start, "tagname, attr" ],
    text_h        => [ \&text,  "text" ],
    unbroken_text => 1,
    report_tags   => \@tags_to_search,
);

# parse_file() returns undef (with the reason in $!) when the file cannot
# be opened; fail loudly instead of printing an empty result.
$p->parse_file("foo.html")
    or die "Cannot parse foo.html: $!\n";

# Sort the keys so the report is stable from run to run.
$Data::Dumper::Sortkeys = 1;
print Dumper \%seen;