# Count, for every HTML element found in test.html, how many words of text
# occur while that element is open, then print the per-tag totals with
# Data::Dumper.
#
# Known limitation: elements whose end tag is omitted in the markup (for
# example an unclosed <p> or <li>) are never seen as closed, so they keep
# collecting words until the end of the document.
use strict;
use warnings;
use HTML::TokeParser;
use Data::Dumper;

# Void elements have no end tag, so they must never be tracked as "open";
# otherwise a single <br> or <img> would be credited with every word that
# follows it in the document.
my %is_void = map { $_ => 1 }
    qw(area base br col embed hr img input link meta param source track wbr);

my %open_depth;    # tag name => number of currently open instances
my %tokencount;    # tag name => words seen while that tag was open

my $p = HTML::TokeParser->new("test.html") or die "Can't open: $!";

while ( my $token = $p->get_token ) {
    my ( $type, $payload ) = @{$token};

    if ( $type eq "S" ) {
        # Start tag: $payload is the tag name.
        $open_depth{$payload}++ unless $is_void{ lc $payload };
    }
    elsif ( $type eq "E" ) {
        # End tag: only close a tag that is actually open. A stray end tag
        # must not push the depth below zero, or the next real start tag of
        # that name would only bring it back to 0 and its text would be lost.
        $open_depth{$payload}-- if $open_depth{$payload};
    }
    elsif ( $type eq "T" ) {
        # Text: $payload is the text itself. Count its words once, then
        # credit that number to every tag that is currently open.
        # NOTE(review): the text is used exactly as get_token returns it;
        # character references such as &amp; are presumably not decoded at
        # this point and would then be counted as words -- confirm.
        my $words = () = $payload =~ /\b\w+/g;

        for my $tag ( keys %open_depth ) {
            $tokencount{$tag} += $words if $open_depth{$tag} > 0;
        }
    }
}

print Dumper( \%tokencount );
#### Stuff

I like potatoes!

Me not!

##
## $VAR1 = { 'h1' => 2, 'body' => 5, 'head' => 1, 'html' => 6, 'title' => 1, 'h2' => 3 };