use HTML::TreeBuilder;

# Build a parse tree from $html so the document's text can be pulled out
# without the markup.
my $tree = HTML::TreeBuilder->new();
$tree->parse($html);
$tree->eof();

# Tally every distinct word found in the document text.  A word is a run of
# word characters, optionally followed by an apostrophe and more word
# characters ("don't").  The apostrophe group is optional as a whole so that
# one-letter words such as "a" or "I" are counted too.
my %words = ();
foreach my $word ($tree->as_text() =~ m/\b(\w+(?:'\w+)?)/g) {
    $words{$word} += 1;
}

# Collect every run of text that sits outside a tag, together with the offset
# in $html at which the run starts.  A run begins at the start of the input or
# right after a '>' and extends up to the next '<' or the end of the input.
# Doing this once avoids rescanning the markup for every word.
my @text_runs;
pos($html) = 0;
while ($html =~ /(?:\A|>)([^<]+)/g) {
    push @text_runs, [$-[1], $1];
}

# For each counted word record [word, offset-in-$html] for every occurrence
# inside a text run.  Offsets for a given word are pushed in ascending order.
# Matching is case-insensitive, so keys that differ only in letter case will
# report the same offsets.
my @word_pos;
my $key;
foreach $key (keys %words) {
    # \Q...\E keeps the word literal even if it contains regex metacharacters.
    my $word_re = qr/\b\Q$key\E\b/i;
    foreach my $run (@text_runs) {
        my ($base, $text) = @{$run};
        while ($text =~ /$word_re/g) {
            # $-[0] is relative to the run; add the run's start for an
            # absolute offset into $html.
            push @word_pos, [$key, $base + $-[0]];
        }
    }
}