use HTML::TreeBuilder;
# Build a parse tree from the markup held in $html.
my $tree = HTML::TreeBuilder->new;
$tree->parse($html);
$tree->eof;    # no more input is coming

# Tally how many times each word appears in the document's text.
# A "word" here is a run of word characters, optionally followed by an
# apostrophe, followed by another run of word characters.  The pattern needs
# at least two word characters, so one-character words are not counted.
# Keys are stored exactly as matched (case-sensitive).
my %words;
my $page_text = $tree->as_text();
while ($page_text =~ m/(\b\w+\'?\w+)/g) {
    $words{$1}++;
}
# Record where each counted word occurs in the raw HTML.
#
# Each element of @word_pos is a two-element array ref:
#     [ $word, $offset ]
# where $offset is the 0-based index into $html of the first character of one
# occurrence of $word.  Entries are grouped by word (in hash order) and, within
# a word, listed in document order.
#
# Only text that follows a '>' and precedes the next '<' (or the end of the
# string) is searched, so occurrences inside a tag's own markup are skipped.
#
# NOTE(review): matching is case-insensitive while the keys of %words are
# case-sensitive, so "The" and "the" each record every occurrence of either
# spelling.  Kept as in the original; confirm this is intended.
# NOTE(review): the words come from the parsed tree's text, which presumably
# has character entities decoded; such words may not be found in raw $html.
my @word_pos;
my $key;    # kept at this scope in case code outside this section uses it

# Collect every text run once, up front, as [ start offset, text ].  The scan
# does not depend on the word being looked up, so it is hoisted out of the
# per-word loop.
my @text_runs;
pos($html) = 0;
while ($html =~ />([^<]+)/g) {
    push @text_runs, [$-[1], $1];
}

foreach $key (keys %words) {
    # \Q...\E keeps the word literal even if it ever contains metacharacters.
    my $word_re = qr/\b\Q$key\E\b/i;

    foreach my $run (@text_runs) {
        my ($run_start, $run_text) = @{$run};

        # Nothing past the word is consumed, so every occurrence within a run
        # is reported -- not only the first one.
        while ($run_text =~ /$word_re/g) {
            push @word_pos, [$key, $run_start + $-[0]];
        }
    }
}