use warnings;
use strict;

# Word-frequency counter.
#
# Reads the text after __DATA__, counts every word (case-sensitively), and
# prints the most frequent words, one per line, as "<word> <count>".
# Ordering: highest count first; ties are broken case-insensitively, then
# case-sensitively so the output is fully deterministic.

# Maximum number of rows to print.
my $MAX_ROWS = 100;

# The sample data contains accented letters, so decode the input and encode
# the output explicitly; otherwise multi-byte characters are seen as
# separate bytes and accented words get split in the middle.
binmode DATA,   ':encoding(UTF-8)';
binmode STDOUT, ':encoding(UTF-8)';

# A "word" is a maximal run of letters.
# NOTE(review): the original pattern was lost to extraction damage (only the
# leading "(?" survived); this reconstruction assumes Unicode letters were
# intended -- confirm against the original source.
my $word = qr/(?<!\p{L})\p{L}+(?!\p{L})/;

my %count;

while (my $line = <DATA>) {
    # Allow one apostrophe-joined suffix so contractions and possessives
    # ("don't", "Montóya's") are counted as a single word.
    while ($line =~ /($word(?:'$word)?)/g) {
        $count{$1}++;
    }
}

my @ranked = sort {
    $count{$b} <=> $count{$a}
        || lc $a cmp lc $b
        || $a cmp $b
} keys %count;

my $shown = 0;
for my $entry (@ranked) {
    # Check the limit before printing so exactly $MAX_ROWS rows are emitted
    # (the previous post-print "++$counter > 100" test let 101 through).
    last if $shown++ >= $MAX_ROWS;
    printf "%15s %5d\n", $entry, $count{$entry};
}

__DATA__
"Hello World!"
"Oh poor Yorick, his world I knew well yes I did"
"don't won't, can't shouldn't, you'll, it's, etc."
"Señor Montóya's resüme isn't ápropos."
the, the, the, the, the, the, the, the, the, the