# Word-frequency counter: slurp the whole input, strip HTML-like tags and
# every non-letter character, then tally each remaining word in %count.

# _clean_text($text) -> string
# Returns a copy of $text reduced to ASCII letters and spaces:
#   1. every run of whitespace (newlines included) becomes a single space,
#   2. anything of the form <...> is deleted,
#   3. every remaining character that is not a-z / A-Z becomes a space.
# Case is preserved, so "Word" and "word" stay distinct.
sub _clean_text {
    my ($text) = @_;

    $text =~ s/\s+/ /g;        # collapse all whitespace
    $text =~ s/<[^>]*>//g;     # remove HTML-like tags (deleted, not spaced)
    $text =~ s/[^a-z]/ /gi;    # keep letters only; everything else -> space

    return $text;
}

# word => number of occurrences. Declared with `our` so it remains the
# package variable that code elsewhere in the file can read.
our %count;

# Slurp all input. NOTE(review): the readline operator was missing from
# the original `join('',)`; `<>` (files named in @ARGV, else STDIN) is
# assumed here - confirm it was not `<STDIN>`.
$_ = _clean_text(join '', <>);

# Tally each whitespace-separated word. A lexical loop variable is used so
# $_ keeps holding the cleaned text after the loop.
for my $word (split ' ', $_) {
    $count{$word}++;
}