#!/usr/bin/env perl
#
# Word-frequency counter.
#
# Reads text from the files named on the command line (or STDIN when none
# are given), splits each line on whitespace, lower-cases every token,
# deletes the punctuation in @excluded_chars, strips the stop words in
# @excluded_words, and prints "<count> <word>" for each remaining word,
# ordered by ascending count and then alphabetically.
#
# Usage: word_count.pl [FILE ...]

use strict;
use warnings;
use autodie;

# Stop words removed from every token before counting.
my @excluded_words = qw(
    a about although also an and another are as at be been before between
    but by can do during for from has how however in into is it many may
    more most etc
);

# Punctuation characters deleted from every token before counting.
my @excluded_chars = (
    "'", ':', '@', '-', '~', ',', '.', '(', ')', '?', '*', '%', '/',
    '[', ']', '=', '"',
);

# Matches any stop word standing between word boundaries.  Every
# alternative is bounded by \b on both sides, so the order of the
# alternatives does not affect what is matched.
my $word_regex = do {
    my $alternatives = join '|', map { quotemeta($_) } @excluded_words;
    qr/\b(?:$alternatives)\b/;
};

# Matches any single excluded punctuation character.
my $char_regex = do {
    my $class = join '', map { quotemeta($_) } @excluded_chars;
    qr/[$class]/;
};

# Normalize one whitespace-delimited token.
#   $token - raw token as produced by split.
# Returns the lower-cased token with excluded characters and excluded
# words removed; the result is the empty string when nothing is left.
sub clean_token {
    my ($token) = @_;

    my $word = lc $token;
    $word =~ s/$char_regex//g;    # punctuation first, so "(is)" becomes "is"
    $word =~ s/$word_regex//g;

    return $word;
}

# Add the words of one input line to a running tally.
#   $count_of - hash reference mapping word => occurrences (updated in place).
#   $line     - one line of input text.
# Tokens that are empty after cleaning (pure punctuation or stop words) are
# skipped so the tally never contains an empty-string "word".
sub add_line_counts {
    my ( $count_of, $line ) = @_;

    for my $token ( split ' ', $line ) {
        my $word = clean_token($token);
        next if $word eq '';
        $count_of->{$word}++;
    }

    return;
}

# Build the report lines for a tally.
#   $count_of - hash reference mapping word => occurrences.
# Returns a list of "<count> <word>\n" strings sorted by ascending count,
# ties broken alphabetically.
sub format_report {
    my ($count_of) = @_;

    my @words = sort { $count_of->{$a} <=> $count_of->{$b} or $a cmp $b }
        keys %{$count_of};

    return map { "$count_of->{$_} $_\n" } @words;
}

# Script entry point: tally every line from <> and print the report.
sub main {
    my %count_of;

    while ( my $line = <> ) {
        add_line_counts( \%count_of, $line );
    }

    print format_report( \%count_of );

    return;
}

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. by a test) without consuming STDIN.
main() unless caller;

1;