c:\@Work\Perl\monks>perl -wMstrict -MData::Dump -le "my @positive = qw(nation conceived liberty created equal foo); my @negative = qw(fourscore SEVEN fOrTh fathers continent bar); ;; my $sentence = 'FoUrScOrE and seven years ago ' . 'our fathers brought forth, on this continent, ' . 'a new nation, conceived in liberty, and dedicated ' . 'to the proposition that all men are created equal. ' . 'Repeat seven nation fathers nation.' ; ;; my %pos = map { lc($_) => 0 } @positive; my $rx_pos = make_regex(\%pos); print 'for debug: positive rx: ', $rx_pos; ;; my %neg = map { lc($_) => 0 } @negative; my $rx_neg = make_regex(\%neg); print 'for debug: negative rx: ', $rx_neg; ;; my %other; my $rx_undefined = qr{ (?! $rx_pos | $rx_neg) }xms; my $rx_word = qr{ \b [[:alpha:]]+ \b }xms; ;; ++$pos { lc $_ } for $sentence =~ m{ $rx_pos }xmsg; ++$neg { lc $_ } for $sentence =~ m{ $rx_neg }xmsg; ++$other{ lc $_ } for $sentence =~ m{ $rx_undefined $rx_word }xmsg; ;; dd \%pos; dd \%neg; dd \%other; ;; ;; sub make_regex { my ($hr_wordlist) = @_; ;; my ($rx) = map qr{ (?i) \b (?: $_) \b }xms, join '|', map quotemeta, reverse sort keys %$hr_wordlist ; ;; return $rx; } " for debug: positive rx: (?msx-i: (?i) \b (?: nation|liberty|foo|equal|created|conceived) \b ) for debug: negative rx: (?msx-i: (?i) \b (?: seven|fourscore|forth|fathers|continent|bar) \b ) { conceived => 1, created => 1, equal => 1, foo => 0, liberty => 1, nation => 3 } { bar => 0, continent => 1, fathers => 2, forth => 1, fourscore => 1, seven => 2 } { a => 1, ago => 1, all => 1, "and" => 2, are => 1, brought => 1, dedicated => 1, in => 1, men => 1, new => 1, on => 1, our => 1, proposition => 1, repeat => 1, that => 1, the => 1, this => 1, to => 1, years => 1, }