## Sample output:
##   Finding 883 words (of 5038) took 0.039814 seconds using a hash
##   Finding 784 words took 0.027246 seconds using a trie(via regex engine)
##

 use strict;
use Data::Dump qw[ pp ];
use Time::HiRes qw[ time ];

# Load the dictionary: one entry per line, optionally followed by "/FLAGS".
# The file is opened explicitly so that a missing dictionary is a fatal error;
# reading it through <> would only warn and leave the lexicon silently empty.
my $dictionary = 'en-US.dic';
open my $dict_fh, '<', $dictionary or die "Cannot open '$dictionary': $!";
chomp( my @words = <$dict_fh> );
close $dict_fh;

# Buffer limit for the regex engine's trie optimisation of large alternations
# (perlvar documents 65536 as the default; kept here as an explicit tuning knob).
${^RE_TRIE_MAXBUF} = 2**16;
$| = 1;    # unbuffered STDOUT so the "\r" progress counter is shown immediately


# Keys are the dictionary words; the value is only a placeholder payload.
my %lexicon;

my $limit = 10000;    # consider at most this many dictionary lines
for (@words) {
    s/\/.*$//;        # strip the "/FLAGS" suffix (modifies @words in place)
#   next if length($_)<3;
    last unless $limit--;
    # A blank line or a line starting with "/" leaves an empty string, which
    # can never equal a token and would add an empty regex alternative.
    next unless length;
    $lexicon{ $_ } = 'suplementary data';
}

#print join "\t", grep {length() <3 } keys %lexicon ;exit;


# Build one alternation of every lexicon word, longest first, so the regex
# engine can compile it into a trie.
#  - quotemeta keeps dictionary entries containing regex metacharacters literal.
#  - Empty keys are dropped: an empty alternative would match between any
#    two delimiters.
#  - The delimiters are zero-width look-arounds ("not preceded / followed by a
#    non-space character").  Consuming a literal space on both sides would eat
#    the separator that the next word needs, so every second word of a run of
#    adjacent dictionary words would be skipped.
my @alternatives = map  { quotemeta $_ }
                   sort { length( $b ) <=> length( $a ) }
                   grep { length $_ } keys %lexicon;
my $re = @alternatives
    ? '(?<!\S)(' . join( '|', @alternatives ) . ')(?!\S)'
    : '(?!)';    # empty lexicon: a pattern that can never match
my $cre = qr/$re/;

#print $re; exit;

# The text to scan is named by the first command-line argument.
my $input_path = $ARGV[ 0 ];
die "Usage: $0 <text-file>\n" unless defined $input_path;
open my $infile, '<', $input_path or die "Cannot open '$input_path': $!";

# Pass 1: tokenise each line and look every token up in %lexicon.
my @matches1;                        # every token found in the lexicon, in order
my $start1 = time;
my( $words, $found1 ) = ( 0, 0 );    # tokens seen / tokens found
while( <$infile> ) {
    print "\r$.\t";                  # progress: current line number
    tr[a-zA-Z][ ]cs;                 # every run of non-letters becomes one space
    tr[A-Z][a-z];                    # lower-case what is left
    for my $word ( split ) {
        ++$words;
        if ( exists $lexicon{ $word } ) {
            $found1++;
            push @matches1, $word;
        }
    }
}
my $end1 = time;

printf "Finding $found1 words (of $words) took %f seconds using a hash\n", $end1 - $start1;


# Pass 2: normalise the whole file into one string and let the compiled
# alternation find all dictionary words in a single global match.
my $start2 = time;
seek $infile, 0, 0 or die "Cannot rewind input file: $!";
$. = 0;             # seek does not reset the line counter; the next read makes it 1
my $text = ' ';     # leading delimiter so the first word is bounded by
                    # whitespace on both sides like every other word
while( <$infile> ) {
    print "\r$.\t";         # progress: current line number
    tr[a-zA-Z][ ]cs;        # every run of non-letters becomes one space
    tr[A-Z][a-z];           # lower-case what is left
#    ++$found2 while m[$cre]g;
    $text .= $_ . ' ';
}

# In list context a /g match returns the captured word of every match.
my @matches2 = $text =~ /$cre/g;
my $found2   = scalar @matches2;
my $end2 = time;

printf "Finding $found2 words took %f seconds using a trie(via regex engine)\n", $end2 - $start2;

# Set difference: the distinct words the hash pass found that the regex pass
# did not.
my %matches;
@matches{@matches1} = ();
# Number of distinct words found by the hash pass, on a line of its own so it
# does not run into the heading printed below.
print scalar( keys %matches ), "\n";
delete @matches{@matches2};
print "missing matches:\n";

# NOTE: Data::Dump documents that pp() called in void context prints its dump
# to STDERR, so this listing goes to a different stream than the heading above.
pp \%matches;
#print $text;