# Benchmark: count the dictionary words occurring in a text file in two ways
#   1. split every line into tokens and look each token up in a hash;
#   2. run a single alternation regex (which the regex engine compiles to a
#      trie) over the whole normalised text;
# then list the distinct words the hash pass found but the regex pass missed.
#
# Usage: perl SCRIPT textfile [dictionary]
#   textfile    text to scan (required)
#   dictionary  word list, one entry per line, optional "/flags" suffix;
#               defaults to 'en-US.dic' in the current directory
#
# Sample output recorded with an earlier revision of this script:
#   Finding 883 words (of 5038) took 0.039814 seconds using a hash
#   Finding 784 words took 0.027246 seconds using a trie(via regex engine)
# That revision matched ' (word|...) ', which consumes the space after each
# hit, so the second of two adjacent dictionary words could never match; it
# also lower-cased the text in the regex pass only. Both passes now see the
# same normalised text and the pattern uses zero-width \b anchors.

use strict;
use warnings;
use Data::Dump qw[ pp ];
use Time::HiRes qw[ time ];

use constant DEFAULT_DICTIONARY => 'en-US.dic';
use constant MAX_WORDS          => 10000;   # dictionary lines to consider
use constant FOLD_CASE          => 0;       # 1 = compare case-insensitively

# Raise the trie buffer limit so the large alternation is still compiled to
# a trie; this must be set before the pattern is compiled.
${^RE_TRIE_MAXBUF} = 2**16;
$| = 1;    # unbuffered STDOUT so the "\r" progress counter is visible

my ( $text_path, $dict_path ) = @ARGV;
die "usage: $0 textfile [dictionary]\n" unless defined $text_path;
$dict_path = DEFAULT_DICTIONARY unless defined $dict_path;

# --- Load the lexicon -------------------------------------------------------
open my $dict, '<', $dict_path
    or die "Cannot open dictionary '$dict_path': $!\n";

my %lexicon;
my $remaining = MAX_WORDS;
while ( my $entry = <$dict> ) {
    last if $remaining-- <= 0;
    $entry =~ s{\s+\z}{};       # newline, CR and other trailing blanks
    $entry =~ s{/.*\z}{}s;      # drop the "/flags" suffix
    next if $entry eq '';       # an empty alternative would match anywhere
    $entry = lc $entry if FOLD_CASE;
    $lexicon{$entry} = 'suplementary data';
}
close $dict;

die "No usable words found in '$dict_path'\n" unless %lexicon;

# Longest-first ordering is kept from the original; the \b anchors already
# force whole-token matches. quotemeta keeps entries containing regex
# metacharacters literal.
my $alternation = join '|',
    map  { quotemeta }
    sort { length($b) <=> length($a) or $a cmp $b }
    keys %lexicon;
my $cre = qr/\b($alternation)\b/;

open my $infile, '<', $text_path
    or die "Cannot open '$text_path': $!\n";

# --- Pass 1: hash lookup per token ------------------------------------------
my @matches1;
my ( $words, $found1 ) = ( 0, 0 );
my $start1 = time;
while ( my $line = <$infile> ) {
    print "\r$.\t";
    for my $word ( split ' ', normalize_line($line) ) {
        ++$words;
        next unless exists $lexicon{$word};
        ++$found1;
        push @matches1, $word;
    }
}
my $end1 = time;
printf "Finding %d words (of %d) took %f seconds using a hash\n",
    $found1, $words, $end1 - $start1;

# --- Pass 2: one trie regex over the whole text -----------------------------
my $start2 = time;
seek $infile, 0, 0 or die "Cannot rewind '$text_path': $!\n";
$. = 0;    # seek does not reset the line counter; restart it for the progress display
my $text = '';
while ( my $line = <$infile> ) {
    print "\r$.\t";
    $text .= normalize_line($line) . ' ';
}
my @matches2 = $text =~ /$cre/g;
my $found2   = scalar @matches2;
my $end2     = time;
printf "Finding %d words took %f seconds using a trie(via regex engine)\n",
    $found2, $end2 - $start2;
close $infile;

# --- Report words found by the hash pass but not by the regex pass ----------
my %missing;
@missing{@matches1} = ();
printf "%d distinct words matched by the hash pass\n", scalar keys %missing;
delete @missing{@matches2};
print "missing matches:\n";
# pp() returns the dump as a string when not called in void context, which
# keeps the dump on STDOUT together with the heading.
print pp( [ sort keys %missing ] ), "\n";

# Reduce a line to ASCII letters separated by single spaces (and lower-case
# it when FOLD_CASE is set). Used by both passes so they scan identical text.
sub normalize_line {
    my ($line) = @_;
    $line =~ tr[a-zA-Z][ ]cs;
    $line =~ tr[A-Z][a-z] if FOLD_CASE;
    return $line;
}