So I've gotten silly drawing all of the above together. =)
#!/usr/bin/perl
# 75O0K8S - Benchmark all the Trigram-finders!
# (with liberty taken to reformat aggressively)
use strict;
use warnings;
use Benchmark   qw(cmpthese);
use Text::Ngram qw(ngram_counts add_to_counts);

# Test inputs; the last one adds a test string with spaces too.
my @strs = qw(computer CheesyNachoz JustAnotherPerlHacker);
push @strs, 'Just Another Perl Hacker';

# The string currently under test.  Every do_* sub reads this file-scoped
# lexical, so it has to be declared before those subs are compiled.
my $str;

# CodeRefName => PerlMonkAuthor (in post order)
my %cnms = (
    'whileZ'   => 'Zaxo',
    'recurseO' => 'otto',
    # not sure how to do magic blazar goto Tail-Recursion
    'whileM'   => 'Marknel',
    'mapB'     => 'blazar',
    'hngramG'  => 'graff',
    'substrGF' => 'GrandFather',
    'unpackLG' => 'Limbic~Region && GrandFather',
    'regexGF'  => 'GrandFather',
    'mapstrS'  => 'Skeeve',
    'substrS'  => 'Skeeve',
    'regexS'   => 'Skeeve',
    'counmapA' => 'andreas1234567',
    'mapstrS2' => 'Skeeve',
    'whileAB'  => 'Anonymous Monk && blazar',
    'hmapB'    => 'blazar',
    'regexS2'  => 'Skeeve',
    'whileP'   => 'Pip',
    'regexP'   => 'Pip',
);

# Map each name to its do_<name> implementation.  Looking the sub up with
# can() fails loudly here if a table entry has no matching sub, instead of
# dying later in the middle of a benchmark run.
my $crfs = {};
for my $cnam (keys %cnms) {
    $crfs->{$cnam} = __PACKAGE__->can("do_$cnam")
        or die "No sub do_$cnam defined for table entry '$cnam'\n";
}

for my $strn (@strs) {
    # Assign the shared $str rather than using it as the loop variable:
    # foreach would alias a fresh variable that the do_* subs cannot see.
    $str = $strn;

    # Show every implementation's output once, then time them all.
    for my $cnam (sort keys %{$crfs}) {
        printf("%-8s:%s\n", $cnam, join ' ', $crfs->{$cnam}->());
    }
    cmpthese(-1, $crfs);
}
# whileZ (Zaxo): global match over a local copy of $str, rewinding pos()
# by two after each hit so that consecutive trigrams overlap.
# Returns the trigrams in order of appearance.
sub do_whileZ {
    local $_ = $str;
    my @tris;
    # /s lets '.' match a newline, so every character is treated alike,
    # as in the substr-based variants.
    while (/(...)/gs) {
        push @tris, $1;
        pos() -= 2;    # next match starts one character after this one
    }
    return @tris;
}
# Recursive sliding-window helper used by do_recurseO.
#
#   $cnt    - highest index of a full window (window length minus one)
#   $ra_out - array ref; each full window is appended here as a joined string
#   $ra_val - array ref; workspace holding the letters of the current window
#   $ra_in  - array ref; remaining input letters (consumed as it goes)
#
# Results are delivered through @$ra_out; the return value is empty.
sub recurse {
    my ($cnt, $ra_out, $ra_val, $ra_in) = @_;

    # The call depth grows with the number of input letters, so silence
    # the deep-recursion warning for long inputs.
    no warnings 'recursion';

    if ($#$ra_val == $cnt) {
        # Window is full: emit it, drop its first letter, and pull in the
        # next letter if any remain.
        push @$ra_out, join('', @$ra_val);
        shift @$ra_val;
        push @$ra_val, shift @$ra_in if @$ra_in;
        return recurse($cnt, $ra_out, $ra_val, $ra_in);
    }

    # Window not yet full: stop when the input is exhausted, otherwise
    # keep filling.
    return unless @$ra_in;
    push @$ra_val, shift @$ra_in;
    return recurse($cnt, $ra_out, $ra_val, $ra_in);
}
# recurseO (otto): split $str into letters and let recurse() slide a
# three-letter window over them.  Returns the trigrams in order.
sub do_recurseO {
    my @in = split //, $str;    # input letter array
    my @tris;                   # output trigram string array
    my @val;                    # values workspace for the current window
    recurse(2, \@tris, \@val, \@in);
    return @tris;
}
# whileM (Marknel): index-driven while loop taking substr() at each
# offset of $str.  Returns the trigrams in order.
sub do_whileM {
    my $loop_num = length($str) - 2;    # number of trigram start offsets
    my @tris;
    my $ndx = 0;
    while ($ndx < $loop_num) {
        push @tris, substr($str, $ndx, 3);
        $ndx++;
    }
    return @tris;
}
# mapB (blazar): map substr() over every start offset of $str.
# Returns the trigrams in order (none when $str has fewer than 3 chars).
sub do_mapB {
    my $last = length($str) - 3;    # last valid start offset
    my @tris = map { substr $str, $_, 3 } 0 .. $last;
    return @tris;
}
# hngramG (graff): ask Text::Ngram for the width-3 n-gram counts of $str
# and return the distinct n-grams (the hash keys) in sorted order.
# NOTE(review): the sample output in this post shows these lower-cased and
# de-duplicated, unlike the other variants.
sub do_hngramG {
    my $href = ngram_counts($str, 3);
    return sort { $a cmp $b } keys %{$href};
}
# substrGF (GrandFather): statement-modifier for loop pushing substr()
# at each start offset of $str.  Returns the trigrams in order.
sub do_substrGF {
    my @tris;
    push @tris, substr($str, $_, 3) for 0 .. length($str) - 3;
    return @tris;
}
# HTTP://Perl.Com/doc/manual/html/pod/perlfunc/unpack.html
# unpackLG (Limbic~Region && GrandFather): build an unpack template that
# reads three characters ('a3') and backs up two ('XX') once per trigram.
# Returns the trigrams in order.
sub do_unpackLG {
    my $mats = length($str) - 2;    # number of trigrams in $str
    my @tris;
    # Guard against a zero or negative repeat count for very short inputs;
    # a negative count yields an empty template anyway and may warn.
    if ($mats > 0) {
        my $tmpl = 'a3XX' x $mats;
        @tris = unpack($tmpl, $str);
    }
    return @tris;
}
# regexGF (GrandFather): zero-width lookahead with a capture, matched
# globally in list context, yields one trigram per start position.
sub do_regexGF {
    # /s lets '.' match a newline so every character is treated alike.
    my @tris = $str =~ /(?=(...))/gs;
    return @tris;
}
# mapstrS (Skeeve): map over the END offsets 3..length and take the three
# characters before each one.  Returns the list straight from map.
sub do_mapstrS {
    return map { substr($str, $_ - 3, 3) } 3 .. length $str;
}
# substrS (Skeeve): push substr() for each start offset of $str using a
# statement-modifier for loop.  Returns the trigrams in order.
sub do_substrS {
    my @tris;
    push @tris, substr($str, $_, 3) for 0 .. length($str) - 3;
    return @tris;
}
# regexS (Skeeve): lookahead-capture regex, one trigram per start position.
#
# The match is assigned to an array so it always runs in list context.
# Returning the bare m//g let the caller's context leak in: Benchmark
# invokes the sub in void context, where m//g performs a single match per
# call and leaves pos($str) set, so the timing measured almost nothing.
# In list context the return value is unchanged; in scalar context this
# now yields the trigram count instead of a match flag.
sub do_regexS {
    my @tris = $str =~ /(?=(...))/gs;    # /s: '.' also matches a newline
    return @tris;
}
# counmapA (andreas1234567): unlike the other variants this returns, for
# each whitespace-separated word of $str, the NUMBER of trigrams in it
# (length minus two), not the trigrams themselves.
sub do_counmapA {
    # split ' ' skips leading whitespace, so no phantom empty word appears.
    # Words shorter than three characters contain zero trigrams, so the
    # count is clamped at 0 instead of going negative.
    return map {
        my $count = length($_) - 2;
        $count > 0 ? $count : 0
    } split ' ', $str;
}
# mapstrS2 (Skeeve): same end-offset map as do_mapstrS, but with the
# n-gram width held in a variable and the result stored before returning.
sub do_mapstrS2 {
    my $len  = 3;    # n-gram width
    my @tris = map { substr($str, $_ - $len, $len) } $len .. length $str;
    return @tris;
}
# whileAB (Anonymous Monk && blazar): repeatedly capture the leading three
# characters of a working copy, then chop one character off the front.
# Returns the trigrams in order.
sub do_whileAB {
    my @tris;
    my $stri = $str;
    # Test length, not string truth, so a remaining "0" is not mistaken
    # for an empty string.
    while (length $stri) {
        push @tris, $1 if $stri =~ /^(...)/s;
        # /s is required here: without it '.' cannot remove a leading
        # newline and the loop would never terminate on such input.
        $stri =~ s/^.//s;
    }
    return @tris;
}
# hmapB (blazar): collect the trigrams of $str as hash keys, which removes
# duplicates, and return the distinct trigrams in sorted order.
sub do_hmapB {
    my %saw;
    # Hash-slice assignment registers every trigram as a key; the values
    # are irrelevant, only key existence matters.
    @saw{ map { substr $str, $_, 3 } 0 .. length($str) - 3 } = ();
    return sort { $a cmp $b } keys %saw;
}
# regexS2 (Skeeve): expand a copy of $str in place so that each group of
# three characters "abc" followed by "de" becomes "abcbcdcde", then cut
# the expanded text into consecutive three-character pieces.
# Returns the trigrams in order.
sub do_regexS2 {
    my $work = $str;    # lexical copy: leave the caller's $_ untouched

    # /s on all patterns so '.' matches any character, newline included;
    # otherwise the three-character groups would lose alignment.
    $work =~ s/(.)(.)(.)(?=(.)(.))/$1$2$3$2$3$4$3$4$5/gs;

    # The substitution needs two characters of lookahead, so when the
    # length is 1 modulo 3 a four-character tail "abcd" is left unexpanded
    # and its second trigram "bcd" would be lost.  Expand that tail too.
    $work =~ s/(.)(.)(.)(.)\z/$1$2$3$2$3$4/s if length($str) % 3 == 1;

    # Assign to an array so the split always runs in list context, even
    # when the sub is invoked in void context by Benchmark.
    my @tris = $work =~ /(...)/gs;
    return @tris;
}
# whileP (Pip): repeatedly substitute the leading three characters with
# their last two, capturing the full three each time.  s/// is slow!
# Returns the trigrams in order.
sub do_whileP {
    my $work = $str;    # lexical copy: leave the caller's $_ untouched
    my @tris;
    # $1 is the whole trigram, $2 the two characters kept for the next
    # round; /s lets '.' match a newline like any other character.
    push @tris, $1 while $work =~ s/^(.(..))/$2/s;
    return @tris;
}
# regexP (Pip): same expand-then-split idea as do_regexS2, using nested
# captures: for "abc" followed by "de", $1 is "abc", $2 is "bc", $3 is "c",
# $4 is "d" and $5 is "e", so the replacement spells "abc" "bcd" "cde".
# Returns the trigrams in order.
sub do_regexP {
    my $work = $str;    # lexical copy: leave the caller's $_ untouched

    # /s on all patterns so '.' matches any character, newline included.
    $work =~ s/((?:.)((?:.)(.)))(?=(.)(.))/$1$2$4$3$4$5/gs;

    # When the length is 1 modulo 3 a four-character tail "abcd" is left
    # unexpanded (the lookahead needs two characters) and "bcd" would be
    # lost.  Expand it: $1 is "abc", $2 is "bc", $3 is "d".
    $work =~ s/(.(..))(.)\z/$1$2$3/s if length($str) % 3 == 1;

    # Assign to an array so the split always runs in list context, even
    # when the sub is invoked in void context by Benchmark.
    my @tris = $work =~ /(...)/gs;
    return @tris;
}
Results:
counmapA:6
hmapB :com mpu omp put ter ute
hngramG :com mpu omp put ter ute
mapB :com omp mpu put ute ter
mapstrS :com omp mpu put ute ter
mapstrS2:com omp mpu put ute ter
recurseO:com omp mpu put ute ter
regexGF :com omp mpu put ute ter
regexP :com omp mpu put ute ter
regexS :com omp mpu put ute ter
regexS2 :com omp mpu put ute ter
substrGF:com omp mpu put ute ter
substrS :com omp mpu put ute ter
unpackLG:com omp mpu put ute ter
whileAB :com omp mpu put ute ter
whileM :com omp mpu put ute ter
whileP :com omp mpu put ute ter
whileZ :com omp mpu put ute ter
Rate recurseO hngramG whileP hmapB whileAB whileZ regexS2
+ regexP whileM mapstrS2 regexGF mapB substrGF substrS unpackLG mapstr
+S counmapA regexS
recurseO 14490/s -- -48% -57% -63% -68% -70% -74%
+ -80% -82% -83% -83% -84% -86% -87% -87% -92
+% -97% -98%
hngramG 27837/s 92% -- -18% -29% -39% -43% -51%
+ -61% -66% -67% -67% -70% -74% -75% -75% -84
+% -95% -97%
whileP 33810/s 133% 21% -- -14% -26% -31% -40%
+ -52% -58% -60% -60% -64% -68% -69% -70% -81
+% -94% -96%
hmapB 39456/s 172% 42% 17% -- -14% -19% -31%
+ -44% -51% -53% -53% -58% -63% -64% -65% -78
+% -93% -95%
whileAB 45948/s 217% 65% 36% 16% -- -6% -19%
+ -35% -43% -45% -45% -51% -57% -58% -59% -74
+% -92% -94%
whileZ 48651/s 236% 75% 44% 23% 6% -- -14%
+ -32% -40% -42% -42% -48% -55% -56% -57% -73
+% -91% -94%
regexS2 56775/s 292% 104% 68% 44% 24% 17% --
+ -20% -30% -32% -32% -39% -47% -48% -50% -68
+% -90% -93%
regexP 71087/s 391% 155% 110% 80% 55% 46% 25%
+ -- -12% -15% -15% -24% -34% -35% -37% -60
+% -87% -91%
whileM 80736/s 457% 190% 139% 105% 76% 66% 42%
+ 14% -- -3% -4% -13% -25% -26% -28% -55
+% -85% -90%
mapstrS2 83510/s 476% 200% 147% 112% 82% 72% 47%
+ 17% 3% -- -1% -10% -22% -24% -26% -53
+% -85% -90%
regexGF 84020/s 480% 202% 149% 113% 83% 73% 48%
+ 18% 4% 1% -- -10% -22% -23% -25% -53
+% -85% -90%
mapB 92980/s 542% 234% 175% 136% 102% 91% 64%
+ 31% 15% 11% 11% -- -13% -15% -18% -48
+% -83% -89%
substrGF 107184/s 640% 285% 217% 172% 133% 120% 89%
+ 51% 33% 28% 28% 15% -- -2% -5% -40
+% -81% -87%
substrS 109713/s 657% 294% 224% 178% 139% 126% 93%
+ 54% 36% 31% 31% 18% 2% -- -3% -38
+% -80% -86%
unpackLG 112733/s 678% 305% 233% 186% 145% 132% 99%
+ 59% 40% 35% 34% 21% 5% 3% -- -37
+% -80% -86%
mapstrS 177535/s 1125% 538% 425% 350% 286% 265% 213%
+ 150% 120% 113% 111% 91% 66% 62% 57% -
+- -68% -78%
counmapA 550801/s 3701% 1879% 1529% 1296% 1099% 1032% 870%
+ 675% 582% 560% 556% 492% 414% 402% 389% 210
+% -- -32%
regexS 810891/s 5496% 2813% 2298% 1955% 1665% 1567% 1328%
+ 1041% 904% 871% 865% 772% 657% 639% 619% 357
+% 47% --
counmapA:10
hmapB :Che Nac ach cho ees esy hee hoz syN yNa
hngramG :ach che cho ees esy hee hoz nac syn yna
mapB :Che hee ees esy syN yNa Nac ach cho hoz
mapstrS :Che hee ees esy syN yNa Nac ach cho hoz
mapstrS2:Che hee ees esy syN yNa Nac ach cho hoz
recurseO:Che hee ees esy syN yNa Nac ach cho hoz
regexGF :Che hee ees esy syN yNa Nac ach cho hoz
regexP :Che hee ees esy syN yNa Nac ach cho hoz
regexS :Che hee ees esy syN yNa Nac ach cho hoz
regexS2 :Che hee ees esy syN yNa Nac ach cho hoz
substrGF:Che hee ees esy syN yNa Nac ach cho hoz
substrS :Che hee ees esy syN yNa Nac ach cho hoz
unpackLG:Che hee ees esy syN yNa Nac ach cho hoz
whileAB :Che hee ees esy syN yNa Nac ach cho hoz
whileM :Che hee ees esy syN yNa Nac ach cho hoz
whileP :Che hee ees esy syN yNa Nac ach cho hoz
whileZ :Che hee ees esy syN yNa Nac ach cho hoz
Rate recurseO whileP hngramG hmapB whileAB whileZ regexS2
+ regexP whileM regexGF mapstrS2 mapB substrS substrGF unpackLG mapst
+rS counmapA regexS
recurseO 9955/s -- -51% -55% -60% -67% -68% -75%
+ -81% -81% -82% -82% -83% -86% -87% -87% -9
+2% -98% -99%
whileP 20287/s 104% -- -9% -19% -33% -34% -50%
+ -61% -61% -63% -63% -66% -72% -72% -74% -8
+3% -96% -97%
hngramG 22330/s 124% 10% -- -11% -26% -28% -44%
+ -57% -57% -59% -60% -63% -69% -70% -71% -8
+1% -96% -97%
hmapB 25121/s 152% 24% 12% -- -17% -18% -38%
+ -51% -52% -54% -54% -58% -65% -66% -67% -7
+9% -96% -97%
whileAB 30117/s 203% 48% 35% 20% -- -2% -25%
+ -42% -42% -45% -45% -50% -58% -59% -61% -7
+5% -95% -96%
whileZ 30811/s 210% 52% 38% 23% 2% -- -23%
+ -40% -41% -44% -44% -49% -57% -58% -60% -7
+4% -95% -96%
regexS2 40193/s 304% 98% 80% 60% 33% 30% --
+ -22% -23% -26% -27% -33% -44% -46% -48% -6
+7% -93% -95%
regexP 51569/s 418% 154% 131% 105% 71% 67% 28%
+ -- -1% -6% -6% -14% -28% -30% -33% -5
+7% -91% -93%
whileM 52194/s 424% 157% 134% 108% 73% 69% 30%
+ 1% -- -4% -5% -13% -27% -29% -32% -5
+7% -91% -93%
regexGF 54613/s 449% 169% 145% 117% 81% 77% 36%
+ 6% 5% -- -1% -9% -24% -26% -29% -5
+5% -90% -93%
mapstrS2 55137/s 454% 172% 147% 119% 83% 79% 37%
+ 7% 6% 1% -- -8% -23% -25% -28% -5
+4% -90% -93%
mapB 60124/s 504% 196% 169% 139% 100% 95% 50%
+ 17% 15% 10% 9% -- -16% -18% -22% -5
+0% -89% -92%
substrS 71957/s 623% 255% 222% 186% 139% 134% 79%
+ 40% 38% 32% 31% 20% -- -2% -6% -4
+0% -87% -91%
substrGF 73770/s 641% 264% 230% 194% 145% 139% 84%
+ 43% 41% 35% 34% 23% 3% -- -4% -3
+9% -87% -91%
unpackLG 76663/s 670% 278% 243% 205% 155% 149% 91%
+ 49% 47% 40% 39% 28% 7% 4% -- -3
+6% -86% -90%
mapstrS 120470/s 1110% 494% 439% 380% 300% 291% 200%
+ 134% 131% 121% 118% 100% 67% 63% 57%
+-- -79% -85%
counmapA 562196/s 5548% 2671% 2418% 2138% 1767% 1725% 1299%
+ 990% 977% 929% 920% 835% 681% 662% 633% 36
+7% -- -28%
regexS 777546/s 7711% 3733% 3382% 2995% 2482% 2424% 1835%
+ 1408% 1390% 1324% 1310% 1193% 981% 954% 914% 54
+5% 38% --
counmapA:19
hmapB :Ano Hac Jus Per ack cke erP erl her ker lHa not oth rPe rlH s
+tA tAn the ust
hngramG :ack ano cke erl erp hac her jus ker lha not oth per rlh rpe s
+ta tan the ust
mapB :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H
+ac ack cke ker
mapstrS :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H
+ac ack cke ker
mapstrS2:Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H
+ac ack cke ker
recurseO:Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H
+ac ack cke ker
regexGF :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H
+ac ack cke ker
regexP :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H
+ac ack cke ker
regexS :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H
+ac ack cke ker
regexS2 :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H
+ac ack cke ker
substrGF:Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H
+ac ack cke ker
substrS :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H
+ac ack cke ker
unpackLG:Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H
+ac ack cke ker
whileAB :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H
+ac ack cke ker
whileM :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H
+ac ack cke ker
whileP :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H
+ac ack cke ker
whileZ :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H
+ac ack cke ker
Rate recurseO whileP hmapB hngramG whileAB whileZ regexS2
+ whileM regexP regexGF mapstrS2 mapB substrGF substrS unpackLG mapst
+rS counmapA regexS
recurseO 5910/s -- -44% -55% -61% -65% -66% -74%
+ -80% -80% -80% -81% -83% -86% -86% -87% -9
+1% -99% -99%
whileP 10645/s 80% -- -20% -29% -36% -38% -53%
+ -63% -64% -65% -65% -69% -74% -75% -76% -8
+4% -98% -99%
hmapB 13274/s 125% 25% -- -12% -21% -23% -41%
+ -54% -55% -56% -56% -61% -68% -69% -70% -8
+0% -98% -98%
hngramG 15058/s 155% 41% 13% -- -10% -13% -33%
+ -48% -50% -50% -50% -56% -63% -64% -66% -7
+7% -97% -98%
whileAB 16748/s 183% 57% 26% 11% -- -3% -26%
+ -42% -44% -44% -45% -51% -59% -60% -63% -7
+4% -97% -98%
whileZ 17230/s 192% 62% 30% 14% 3% -- -24%
+ -41% -42% -43% -43% -50% -58% -59% -62% -7
+4% -97% -98%
regexS2 22541/s 281% 112% 70% 50% 35% 31% --
+ -22% -24% -25% -26% -34% -45% -47% -50% -6
+5% -96% -97%
whileM 28980/s 390% 172% 118% 92% 73% 68% 29%
+ -- -3% -4% -5% -15% -29% -31% -35% -5
+6% -95% -96%
regexP 29824/s 405% 180% 125% 98% 78% 73% 32%
+ 3% -- -1% -2% -13% -27% -29% -33% -5
+4% -95% -96%
regexGF 30118/s 410% 183% 127% 100% 80% 75% 34%
+ 4% 1% -- -1% -12% -26% -29% -33% -5
+4% -95% -96%
mapstrS2 30416/s 415% 186% 129% 102% 82% 77% 35%
+ 5% 2% 1% -- -11% -26% -28% -32% -5
+3% -94% -96%
mapB 34132/s 478% 221% 157% 127% 104% 98% 51%
+ 18% 14% 13% 12% -- -17% -19% -24% -4
+8% -94% -96%
substrGF 40959/s 593% 285% 209% 172% 145% 138% 82%
+ 41% 37% 36% 35% 20% -- -3% -9% -3
+7% -93% -95%
substrS 42164/s 613% 296% 218% 180% 152% 145% 87%
+ 45% 41% 40% 39% 24% 3% -- -6% -3
+5% -92% -95%
unpackLG 44776/s 658% 321% 237% 197% 167% 160% 99%
+ 55% 50% 49% 47% 31% 9% 6% -- -3
+1% -92% -94%
mapstrS 65163/s 1003% 512% 391% 333% 289% 278% 189%
+ 125% 118% 116% 114% 91% 59% 55% 46%
+-- -88% -92%
counmapA 548746/s 9185% 5055% 4034% 3544% 3177% 3085% 2334%
+ 1794% 1740% 1722% 1704% 1508% 1240% 1201% 1126% 74
+2% -- -32%
regexS 803888/s 13502% 7452% 5956% 5239% 4700% 4566% 3466%
+ 2674% 2595% 2569% 2543% 2255% 1863% 1807% 1695% 113
+4% 46% --
counmapA:2 5 2 4
hmapB : An Ha Pe Ano Hac Jus Per ack cke er erl her ker l H not o
+th r P rl st t A the ust
hngramG : an ha pe ack ano cke er erl hac her jus ker l h not oth p
+er r p rl st t a the ust
mapB :Jus ust st t A An Ano not oth the her er r P Pe Per erl r
+l l H Ha Hac ack cke ker
mapstrS :Jus ust st t A An Ano not oth the her er r P Pe Per erl r
+l l H Ha Hac ack cke ker
mapstrS2:Jus ust st t A An Ano not oth the her er r P Pe Per erl r
+l l H Ha Hac ack cke ker
recurseO:Jus ust st t A An Ano not oth the her er r P Pe Per erl r
+l l H Ha Hac ack cke ker
regexGF :Jus ust st t A An Ano not oth the her er r P Pe Per erl r
+l l H Ha Hac ack cke ker
regexP :Jus ust st t A An Ano not oth the her er r P Pe Per erl r
+l l H Ha Hac ack cke ker
regexS :Jus ust st t A An Ano not oth the her er r P Pe Per erl r
+l l H Ha Hac ack cke ker
regexS2 :Jus ust st t A An Ano not oth the her er r P Pe Per erl r
+l l H Ha Hac ack cke ker
substrGF:Jus ust st t A An Ano not oth the her er r P Pe Per erl r
+l l H Ha Hac ack cke ker
substrS :Jus ust st t A An Ano not oth the her er r P Pe Per erl r
+l l H Ha Hac ack cke ker
unpackLG:Jus ust st t A An Ano not oth the her er r P Pe Per erl r
+l l H Ha Hac ack cke ker
whileAB :Jus ust st t A An Ano not oth the her er r P Pe Per erl r
+l l H Ha Hac ack cke ker
whileM :Jus ust st t A An Ano not oth the her er r P Pe Per erl r
+l l H Ha Hac ack cke ker
whileP :Jus ust st t A An Ano not oth the her er r P Pe Per erl r
+l l H Ha Hac ack cke ker
whileZ :Jus ust st t A An Ano not oth the her er r P Pe Per erl r
+l l H Ha Hac ack cke ker
Rate recurseO whileP hmapB hngramG whileAB whileZ regexS2
+ whileM regexP regexGF mapstrS2 mapB substrGF substrS unpackLG mapst
+rS counmapA regexS
recurseO 5218/s -- -44% -55% -61% -64% -65% -74%
+ -79% -80% -80% -80% -83% -85% -86% -86% -9
+1% -98% -99%
whileP 9309/s 78% -- -21% -31% -36% -38% -54%
+ -63% -64% -65% -65% -69% -74% -74% -76% -8
+4% -96% -99%
hmapB 11712/s 124% 26% -- -13% -20% -21% -42%
+ -53% -55% -55% -55% -61% -67% -68% -69% -7
+9% -95% -99%
hngramG 13524/s 159% 45% 15% -- -8% -9% -33%
+ -46% -48% -49% -49% -55% -62% -63% -65% -7
+6% -94% -98%
whileAB 14629/s 180% 57% 25% 8% -- -2% -27%
+ -42% -44% -44% -44% -52% -59% -60% -62% -7
+4% -94% -98%
whileZ 14913/s 186% 60% 27% 10% 2% -- -26%
+ -41% -43% -43% -43% -51% -58% -59% -61% -7
+4% -94% -98%
regexS2 20096/s 285% 116% 72% 49% 37% 35% --
+ -20% -23% -24% -24% -34% -44% -44% -48% -6
+5% -91% -98%
whileM 25121/s 381% 170% 114% 86% 72% 68% 25%
+ -- -4% -4% -4% -17% -30% -31% -34% -5
+6% -89% -97%
regexP 26065/s 399% 180% 123% 93% 78% 75% 30%
+ 4% -- -1% -1% -14% -27% -28% -32% -5
+4% -89% -97%
regexGF 26304/s 404% 183% 125% 95% 80% 76% 31%
+ 5% 1% -- -0% -13% -27% -27% -31% -5
+4% -89% -97%
mapstrS2 26305/s 404% 183% 125% 95% 80% 76% 31%
+ 5% 1% 0% -- -13% -27% -27% -31% -5
+4% -89% -97%
mapB 30340/s 481% 226% 159% 124% 107% 103% 51%
+ 21% 16% 15% 15% -- -15% -16% -21% -4
+7% -87% -96%
substrGF 35870/s 587% 285% 206% 165% 145% 141% 78%
+ 43% 38% 36% 36% 18% -- -1% -6% -3
+7% -85% -96%
substrS 36202/s 594% 289% 209% 168% 147% 143% 80%
+ 44% 39% 38% 38% 19% 1% -- -6% -3
+6% -84% -96%
unpackLG 38331/s 635% 312% 227% 183% 162% 157% 91%
+ 53% 47% 46% 46% 26% 7% 6% -- -3
+2% -83% -95%
mapstrS 56776/s 988% 510% 385% 320% 288% 281% 183%
+ 126% 118% 116% 116% 87% 58% 57% 48%
+-- -76% -93%
counmapA 231848/s 4343% 2391% 1880% 1614% 1485% 1455% 1054%
+ 823% 790% 781% 781% 664% 546% 540% 505% 30
+8% -- -73%
regexS 847418/s 16139% 9004% 7136% 6166% 5693% 5583% 4117%
+ 3273% 3151% 3122% 3122% 2693% 2262% 2241% 2111% 139
+3% 266% --
Whee! ;)
Updated to include Skeeve's newest regex silliness && one of my own due to such inspiration.