in reply to Re: Benchmarking "Are all these characters in this sentence?"
in thread Benchmarking "Are all these characters in this sentence?"
I've added a few search strings of roughly 3k characters, and a few character sets of roughly 3k characters. Adding the long cases moves the results more in favour of Tanktalus_AllIndex:
Short sentence and search set cases
tallulah_OriginalPost 2054/s
Tanktalus_AllRegex 2511/s
Tanktalus_AllRegex_Study 2522/s
moritz_BuildRegex_WithStudy 2595/s
moritz_BuildRegex 2715/s
varian_hash 2983/s
RMGir_slice 4035/s
Tanktalus_AllIndex 8219/s
RMGir_index 12107/s

Long sentence and Short search set cases
varian_hash 97.2/s
RMGir_slice 115/s
moritz_BuildRegex_WithStudy 3172/s
tallulah_OriginalPost 3230/s
Tanktalus_AllRegex_Study 3319/s
Tanktalus_AllRegex 4054/s
moritz_BuildRegex 4250/s
Tanktalus_AllIndex 13032/s
RMGir_index 17612/s

Short sentence and Long search set cases
moritz_BuildRegex_WithStudy 54.1/s
moritz_BuildRegex 54.6/s
tallulah_OriginalPost 63.6/s
Tanktalus_AllRegex 86.9/s
Tanktalus_AllRegex_Study 87.1/s
varian_hash 161/s
RMGir_index 285/s
RMGir_slice 319/s
Tanktalus_AllIndex 320/s

Long sentence and Long search set cases
moritz_BuildRegex_WithStudy 54.1/s
moritz_BuildRegex 54.6/s
tallulah_OriginalPost 63.6/s
varian_hash 86.9/s
Tanktalus_AllRegex_Study 87.7/s
Tanktalus_AllRegex 87.7/s
RMGir_slice 135/s
RMGir_index 250/s
Tanktalus_AllIndex 319/s

Here's the benchmark code with the added data points:
#!/usr/bin/perl -w
#
# Benchmark of several answers to the question "are all of these characters
# present in this sentence?".
#
# Every candidate takes ($sentence, $wantedLetters) and returns a true value
# when each character of $wantedLetters occurs somewhere in $sentence, and a
# false value otherwise.  An empty $wantedLetters is expected to yield true.
#
# The candidate bodies are deliberately kept as their authors posted them
# (the "[id://NNN]" tags are the node ids they came from): they are the thing
# being measured, so tidying them would change what is benchmarked.

use strict;
use warnings;

use List::MoreUtils qw(all);
use Benchmark qw(cmpthese);

my $pangram     = "The quick brown fox jumps over the lazy dog";
my $longPangram = $pangram x 100;
my $alphabet    = "abcdefghijklmnopqrstuvwxyz";

# Each test case is [ sentence, wantedChars, expected result ].

# Short sentence, short wantedChars
my @shortTestCases = (
    [ "abxcd zwe rrv", "xxv",            1 ],
    [ "abxcd zwe rrv", "xxvq",           0 ],
    [ "abxcd zwe rrv", "",               1 ],
    [ $pangram,        $alphabet,        1 ],
    [ $pangram,        "${alphabet}T",   1 ],
    [ $pangram,        "${alphabet}TU",  0 ],
    [ $pangram,        "a",              1 ],
    [ $pangram,        "",               1 ],
);

# Long sentence, short wantedChars
my @longShortTestCases = (
    [ $longPangram, "",              1 ],
    [ $longPangram, "a",             1 ],
    [ $longPangram, "${alphabet}T",  1 ],
    [ $longPangram, "${alphabet}TU", 0 ],
);

# Short sentence, long wantedChars
my @shortLongTestCases = (
    [ $pangram, "${alphabet}T" x 100,  1 ],
    [ $pangram, "${alphabet}TU" x 100, 0 ],
);

# Long sentence, long wantedChars
my @longLongTestCases = (
    [ $longPangram, "${alphabet}T" x 100,  1 ],
    [ $longPangram, "${alphabet}TU" x 100, 0 ],
);

# Run one candidate over a set of test cases, dying on the first wrong answer.
#
#   $testFn    - code ref taking ($sentence, $wantedLetters)
#   $testName  - label used in the failure message
#   $testCases - array ref of [ sentence, wantedChars, expected ] triples
#
# This is the unit of work timed by cmpthese, so a candidate that returns a
# wrong result aborts the run instead of being silently benchmarked.
sub benchmark_routine {
    my ($testFn, $testName, $testCases) = @_;

    foreach my $case (@$testCases) {
        my ($sentence, $wantedLetters, $expectedResult) = @$case;

        # Candidates return assorted true/false values (1, 0, !$flag, ...),
        # so compare truthiness rather than raw numeric values.
        my $got = $testFn->($sentence, $wantedLetters);
        die "$testName test failed ($sentence, $wantedLetters)"
            unless !!$got == !!$expectedResult;
    }

    return;
}

# [id://707122]
# One regex match per wanted character; stops at the first miss.
# NOTE(review): the character is interpolated into the pattern unescaped,
# so regex metacharacters in $wantedLetters would not be matched literally.
sub tallulah_OriginalPost {
    my ($sentence, $wantedLetters) = @_;
    my $flag = 0;
    my @a = split '', $wantedLetters;
    for (my $i = 0; $i < $#a + 1; $i++) {
        if ($sentence !~ /$a[$i]/) {
            $flag = 1;
            last;
        }
    }
    return !$flag;
}

# [id://707123]
# Builds a single anchored regex with one lookahead per wanted character.
sub moritz_BuildRegex {
    my ($sentence, $wantedLetters) = @_;
    my $re = '^' . join '', map "(?=.*?$_)", map quotemeta, split m//, $wantedLetters;
    if ($sentence =~ m/$re/) {
        return 1;
    }
    return 0;
}

# [id://707123]
# Same as moritz_BuildRegex, but calls study on the sentence first.
# NOTE: perlfunc documents study as doing nothing on Perl 5.16 and later.
sub moritz_BuildRegex_WithStudy {
    my ($sentence, $wantedLetters) = @_;
    my $re = '^' . join '', map "(?=.*?$_)", map quotemeta, split m//, $wantedLetters;
    study $sentence;
    if ($sentence =~ m/$re/) {
        return 1;
    }
    return 0;
}

# [id://707124]
# Counts the wanted characters found with index() and compares that count
# with the number of wanted characters.
sub RMGir_index {
    my ($sentence, $wantedLetters) = @_;

    # This variable is not needed (nor are any of them, in fact); they are
    # just here for clarity.  We could work straight out of @_ if we wanted
    # this terser.
    # Also, the $[ check is just pedantic - if someone changes $[, shoot them.
    my $foundLetters = scalar(grep index($sentence, $_) >= $[, split //, $wantedLetters);
    return length($wantedLetters) == $foundLetters;
}

# [id://707222]
# all() over the wanted characters, each used as a regex.
# NOTE(review): each character is used as a pattern unescaped.
sub Tanktalus_AllRegex {
    my ($sentence, $letters) = @_;
    return 1 unless length($letters);

    # all we're doing is checking for each letter.
    all { $sentence =~ $_ } split //, $letters;
}

# [id://707222]
# Same as Tanktalus_AllRegex, but calls study on the sentence first.
sub Tanktalus_AllRegex_Study {
    my ($sentence, $letters) = @_;
    return 1 unless length($letters);
    study $sentence;

    # all we're doing is checking for each letter.
    all { $sentence =~ $_ } split //, $letters;
}

# [id://707222]
# Same as Tanktalus_AllRegex, but with index, which its author thinks is
# less readable.
sub Tanktalus_AllIndex {
    my ($sentence, $letters) = @_;
    return 1 unless length($letters);

    all { index($sentence, $_) >= $[ } split //, $letters;
}

# JavaFan's looks about equivalent to OP approach

# [id://707176]
# Doesn't have same repeated letter semantics specified in OP post, so it is
# defined here but left out of the benchmark runs below.
sub oshalla_scan {
    my ($sentence, $wanted) = @_;
    while (length($wanted)) {
        return 0 if ($sentence !~ m/([$wanted])/g);
        $wanted =~ s/$1//;
    }
    return 1;
}

# [id://707231]
# Hash of wanted characters; every character of the sentence is deleted from
# it, and anything left over was missing.
sub varian_hash {
    my ($sentence, $wantedLetters) = @_;
    my %required = map { $_ => 1 } split //, $wantedLetters;
    map delete $required{$_}, split //, $sentence;
    if (keys %required) {
        return 0;
    }
    else {
        return 1;
    }
}

# [id://707314]
# Same idea as varian_hash, using hash slices for both the fill and the delete.
sub RMGir_slice {
    my ($sentence, $wantedLetters) = @_;
    my %required;
    @required{ split //, $wantedLetters } = ();
    delete @required{ split //, $sentence };
    if (keys %required) {
        return 0;
    }
    else {
        return 1;
    }
}

# Candidates to compare, as [ label, code ref ].  The label is both the row
# name in the cmpthese table and the name reported when a test case fails.
my @candidates = (
    [ tallulah_OriginalPost       => \&tallulah_OriginalPost ],
    [ moritz_BuildRegex           => \&moritz_BuildRegex ],
    [ moritz_BuildRegex_WithStudy => \&moritz_BuildRegex_WithStudy ],
    [ RMGir_index                 => \&RMGir_index ],
    [ Tanktalus_AllRegex          => \&Tanktalus_AllRegex ],
    [ Tanktalus_AllRegex_Study    => \&Tanktalus_AllRegex_Study ],
    [ Tanktalus_AllIndex          => \&Tanktalus_AllIndex ],

    # Doesn't have same repeated letter semantics specified in OP post.
    #[ oshalla_scan               => \&oshalla_scan ],
    [ varian_hash                 => \&varian_hash ],
    [ RMGir_slice                 => \&RMGir_slice ],
);

# Test sets to run, as [ heading printed before the table, test cases ].
my @testSets = (
    [ "Short cases"     => \@shortTestCases ],
    [ "LongShort cases" => \@longShortTestCases ],
    [ "ShortLong cases" => \@shortLongTestCases ],
    [ "LongLong cases"  => \@longLongTestCases ],
);

foreach my $testSet (@testSets) {
    my ($title, $testsRef) = @$testSet;
    print "$title\n";

    # One closure per candidate, bound to the current test set.
    my %contenders = map {
        my ($name, $fn) = @$_;
        ( $name => sub { benchmark_routine($fn, $name, $testsRef) } );
    } @candidates;

    # -1 asks Benchmark for at least one CPU second per candidate.
    cmpthese(-1, \%contenders);
}
|
|---|