in reply to Re: Re: Re: Re: Matching First Character of Strings Efficiently
in thread Matching First Character of Strings Efficiently
It requires a little work, but it is possible to add the loopup table to the benchmark.
The results are that it shows benefits after 4 iterations and an order of magnitude by the time you get to 100 iterations.
The lookup table will only need to be built once rather than everytime, so I've ensured that it gets built only on the first iteration of the benchmark. Whether this is legitimate will depend on whether your application requires processing of the array more than once. The breakpoint appears to occur at 4 iterations for the overhead of building the lookup to amortise out and give a clear winner. Of course, at this few iterations, there are lots of "Too few iterations" warnings.
I added a check to ensure that the expensive function was called the same number of times in every case (the last line of each group of results below).
This obviously requires the use of a positive iteration count rather than a negative one.
P:\test>336795 -N=4 (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) Rate HB1 HB2 tchn LR tye2 kval delim tye1 BUK HB1 36.7/s -- -57% -72% -85% -85% -85% -85% -86% -86% HB2 85.1/s 132% -- -34% -66% -66% -66% -66% -68% -68% tchn 129/s 252% 52% -- -48% -48% -48% -48% -52% -52% LR 250/s 581% 194% 94% -- 0% 0% -0% -6% -6% tye2 250/s 581% 194% 94% 0% -- 0% -0% -6% -6% kval 250/s 581% 194% 94% 0% 0% -- -0% -6% -6% delim 250/s 581% 194% 94% 0% 0% 0% -- -6% -6% tye1 267/s 627% 213% 107% 7% 7% 7% 7% -- 0% BUK 267/s 627% 213% 107% 7% 7% 7% 7% 0% -- 1472 - 1472 - 1472 - 1472 - 1472 - 1472 - 1472 - 1472 - 1472 P:\test>336795 -N=10 (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) Rate HB1 HB2 tchn delim LR tye1 kval tye2 BUK HB1 33.7/s -- -68% -74% -79% -84% -84% -84% -89% -95% HB2 106/s 216% -- -17% -33% -50% -50% -50% -66% -83% tchn 128/s 281% 21% -- -19% -40% -40% -40% -59% -79% delim 159/s 371% 49% 24% -- -25% -25% -25% -49% -75% LR 213/s 532% 100% 66% 34% -- -0% -0% -32% -66% tye1 213/s 532% 100% 66% 34% 0% -- -0% -32% -66% kval 213/s 532% 100% 66% 34% 0% -0% -- -32% -66% tye2 312/s 828% 194% 144% 97% 47% 47% 47% -- -50% BUK 625/s 1756% 487% 387% 294% 194% 194% 194% 100% -- 3830 - 3830 - 3830 - 3830 - 3830 - 3830 - 3830 - 3830 - 3830 P:\test>336795 -N=100 (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) (warning: too few iterations for a reliable count) Rate HB1 HB2 tchn delim kval LR tye1 tye2 BUK HB1 33.5/s -- -64% -70% -75% -81% -81% -84% -86% -99% HB2 94.1/s 181% -- -16% -31% -46% -47% -56% -60% -97% tchn 112/s 235% 19% -- -18% -35% -37% -47% -53% -97% delim 136/s 307% 45% 21% -- -21% -23% -36% -43% -96% kval 173/s 415% 84% 54% 27% -- -3% -19% -27% -95% LR 178/s 430% 89% 58% 30% 3% -- -17% -25% -94% tye1 214/s 538% 127% 90% 57% 24% 20% -- -10% -93% tye2 237/s 607% 152% 111% 74% 37% 33% 11% -- -93% BUK 3226/s 9526% 3329% 2771% 2268% 1768% 1716% 1410% 1261% -- 40600 - 40600 - 40600 - 40600 - 40600 - 40600 - 40600 - 40600 - 40600 P:\test>336795 -N=1000 (warning: too few iterations for a reliable count) Rate HB1 HB2 tchn delim LR kval tye1 tye2 + BUK HB1 33.8/s -- -62% -70% -76% -81% -81% -84% -86% + -99% HB2 89.8/s 166% -- -19% -36% -48% -49% -58% -62% + -98% tchn 111/s 228% 24% -- -21% -36% -37% -48% -53% + -97% delim 141/s 317% 57% 27% -- -19% -20% -34% -41% + -96% LR 173/s 413% 93% 56% 23% -- -2% -18% -27% + -95% kval 177/s 423% 97% 59% 25% 2% -- -17% -26% + -95% tye1 213/s 529% 137% 92% 51% 23% 20% -- -11% + -94% tye2 238/s 604% 165% 114% 69% 37% 35% 12% -- + -94% BUK 3759/s 11020% 4088% 3289% 2567% 2067% 2026% 1668% 1480% + -- 370000 - 370000 - 370000 - 370000 - 370000 - 370000 - 370000 - 370000 +- 370000
The modfied benchmark
#!/usr/bin/perl -s use strict; use warnings; use Benchmark 'cmpthese'; our $N ||= 100_000; my @list; open (LIST, '<', 'list.rnd') or die "Unable to open list.rnd for readi +ng : !"; while ( <LIST> ) { chomp; push @list, $_; } my %lookup; my $length = int(rand 240) + 10; my $str_a = ""; $str_a .= ('a' .. 'z')[ rand 26] while length $str_a < $length; sub expensive_function { # my ($str_a, $str_b) = @_; # my $foo; # for ( split // , $str_a . $str_b) { # $foo++; # } $_[ 2 ]++ } our( $buk, $tchn, $lr, $hb1, $hb2, $delim, $tye1, $tye2, $kval ) = (0) +x10; cmpthese $N, { 'BUK' => sub { unless( keys %lookup ) { push @{ $lookup{ chr ord } }, $_ for @list; } expensive_function( $str_a, $_, $buk ) for @{ $lookup{ chr + ord $str_a }; } }, 'tchn' => sub { use Inline C =>; for my $str_b ( @list ) { next if ! same_scan( $str_a, $str_b ); expensive_function ( $str_a, $str_b, $tchn ); } }, 'LR' => sub { for my $str_b ( @list ) { next if index($str_a, substr($str_b, 0, 1)); expensive_function ( $str_a, $str_b, $lr ); } }, 'HB1' => sub { my ($a_1st, $rest) = split '', $str_a, 2; for my $str_b ( @list ) { my ($b_1st, $rest) = split '', $str_b, 2; next if $b_1st ne $a_1st; expensive_function ( $str_a, $str_b, $hb1 ); } }, 'HB2' => sub { my $rev_a = reverse $str_a; my $a_1st = chop $rev_a; for my $str_b ( @list ) { my $rev_b = reverse $str_b; my $b_1st = chop $rev_b; next if $b_1st ne $a_1st; expensive_function ( $str_a, $str_b, $hb2 ); } }, 'delim' => sub { my $fc = substr($str_a,0,1); for my $str_b ( @list ) { next if $str_b !~ /^$fc/; expensive_function ( $str_a, $str_b, $delim ); } }, 'tye1' => sub { for my $str_b ( @list ) { next if ord $str_a != ord $str_b; expensive_function ( $str_a, $str_b, $tye1 ); } }, 'tye2' => sub { my $fc = ord $str_a; for my $str_b ( @list ) { next if $fc != ord( $str_b ); expensive_function ( $str_a, $str_b, $tye2 ); } }, 'kval' => sub { my $fc = substr $str_a, 0, 1; for my $str_b ( @list ) { next if $fc ne substr $str_b, 0, 1; expensive_function ( $str_a, $str_b, $kval ); } }, }; $,= ' - '; print $buk, $tchn, $lr, $hb1, $hb2, $delim, $tye1, $tye2, $kval; __END__ __C__ int same_scan(char* str1, char* str2) { return str1[0] == str2[0] ? 1 : 0; }
|
|---|