#!/usr/bin/perl use strict; use warnings; use File::Slurp qw(slurp); use Benchmark qw(cmpthese); sub binary_search { my ($str, $a) = @_; my $l = 0; my $h = @$a; while ($l < $h) { my $p = int (($l + $h) / 2); if ($a->[$p] lt $str) { $l = $p + 1; } else { $h = $p; } } $l } sub make_start { my $a = shift; my $last = $a->[0]; my @start = (0); for my $ix (1..$#$a) { my $current = $a->[$ix]; if ($current ne $last) { push @start, $ix; $last = $current; } } return \@start; } chomp (my @words = slurp '/usr/share/dict/words'); @words = grep /^\w+$/, @words; for my $size (100, 1000, 100_000, 1_000_000) { for my $dups (3, 10, 100) { next unless $size > $dups; for my $reps (100, 100_000, 1_000_000) { print "size: $size, dups: $dups, reps: $reps\n"; # generate data: my @a = map $words[rand @words], 1..1+($size/$dups); push @a, $a[rand @a] while @a < $size; @a = sort @a; cmpthese(-30, { naive => sub { my $ix; $ix = binary_search($a[rand @a], \@a) for (1..$reps); }, salva => sub { my $ix; my $start = make_start(\@a); my @a_start = @a[@$start]; $ix = $start->[binary_search($a[rand @a], \@a_start)] for (1..$reps); } }); print "\n"; } } } #### size: 100, dups: 3, reps: 100 Rate salva naive salva 837/s -- -2% naive 854/s 2% -- size: 100, dups: 3, reps: 100000 s/iter naive salva naive 1.21 -- -10% salva 1.09 11% -- size: 100, dups: 3, reps: 1000000 s/iter naive salva naive 13.0 -- -16% salva 10.9 19% -- size: 100, dups: 10, reps: 100 Rate naive salva naive 817/s -- -20% salva 1020/s 25% -- size: 100, dups: 10, reps: 100000 s/iter naive salva naive 1.19 -- -26% salva 0.878 36% -- size: 100, dups: 10, reps: 1000000 s/iter naive salva naive 11.9 -- -24% salva 9.08 31% -- size: 1000, dups: 3, reps: 100 Rate salva naive salva 476/s -- -31% naive 692/s 45% -- size: 1000, dups: 3, reps: 100000 s/iter naive salva naive 1.46 -- -10% salva 1.32 11% -- size: 1000, dups: 3, reps: 1000000 s/iter naive salva naive 14.8 -- -12% salva 13.1 13% -- size: 1000, dups: 10, reps: 100 Rate salva naive salva 597/s -- -12% naive 681/s 14% -- size: 1000, dups: 10, reps: 100000 s/iter naive salva naive 1.46 -- -20% salva 1.17 25% -- size: 1000, dups: 10, reps: 1000000 s/iter naive salva naive 15.2 -- -25% salva 11.4 33% -- size: 1000, dups: 100, reps: 100 Rate naive salva naive 688/s -- -9% salva 755/s 10% -- size: 1000, dups: 100, reps: 100000 s/iter naive salva naive 1.46 -- -40% salva 0.879 66% -- size: 1000, dups: 100, reps: 1000000 s/iter naive salva naive 14.7 -- -39% salva 8.99 64% -- size: 100000, dups: 3, reps: 100 Rate salva naive salva 9.62/s -- -98% naive 420/s 4268% -- size: 100000, dups: 3, reps: 100000 s/iter naive salva naive 2.38 -- -12% salva 2.11 13% -- size: 100000, dups: 3, reps: 1000000 s/iter naive salva naive 24.1 -- -11% salva 21.4 13% -- size: 100000, dups: 10, reps: 100 Rate salva naive salva 11.4/s -- -97% naive 415/s 3557% -- size: 100000, dups: 10, reps: 100000 s/iter naive salva naive 2.42 -- -14% salva 2.07 17% -- size: 100000, dups: 10, reps: 1000000 s/iter naive salva naive 23.8 -- -17% salva 19.7 21% -- size: 100000, dups: 100, reps: 100 Rate salva naive salva 14.5/s -- -97% naive 423/s 2821% -- size: 100000, dups: 100, reps: 100000 s/iter naive salva naive 2.36 -- -34% salva 1.55 52% -- size: 100000, dups: 100, reps: 1000000 s/iter naive salva naive 23.7 -- -35% salva 15.5 53% -- size: 1000000, dups: 3, reps: 100 Rate salva naive salva 1.22/s -- -100% naive 337/s 27580% -- size: 1000000, dups: 3, reps: 100000 s/iter salva naive salva 3.35 -- -10% naive 3.01 11% -- size: 1000000, dups: 3, reps: 1000000 s/iter naive salva naive 30.1 -- -14% salva 25.9 16% -- size: 1000000, dups: 10, reps: 100 Rate salva naive salva 1.16/s -- -100% naive 332/s 28395% -- size: 1000000, dups: 10, reps: 100000 s/iter salva naive salva 3.20 -- -4% naive 3.06 5% -- size: 1000000, dups: 10, reps: 1000000 s/iter naive salva naive 30.1 -- -18% salva 24.8 21% -- size: 1000000, dups: 100, reps: 100 Rate salva naive salva 1.36/s -- -100% naive 330/s 24141% -- size: 1000000, dups: 100, reps: 100000 s/iter naive salva naive 3.01 -- -8% salva 2.78 9% -- size: 1000000, dups: 100, reps: 1000000 s/iter naive salva naive 30.3 -- -29% salva 21.4 41% --