sub kennethk {
my $string = shift;
my $off = 0;
my $step = 10;
my @indices = sort {$off = 0;
until (substr($string, $a+$off, $step) cmp substr($string, $b+$off, $step)){
$off += $step;
} } 0 .. length($string) - 1;
$_++ for @indices;
return @indices;
}
####
use strict;
use warnings;
use Benchmark ':all';
verify();
comparison(20000,26,20);
comparison(20000,4,20);
comparison(200000,26,5);
comparison(200000,4,5);
sub verify {
my $input = join '', map { ('a'..'z')[rand(4)] } 0..20000-1;
my @RobertCraven = RobertCraven($input);
my @hdb = hdb($input);
my @kennethk = kennethk($input);
die "Mismatch 1" if @RobertCraven != @hdb;
die "Mismatch 2" if @RobertCraven != @kennethk;
for my $i (0 .. $#RobertCraven) {
die "Char mismatch 1($i)" if $RobertCraven[$i] ne $hdb[$i];
die "Char mismatch 2($i)" if $RobertCraven[$i] ne $kennethk[$i];
}
}
sub comparison {
my ($n, $letters, $count) = @_;
printf "%10d %2d %2d\n", @_;
my $input = join '', map { ('a'..'z')[rand($letters)] } 0..$n-1;
cmpthese($count, {
$n < 25000 ? (
'RobertCraven'=> sub{RobertCraven($input)},
) : (),
'hdb' => sub{hdb($input)},
'kennethk' => sub{kennethk($input)},
});
print "\n";
}
sub RobertCraven {
my $string = shift;
my $length = length($string);
my @ordered_list = sort map { substr($string, $_) } 0 .. $length - 1;
my @indices = map $length - length($_) + 1, @ordered_list;
return @indices;
}
sub hdb {
my $string = shift;
my @array = sort { substr( $string, $a ) cmp substr( $string, $b ) } 0..length($string)-1;
$_++ for @array;
return @array;
}
sub kennethk {
my $string = shift;
my $off = 0;
my $step = 10;
my @indices = sort {$off = 0;
until (substr($string, $a+$off, $step) cmp substr($string, $b+$off, $step)){
$off += $step;
} } 0 .. length($string) - 1;
$_++ for @indices;
return @indices;
}
####
20000 26 20
Rate hdb RobertCraven kennethk
hdb 2.96/s -- -11% -49%
RobertCraven 3.32/s 12% -- -43%
kennethk 5.80/s 96% 75% --
20000 4 20
Rate hdb RobertCraven kennethk
hdb 2.96/s -- -9% -51%
RobertCraven 3.25/s 10% -- -46%
kennethk 6.08/s 105% 87% --
200000 26 5
s/iter hdb kennethk
hdb 47.0 -- -95%
kennethk 2.13 2107% --
200000 4 5
s/iter hdb kennethk
hdb 45.2 -- -95%
kennethk 2.05 2104% --