sub kennethk { my $string = shift; my $off = 0; my $step = 10; my @indices = sort {$off = 0; until (substr($string, $a+$off, $step) cmp substr($string, $b+$off, $step)){ $off += $step; } } 0 .. length($string) - 1; $_++ for @indices; return @indices; } #### use strict; use warnings; use Benchmark ':all'; verify(); comparison(20000,26,20); comparison(20000,4,20); comparison(200000,26,5); comparison(200000,4,5); sub verify { my $input = join '', map { ('a'..'z')[rand(4)] } 0..20000-1; my @RobertCraven = RobertCraven($input); my @hdb = hdb($input); my @kennethk = kennethk($input); die "Mismatch 1" if @RobertCraven != @hdb; die "Mismatch 2" if @RobertCraven != @kennethk; for my $i (0 .. $#RobertCraven) { die "Char mismatch 1($i)" if $RobertCraven[$i] ne $hdb[$i]; die "Char mismatch 2($i)" if $RobertCraven[$i] ne $kennethk[$i]; } } sub comparison { my ($n, $letters, $count) = @_; printf "%10d %2d %2d\n", @_; my $input = join '', map { ('a'..'z')[rand($letters)] } 0..$n-1; cmpthese($count, { $n < 25000 ? ( 'RobertCraven'=> sub{RobertCraven($input)}, ) : (), 'hdb' => sub{hdb($input)}, 'kennethk' => sub{kennethk($input)}, }); print "\n"; } sub RobertCraven { my $string = shift; my $length = length($string); my @ordered_list = sort map { substr($string, $_) } 0 .. $length - 1; my @indices = map $length - length($_) + 1, @ordered_list; return @indices; } sub hdb { my $string = shift; my @array = sort { substr( $string, $a ) cmp substr( $string, $b ) } 0..length($string)-1; $_++ for @array; return @array; } sub kennethk { my $string = shift; my $off = 0; my $step = 10; my @indices = sort {$off = 0; until (substr($string, $a+$off, $step) cmp substr($string, $b+$off, $step)){ $off += $step; } } 0 .. length($string) - 1; $_++ for @indices; return @indices; } #### 20000 26 20 Rate hdb RobertCraven kennethk hdb 2.96/s -- -11% -49% RobertCraven 3.32/s 12% -- -43% kennethk 5.80/s 96% 75% -- 20000 4 20 Rate hdb RobertCraven kennethk hdb 2.96/s -- -9% -51% RobertCraven 3.25/s 10% -- -46% kennethk 6.08/s 105% 87% -- 200000 26 5 s/iter hdb kennethk hdb 47.0 -- -95% kennethk 2.13 2107% -- 200000 4 5 s/iter hdb kennethk hdb 45.2 -- -95% kennethk 2.05 2104% --