in reply to "Capturing the nth word in a string" Algorithm Analysis
Below some reworked code. It's still using the same approaches, with the exception of 'Pg's approach. It scored well on the matching case, but if you take out the whitespace in the string, it's too slow to wait for an answer.
Note how 'Grantm' sometimes is the fastest, sometimes is the slowest.use strict; use Benchmark qw/cmpthese/; my @str; my $index = 82; # construct a small string foreach (1 .. 100) { $str [0] .= "Element$_" . ' '; $str [1] .= "Element$_"; } foreach (1 .. 50) { $str [2] .= "Element$_" . ' '; $str [3] .= "Element$_"; } $str [4] = "*&()-=-_<> " x 100; $str [5] = "*&()-=-_<>" x 100; $str [6] = "*&()-=-_<> " x 50; $str [7] = "*&()-=-_<>" x 50; my @info = ( "100 words separated by whitespace", "Same, without the whitespace", "50 words separated by whitespace", "Same, without the whitespace", "100 non-words separated by whitespace", "Same, without the whitespace", "50 non-words separated by whitespace", "Same, without the whitespace", ); my $str; for (my $i = 0; $i < @str && $i < @info; $i ++) { print $info [$i], "\n"; $str = $str [$i]; cmpthese (-1, { Roger => \&FindNth_Roger, Zaxo_Array => \&FindNth_Zaxo_Array, Zaxo_Split => \&FindNth_Zaxo_Split, Ysth => \&FindNth_Ysth, # Pg => \&FindNth_Pg, Grantm => \&FindNth_Grantm, Jasper => \&FindNth_Jasper, }); } sub FindNth_Roger() { my $nth = @{[$str =~ m/\w+/g]}[$index-1]; } sub FindNth_Zaxo_Array() { my $nth = ($str =~ m/\w+/g)[$index-1]; } sub FindNth_Zaxo_Split() { my $nth = (split ' ', $str)[$index-1]; } sub FindNth_Ysth() { my $nth = [$str =~ m/\w+/g]->[$index-1]; } sub FindNth_Pg() { my ($nth) = ($str =~ m/(\w+\s*){$index}/); } sub FindNth_Grantm() { my $idx = $index - 1; my ($nth) = $str =~ /(?:\w+\W+){$idx}(\w+)/; } sub FindNth_Jasper() { my $nth; my %h; for ($str=~/.?/g) { $h{space} += !/\S/; $nth .= $_ if $h{space} == $index-1 && /\S/ .. !/\S/ } } __END__ 100 words separated by whitespace (warning: too few iterations for a reliable count) Rate Jasper Roger Ysth Zaxo_Array Zaxo_Split + Grantm Jasper 429/s -- -95% -95% -96% -98% + -98% Roger 8614/s 1910% -- -0% -19% -54% + -69% Ysth 8650/s 1918% 0% -- -19% -54% + -69% Zaxo_Array 10676/s 2391% 24% 23% -- -43% + -62% Zaxo_Split 18797/s 4286% 118% 117% 76% -- + -33% Grantm 27926/s 6416% 224% 223% 162% 49% + -- Same, without the whitespace Rate Jasper Grantm Roger Ysth Zaxo_Array +Zaxo_Split Jasper 491/s -- -93% -100% -100% -100% + -100% Grantm 6636/s 1250% -- -94% -94% -95% + -95% Roger 110277/s 22340% 1562% -- -4% -22% + -23% Ysth 114840/s 23269% 1631% 4% -- -19% + -20% Zaxo_Array 141940/s 28783% 2039% 29% 24% -- + -1% Zaxo_Split 143360/s 29072% 2060% 30% 25% 1% + -- 50 words separated by whitespace Rate Grantm Jasper Roger Ysth Zaxo_Array +Zaxo_Split Grantm 610/s -- -32% -96% -96% -97% + -98% Jasper 896/s 47% -- -95% -95% -96% + -97% Roger 16440/s 2595% 1734% -- -1% -20% + -54% Ysth 16593/s 2620% 1751% 1% -- -19% + -53% Zaxo_Array 20479/s 3257% 2185% 25% 23% -- + -42% Zaxo_Split 35544/s 5727% 3866% 116% 114% 74% + -- Same, without the whitespace Rate Jasper Grantm Roger Ysth Zaxo_Array +Zaxo_Split Jasper 984/s -- -93% -99% -99% -100% + -100% Grantm 13398/s 1262% -- -92% -92% -94% + -94% Roger 165414/s 16714% 1135% -- -6% -29% + -31% Ysth 175812/s 17770% 1212% 6% -- -25% + -26% Zaxo_Array 234057/s 23691% 1647% 41% 33% -- + -2% Zaxo_Split 238602/s 24153% 1681% 44% 36% 2% + -- 100 non-words separated by whitespace Rate Jasper Roger Ysth Zaxo_Array Zaxo_Split + Grantm Jasper 393/s -- -96% -96% -97% -98% + -99% Roger 9050/s 2206% -- 0% -20% -51% + -67% Ysth 9050/s 2206% 0% -- -20% -51% + -67% Zaxo_Array 11270/s 2771% 25% 25% -- -39% + -60% Zaxo_Split 18442/s 4598% 104% 104% 64% -- + -34% Grantm 27836/s 6992% 208% 208% 147% 51% + -- Same, without the whitespace Rate Jasper Ysth Roger Zaxo_Array Grantm +Zaxo_Split Jasper 432/s -- -95% -95% -96% -98% + -100% Ysth 9050/s 1993% -- -0% -20% -67% + -93% Roger 9050/s 1993% 0% -- -20% -67% + -93% Zaxo_Array 11270/s 2506% 25% 25% -- -60% + -91% Grantm 27836/s 6337% 208% 208% 147% -- + -79% Zaxo_Split 130327/s 30038% 1340% 1340% 1056% 368% + -- 50 non-words separated by whitespace Rate Grantm Jasper Roger Ysth Zaxo_Array +Zaxo_Split Grantm 369/s -- -53% -98% -98% -98% + -99% Jasper 785/s 113% -- -95% -95% -96% + -98% Roger 17231/s 4568% 2095% -- -0% -22% + -51% Ysth 17300/s 4586% 2104% 0% -- -21% + -51% Zaxo_Array 21976/s 5853% 2699% 28% 27% -- + -38% Zaxo_Split 35484/s 9512% 4420% 106% 105% 61% + -- Same, without the whitespace Rate Grantm Jasper Roger Ysth Zaxo_Array +Zaxo_Split Grantm 410/s -- -52% -98% -98% -98% + -100% Jasper 861/s 110% -- -95% -95% -96% + -100% Roger 17230/s 4101% 1902% -- -1% -22% + -92% Ysth 17397/s 4142% 1922% 1% -- -21% + -92% Zaxo_Array 21976/s 5259% 2454% 28% 26% -- + -90% Zaxo_Split 219429/s 53407% 25398% 1174% 1161% 899% + --
Abigail
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Re: "Capturing the nth word in a string" Algorithm Analysis
by Roger (Parson) on Oct 15, 2003 at 10:26 UTC |