in reply to Can this code be optimized further?

#!/usr/bin/perl -w use strict; use Time::HiRes qw( gettimeofday tv_interval); my @temp=("a_1","b_1","a_2","a_3","a_4","b_2","a_5","b_3","a_6","b_4") +; my @startTime = gettimeofday; for ( 1..10000 ) { my (@a, @b) ; foreach my $value (@temp) { push @a, $1 if ($value =~ /a_(.*)/) ; push @b, $1 if ($value =~ /b_(.*)/) ; } } print "Time: ".tv_interval( \@startTime )."\n"; @temp = qw( a_1 b_1 a_2 a_3 a_4 b_2 a_5 b_3 a_6 b_4 ); @startTime = gettimeofday; for ( 1..10000 ) { my %values; foreach (@temp) { /^([ab])_(.*)/ && do { push @{$values{$1}}, $2; next; }; die "'$_' doesn't match the pattern of ^[ab]_.*\n"; } } print "Time: ".tv_interval( \@startTime )."\n"; @temp = qw( a_1 b_1 a_2 a_3 a_4 b_2 a_5 b_3 a_6 b_4 ); @startTime = gettimeofday; for ( 1..10000 ) { my (@a, @b); foreach (@temp) { my $prefix = substr( $_, 0, 2 ); if ( $prefix eq 'a_' ) { push @a, substr( $_, 2 ); } elsif ( $prefix eq 'b_' ) { push @b, substr( $_, 2 ); } } } print "Time: ".tv_interval( \@startTime )."\n"; @startTime = gettimeofday; for ( 1..10000 ) { my( @a, @b ); my %arrays; eval qq{\$arrays{ "$_" } = \\\@$_} for qw( a b ); for( @temp ) { die "Invalid prefix on element '$_'\n" unless /^([ab])_(.*)/; push @{ $arrays{ $1 } }, $2; } } print "Time: ".tv_interval( \@startTime )."\n"; @startTime = gettimeofday; for ( 1..10000 ) { my @a = map {/a_(.*)/ ? $1 : ()} @temp; my @b = map {/b_(.*)/ ? $1 : ()} @temp; } print "Time: ".tv_interval( \@startTime )."\n"; @startTime = gettimeofday; for ( 1..10000 ) { my @a_arr = grep { s/^a_(.*)/$1/} @temp; my @b_arr = grep { s/^b_(.*)/$1/} @temp; } print "Time: ".tv_interval( \@startTime )."\n"; # GAVE ME substr outside of string at ./testme2.pl line 88. #@startTime = gettimeofday; #for ( 1..10000 ) { # my %parts; # push @{ $parts{ substr $_, 0, 1 }}, substr $_, 2 # foreach @temp; #} #print "Time: ".tv_interval( \@startTime )."\n"; @startTime = gettimeofday; for ( 1..10000 ) { my (@a, @b) ; m[^([ab])_(.*)$] and push @{$1 eq 'a' ? \@a : \@b}, $2 for @temp; } print "Time: ".tv_interval( \@startTime )."\n"; Output: Time: 0.575739 Time: 0.717803 Time: 0.46604 Time: 1.731715 Time: 0.777471 Time: 0.462416 Time: 0.103439
I'd go with BrowserUK's code :)


Update!!!:

Somewhere along the way the @temp was being affected, hence the warnings on phaylon's code.

Here's the updated output w/ mine removed and phaylon's added (also a little more descriptive):

Baseline Time: 0.584196 Regexp w/ Hash Time: 0.7385 Eval Time: 1.774581 Map Time: 0.76179 Grep Time: 0.456991 Hash w/ substr Time: 0.411059 Regexp w/ eq Time: 0.683154 Router Time: 0.668033


Hence, the HoA using substrings would be the most flexible and quickest (albeit a bit risky). Grep is a close second with the eval being 3x as long.

Replies are listed 'Best First'.
Re^2: Can this code be optimized further?
by eric256 (Parson) on Feb 10, 2005 at 16:10 UTC

    You should use the benchmark module to do benchmarking. Hopefully I got these right.

    #!/usr/bin/perl -w use strict; use Benchmark qw/cmpthese/; my @temp=("a_1","b_1","a_2","a_3","a_4","b_2","a_5","b_3","a_6","b_4") +; # samy_kumar sub original { my (@a, @b) ; foreach my $value (@temp) { push @a, $1 if ($value =~ /a_(.*)/) ; push @b, $1 if ($value =~ /b_(.*)/) ; } } # dragonchild sub hash_style { my %values; foreach (@temp) { /^([ab])_(.*)/ && do { push @{$values{$1}}, $2; next; }; } } # roy jonhson sub router_style { my (@a, @b); my %router = (a => \@a, b => \@b); foreach my $value (@temp) { push @{$router{$1}}, $2 if $value =~ /([ab])_(.*)/; } } sub switch_style { my (@a, @b); foreach (@temp) { my $prefix = substr( $_, 0, 2 ); if ( $prefix eq 'a_' ) { push @a, substr( $_, 2 ); } elsif ( $prefix eq 'b_' ) { push @b, substr( $_, 2 ); } } } sub map_style { my @a = map {/a_(.*)/ ? $1 : ()} @temp; my @b = map {/b_(.*)/ ? $1 : ()} @temp; } sub grep_style { my @a_arr = grep { s/^a_(.*)/$1/} @temp; my @b_arr = grep { s/^b_(.*)/$1/} @temp; } sub grep_map_style { my @a = map {/a_(.*)/; $1} grep /a_/, @temp; my @b = map {/b_(.*)/; $1} grep /b_/, @temp; } sub trinary { my (@a, @b) ; m[^([ab])_(.*)$] and push @{$1 eq 'a' ? \@a : \@b}, $2 for @temp; } cmpthese( 100_000, { "Original" => \&original, "Hash" => \&hash_style, "Router" => \&router_style, "Switch" => \&switch_style, "Map" => \&map_style, "Grep" => \&grep_style, "Grep + Map" => \&grep_map_style, "Trinary" => \&trinary }); __DATA__ C:\test>perl 429768.pl Rate Grep Switch Map Router Original Grep + Map Ha +sh Trinary Grep 32489/s -- -67% -70% -78% -81% -83% -8 +5% -87% Switch 98425/s 203% -- -9% -34% -42% -49% -5 +5% -60% Map 108460/s 234% 10% -- -27% -36% -44% -5 +1% -56% Router 149031/s 359% 51% 37% -- -12% -23% -3 +2% -40% Original 168634/s 419% 71% 55% 13% -- -13% -2 +3% -32% Grep + Map 194175/s 498% 97% 79% 30% 15% -- -1 +2% -21% Hash 220264/s 578% 124% 103% 48% 31% 13% +-- -11% Trinary 246914/s 660% 151% 128% 66% 46% 27% 1 +2% -- C:\test>perl 429768.pl Rate Grep Switch Map Router Original Grep + Map Ha +sh Trinary Grep 32658/s -- -66% -69% -78% -81% -83% -8 +5% -86% Switch 95602/s 193% -- -10% -36% -43% -49% -5 +5% -60% Map 106610/s 226% 12% -- -28% -37% -43% -5 +0% -55% Router 148810/s 356% 56% 40% -- -12% -21% -3 +0% -37% Original 168350/s 415% 76% 58% 13% -- -11% -2 +1% -29% Grep + Map 188324/s 477% 97% 77% 27% 12% -- -1 +2% -21% Hash 213220/s 553% 123% 100% 43% 27% 13% +-- -10% Trinary 236967/s 626% 148% 122% 59% 41% 26% 1 +1% -- C:\test>perl 429768.pl Rate Grep Switch Map Router Original Grep + Map Ha +sh Trinary Grep 32819/s -- -66% -69% -78% -81% -82% -8 +6% -87% Switch 96899/s 195% -- -9% -35% -42% -47% -5 +8% -61% Map 106724/s 225% 10% -- -28% -37% -42% -5 +3% -57% Router 148810/s 353% 54% 39% -- -12% -19% -3 +5% -40% Original 168350/s 413% 74% 58% 13% -- -8% -2 +6% -32% Grep + Map 182815/s 457% 89% 71% 23% 9% -- -2 +0% -26% Hash 228833/s 597% 136% 114% 54% 36% 25% +-- -7% Trinary 246305/s 650% 154% 131% 66% 46% 35% +8% -- C:\test>

    ___________
    Eric Hodges
      I added a testing mode to the code, so you can verify that each sub yields proper results. When running, if you pass a n argument to the program, it will run a test instead of a benchmark. Benchmark runs for three seconds instead of 100_000 iterations, now.

      I also tweaked a few of the routines, which made substantial differences in their runtimes. And I dumped a couple of uninteresting subs. The code is in the readmore.

      My results:
      Rate Grep Trinary Hash Map Original New_Map New_Grep Tr +i_Substr Switch Tri_Substr2 Grep 969/s -- -30% -33% -37% -43% -50% -56% + -68% -71% -76% Trinary 1379/s 42% -- -4% -10% -19% -29% -37% + -55% -59% -66% Hash 1442/s 49% 5% -- -6% -15% -26% -34% + -53% -57% -64% Map 1528/s 58% 11% 6% -- -10% -21% -31% + -50% -54% -62% Original 1702/s 76% 23% 18% 11% -- -12% -23% + -44% -49% -57% New_Map 1937/s 100% 40% 34% 27% 14% -- -12% + -36% -42% -52% New_Grep 2202/s 127% 60% 53% 44% 29% 14% -- + -28% -34% -45% Tri_Substr 3050/s 215% 121% 111% 100% 79% 57% 39% + -- -9% -24% Switch 3353/s 246% 143% 132% 119% 97% 73% 52% + 10% -- -16% Tri_Substr2 4000/s 313% 190% 177% 162% 135% 106% 82% + 31% 19% --
      Tri_Substr2 is effectively a synthesis of switch and trinary. You can see the differences little design changes make. Using references slows things down. Matching and substituting back in instead of just deleting makes a big difference for grep vs. new_grep.

      Caution: Contents may have been coded under pressure.
        sub hash_style2 { my %values; my @temp = @master; push @{$values{substr($_,0,1)}}, substr($_,2) for @temp; print "@{$values{a}} <==> @{$values{b}}\n" if $testing; } sub ikegami { my @temp = @master; local $_ = "@temp"; my @a = /\ba_(\d+)\b/g; my @b = /\bb_(\d+)\b/g; print "@a <==> @b\n" if $testing; }
        When I rerun adding this method (removing the three worst performers), here's what I get:
        Rate Trinary New_Grep Original ikegami Hash2 Tri_Substr + Switch Tri_Substr2 Trinary 10273/s -- -5% -22% -34% -40% -41% + -41% -50% New_Grep 10766/s 5% -- -18% -31% -37% -38% + -38% -48% Original 13096/s 27% 22% -- -16% -23% -25% + -25% -37% ikegami 15683/s 53% 46% 20% -- -8% -10% + -10% -24% Hash2 17016/s 66% 58% 30% 8% -- -2% + -3% -18% Tri_Substr 17437/s 70% 62% 33% 11% 2% -- + -0% -16% Switch 17453/s 70% 62% 33% 11% 3% 0% + -- -15% Tri_Substr2 20651/s 101% 92% 58% 32% 21% 18% + 18% --
        Changing to substr() brings the hash into respectable range.

        Being right, does not endow the right to be rude; politeness costs nothing.
        Being unknowing, is not the same as being stupid.
        Expressing a contrary opinion, whether to the individual or the group, is more often a sign of deeper thought than of cantankerous belligerence.
        Do not mistake your goals as the only goals; your opinion as the only opinion; your confidence as correctness. Saying you know better is not the same as explaining you know better.

      Redefine @temp before each algorithm or pass it in as an argument.

      I see that Benchmark has a different output format, but the results should be close to the same, no?

        Hmm.. Redefining makes a significant difference. That is odd, I didn't beleive that any of those functions where modifying the list. Which one is modifying the list?

        C:\test>perl 429768.pl (warning: too few iterations for a reliable count) Rate Grep Grep + Map Hash Map Router Trinary Origi +nal Switch Grep 9699/s -- -36% -39% -42% -44% -47% - +52% -65% Grep + Map 15244/s 57% -- -5% -9% -12% -17% - +24% -45% Hash 16000/s 65% 5% -- -5% -8% -12% - +20% -43% Map 16835/s 74% 10% 5% -- -3% -8% - +16% -40% Router 17301/s 78% 13% 8% 3% -- -5% - +13% -38% Trinary 18282/s 88% 20% 14% 9% 6% -- +-9% -34% Original 20000/s 106% 31% 25% 19% 16% 9% + -- -28% Switch 27855/s 187% 83% 74% 65% 61% 52% +39% -- C:\test>perl 429768.pl Rate Grep Grep + Map Map Hash Router Trinary Origi +nal Switch Grep 9625/s -- -38% -41% -41% -45% -47% - +53% -66% Grep + Map 15420/s 60% -- -5% -5% -12% -16% - +25% -46% Map 16194/s 68% 5% -- -0% -8% -11% - +22% -43% Hash 16194/s 68% 5% 0% -- -8% -11% - +22% -43% Router 17544/s 82% 14% 8% 8% -- -4% - +15% -38% Trinary 18282/s 90% 19% 13% 13% 4% -- - +11% -36% Original 20640/s 114% 34% 27% 27% 18% 13% + -- -27% Switch 28450/s 196% 84% 76% 76% 62% 56% +38% --

        ___________
        Eric Hodges