in reply to Re^2: Calculated % of data greater than $x in an array
in thread Calculated % of data greater than $x in an array

O() only speaks of scalability; it says nothing about constant factors, so it doesn't show that nothing was saved. It wouldn't surprise me to find that Corion's approach is faster even for long lists. Besides, speed is not the only factor that affects the choice of tools. That said, I don't see the point of sorting here.
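For a one-shot query, the percentage can be had in a single pass with no sort at all. A minimal sketch (the data and threshold here are made up for illustration):

```perl
use strict;
use warnings;

# Hypothetical sample data and threshold, just for illustration.
my @data = ( 3, 9, 1, 7, 5, 8, 2, 6, 4, 10 );
my $x    = 5;

# One pass, no sort: grep in scalar context returns the count
# of elements greater than $x.
my $pct = grep( { $_ > $x } @data ) / @data * 100;
print "$pct\n";    # 50 for this sample
```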

Re^4: Calculated % of data greater than $x in an array
by BrowserUk (Patriarch) on Mar 05, 2010 at 21:44 UTC
    for our $n ( map 10**$_, 1, 3, 6 ) {
        print "Benchmarking $n values";
        our @n = map int( rand $n ), 1 .. $n;
        cmpthese -log $n, {
            sort => q[
                my $t = $n >> 1;
                my @m = sort { $a <=> $b } @n;
                my $c = 0;
                $c++ while $m[ $c ] <= $t;
                my $pct = ( @m - $c ) / @m * 100;
            ],
            grep => q[
                my $t = $n >> 1;
                my $pct = grep( { $_ > $t } @n ) / @n * 100;
            ],
        };
    };;

    Benchmarking 10 values
              Rate sort grep
    sort  285546/s   --  -62%
    grep  751484/s 163%    --
    Benchmarking 1000 values
            Rate sort grep
    sort  3525/s   --  -71%
    grep 12328/s 250%    --
    Benchmarking 1000000 values
           Rate sort grep
    sort 1.13/s   -- -90%
    grep 11.4/s 911%   --
      Seeing how much Perl code runs outside the sort, that makes sense. Thanks. Note that sorting is still faster than the original:
      use strict;
      use warnings;
      use Benchmark qw( cmpthese );

      for our $n ( map 10**$_, 1, 3, 6 ) {
          print "Benchmarking $n values\n";
          our @n = map int( rand $n ), 1 .. $n;
          cmpthese -log $n, {
              orig => q[
                  my $c = 0;
                  for my $elem (@n) {
                      if ( $elem > $t ) { $c++; }
                  }
                  my $pct = ( @n - $c ) / @n * 100;
              ],
              sort => q[
                  my $t = $n >> 1;
                  @n = sort { $a <=> $b } @n;
                  my $c = 0;
                  $c++ while $n[ $c ] <= $t;
                  my $pct = ( @n - $c ) / @n * 100;
              ],
              grep => q[
                  my $t = $n >> 1;
                  my $pct = grep( { $_ > $t } @n ) / @n * 100;
              ],
          };
      }
      Benchmarking 10 values
                Rate orig sort grep
      orig  272566/s   -- -11% -41%
      sort  306129/s  12%   -- -34%
      grep  462968/s  70%  51%   --
      Benchmarking 1000 values
             Rate orig grep sort
      orig 4332/s   -- -42% -44%
      grep 7459/s  72%   --  -4%
      sort 7795/s  80%   5%   --
      Benchmarking 1000000 values
      ^C

      There's a lot of variation in the results due to the random input. The first run I made with orig actually showed sort as the fastest, although it usually isn't.
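      One way to take that run-to-run variation out of the picture is to seed the PRNG, so every run benchmarks the same input. A small sketch (the seed value is arbitrary):

```perl
use strict;
use warnings;

# Seeding the PRNG makes the generated input reproducible
# between runs, removing one source of benchmark variation.
srand(42);
my @a = map int rand 1000, 1 .. 5;

srand(42);
my @b = map int rand 1000, 1 .. 5;

print "@a\n@b\n";    # two identical lines
```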

        Your 'orig' variation never sets $t, but I don't think that matters much. Anyway, I made a slight variation to reduce the chance that the PRNG favours one method over the other:
        use strict;
        use warnings;
        use Benchmark qw( cmpthese );

        for our $n ( map 10**$_, 1, 3, 6 ) {
            print "Benchmarking $n values\n";
            my $l = int log $n;
            our @data   = ();
            our @target = ();
            foreach ( 1 .. $l ) {
                push @data,   [ map int rand $n, 1 .. $n ];
                push @target, int rand $n;
            }
            cmpthese -log $n, {
                orig => q[
                    for ( my $i = 0; $i < @data; $i++ ) {
                        my $c = 0;
                        for my $elem ( @{ $data[$i] } ) {
                            $c++ if $elem > $target[$i];
                        }
                        my $pct = ( @{ $data[$i] } - $c ) / @{ $data[$i] } * 100;
                    }
                ],
                sort => q[
                    for ( my $i = 0; $i < @data; $i++ ) {
                        my $c = 0;
                        my @n = sort { $a <=> $b } @{ $data[$i] };
                        $c++ while $n[$c] <= $target[$i];
                        my $pct = ( @{ $data[$i] } - $c ) / @{ $data[$i] } * 100;
                    }
                ],
                grep => q[
                    for ( my $i = 0; $i < @data; $i++ ) {
                        my $pct = grep( { $_ > $target[$i] } @{ $data[$i] } ) / @{ $data[$i] } * 100;
                    }
                ],
            };
        }
        This shows 'sort' to be always slower, and 'grep' always winning:
        Benchmarking 10 values
                 Rate sort orig grep
        sort  34448/s   -- -11% -53%
        orig  38503/s  12%   -- -47%
        grep  72551/s 111%  88%   --
        Benchmarking 1000 values
              Rate sort orig grep
        sort 162/s   -- -57% -62%
        orig 381/s 135%   -- -11%
        grep 427/s 164%  12%   --
        Benchmarking 1000000 values
        (warning: too few iterations for a reliable count)
        (warning: too few iterations for a reliable count)
        (warning: too few iterations for a reliable count)
             s/iter sort orig grep
        sort   31.2   -- -83% -85%
        orig   5.18 502%   -- -10%
        grep   4.67 567%  11%   --
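        Where sorting could pay off is when many thresholds are queried against the same fixed data: sort once, then answer each query with a binary search instead of the linear `$c++ while ...` scan. A hedged sketch (the data and the `pct_greater` helper are made up for illustration, not from the thread):

```perl
use strict;
use warnings;

# Hypothetical: many threshold queries against one fixed dataset.
my @sorted = sort { $a <=> $b } ( 3, 9, 1, 7, 5, 8, 2, 6, 4, 10 );

# Binary search for the first index whose value exceeds $x,
# so each query costs O(log n) instead of O(n).
sub pct_greater {
    my ($x) = @_;
    my ( $lo, $hi ) = ( 0, scalar @sorted );
    while ( $lo < $hi ) {
        my $mid = int( ( $lo + $hi ) / 2 );
        $sorted[$mid] > $x ? ( $hi = $mid ) : ( $lo = $mid + 1 );
    }
    return ( @sorted - $lo ) / @sorted * 100;
}

print pct_greater(5), "\n";    # 50
```

        For a single query this cannot beat the one-pass grep, since the sort alone already touches every element; it only amortises over repeated queries.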