in reply to Re: calculate average from range of hash values (optimize)(reply)
in thread calculate average from range of hash values (optimize)

The code for the benchmark. I happen to have a hacked Benchmark.pm that includes support for HTML benchmarks (er, I do now anyway, thats why the code took so long to follow, I sort of got distracted :-), so this was written with that in mind. However as posted it should run just fine on your system. (Assuming you have a C compiler and etc.)

Updates:

  1. I forgot to point out that I changed the way the values are set in the test set from rand() to being the same as the key. This makes spotting errors and proving correctness much easier.
  2. Added aristotles new variant.

#!/usr/bin/perl package Average; use strict; use warnings; use Benchmark qw(cmpthese); use List::Util qw(sum); use Inline C => Config => TYPEMAPS => "$0.typemap"; use Inline 'C'; #Content of $0.typemap (thats tabs in between the columns): #long * T_OPAQUEPTR #double * T_OPAQUEPTR # the master hash my %Data; $Data{$_} = $_ for ( 0 .. 100, 10000 .. 30000, 100000 .. 150000 ); # what we pass in my $Data = {}; #what we will test my @Tests = ( [ 0, 150000 ], [ 0, 100 ], [ 1000, 10002 ], [ 25000 .. 125000 ] ); # seconds to test for my $timefor = 10; # output style (not much good for anybody but the author) my $style = 'auto'; # 'html table'; $|++; foreach my $test (@Tests) { # Build the test subs. # Can't put ari2() in here as it corrupts the hash for the others my %tests = map { $_ => eval "sub { $_(\$Data, $test->[0], $test->[ +1]) }" } qw(demc demcp orig ari1 dem1 dem2 buk ofix); if ( $style =~ /html/i ) { print qq(<h2 align="center">Testing $test->[0] - $test->[1]</h +2>\n); print qq(<table width="100%" border=1>\n); print qq(<tr><th>Routine</th><th>Average</th></tr>\n); } else { print "\n\nTesting $test->[0] - $test->[1]\n"; } # Run the tests once and print out the results. foreach my $t ( keys %tests ) { %$Data = %Data; # reset the hash # get the result of the test my $av = $tests{$t}->(); #print it out if ( defined $av ) { if ( $style =~ /html/ ) { printf qq(<tr><td align="center">%-5s</td>). qq(<td align="center">%.2f</td></tr>\n), $t, $a +v; } else { printf "%-5s => %f\n", $t, $av; } } else { if ( $style =~ /html/ ) { printf qq(<tr><td align="center">%-5s</td>). qq(<td align="center">undef</td></tr>\n), $t; } else { printf "%-5s => undef\n", $t, $av; } } } print "</table>\n" if $style =~ /html/; print "<c"."ode>\n" if $style!~/html/; %$Data = %Data; cmpthese( -$timefor, \%tests, $style ); print $style =~ /html/ ? "<hr />\n" : "</c" . "ode>\n"; } sub orig { my ( $data, $start_point, $end_point ) = @_; my @keys = sort keys %{$data}; my $start_pos; my $end_pos; for ( 0 .. $#keys ) { $start_pos = $_ if $keys[$_] eq $start_point; $end_pos = $_ if $keys[$_] eq $end_point; } return undef unless defined $start_pos and defined $end_pos; my $count = $end_pos - $start_pos + 1; my $amount; $amount += $data->{$keys[$_]} for ( $start_pos .. $end_pos ); return $amount / $count; } sub ofix { my ( $data, $start_point, $end_point ) = @_; my @keys = sort { $a <=> $b } keys %{$data}; my $start_pos; my $end_pos; for ( 0 .. $#keys ) { $start_pos = $_ if $keys[$_] eq $start_point; $end_pos = $_ if $keys[$_] eq $end_point; } return undef unless defined $start_pos and defined $end_pos; my $count = $end_pos - $start_pos + 1; my $amount; $amount += $data->{$keys[$_]} for ( $start_pos .. $end_pos ); return $amount / $count; } sub buk { my ( $hashref, $start, $end ) = @_; my @keys = grep { $_ >= $start && $_ <= $end } keys %{$hashref}; return sum( @{$hashref}{@keys} ) / @keys; } sub dem1 { my ( $data, $start_point, $end_point ) = @_; my ( $sum, $count ) = (0) x 2; foreach my $key ( keys %$data ) { next unless $start_point <= $key and $key <= $end_point; $sum += $data->{$key}; $count++; } return $count ? $sum / $count : undef; } sub dem2 { my ( $data, $start_point, $end_point ) = @_; my @values = map { ( $start_point <= $_ and $_ <= $end_point ) ? $data->{$_} : () } keys %$data; return @values ? sum(@values) / @values : undef; } sub ari1 { my ( $data, $first, $last ) = @_; my @goodkeys = grep exists $data->{$_}, $first .. $last; sum( @{$data}{@goodkeys} ) / @goodkeys; } sub ari2 { my ( $data, $first, $last ) = @_; my @values = grep defined, @{$data}{$first .. $last}; sum(@values) / @values; } #fix for ari2 sub ari3 { my ($data, $first, $last) = @_; my @values = map { local $_ = $data->{$_}; defined() ? $_ : () } $first .. $last; sum(@values) / @values; } { my %cache; sub demcp { my ( $data, $first, $last, $reset ) = @_; if ( !$cache{"$data"} or $reset ) { my @keys = sort { $a <=> $b } keys %$data; $cache{"$data"}{k} = \@keys; $cache{"$data"}{v} = [ @{$data}{@keys} ]; } my ( $keys, $vals ) = @{$cache{"$data"}}{'k', 'v'}; return unless @$keys; my ( $l, $m, $r ) = ( 0, 0, $#$keys ); my $p; while ( $l <= $r ) { use integer; $m = ( $l + $r ) / 2; #printf "(%6d) %6d %6d %6d (%6d)\n",$first,$l,$m,$r,$key +s->[$m]; if ( $first <= $keys->[$m] and ( ( $m > 0 and $keys->[ $m - 1 ] < $first ) or $m +== 0 ) ) { $p = $m; last; } elsif ( $keys->[$m] < $first ) { $l = $m + 1; } else { $r = $m - 1; } } return unless defined $p; my $sum = 0; while ( $p <= $#$keys and $keys->[$p] <= $last ) { $sum += $vals->[ $p++ ]; } return $sum / ( $p - $m ); } } { my %cache; sub demc { my ( $data, $first, $last, $reset ) = @_; if ( !$cache{"$data"} or $reset ) { my @keys = sort { $a <=> $b } keys %$data; $cache{"$data"} = { f => $keys[0], l => $keys[-1], c => scalar(@keys), k => pack("l!*",@keys), v => pack("d*",@{$data}{@keys}), }; } my $cache = $cache{"$data"}; return if !$cache->{c} or $first > $cache->{l} or $last < $cache->{f}; return _c_average( @{$cache}{qw(k v)}, $first, $last, $cache-> +{c} ); } } __DATA__ This assumes that first is in range. It doesn't check. __C__ double _c_average(long *k, double *v, long first, long last, long coun +t) { long l=0; long r=count-1; long m; long p=-1; double sum=0.0; while (l<=r) { m=(l+r)/2; if (first<= k[m] && ((m>0 && k[m-1] < first) || m==0)) { p=m; break; } else if (k[m] < first) { l=m+1; } else { r=m-1; } } while (p<count && k[p]<=last) { sum+=v[p++]; } return sum/(p-m); }

---
demerphq

<Elian> And I do take a kind of perverse pleasure in having an OO assembly language...