C:\test>235765 Benchmark: timing 1 iterations of msf_small, sf_small... msf_small: 0 wallclock secs ( 0.00 usr + 0.00 sys = 0.00 CPU) sf broken at C:\test\235765.pl line 25. sf_small: 0 wallclock secs ( 0.00 usr + 0.00 sys = 0.00 CPU) Rate sf_small msf_small sf_small 1000000000000000/s -- 0% msf_small 1000000000000000/s 0% -- Benchmark: running msf_bigger, sf_bigger, each for at least 1 CPU seconds ... msf_bigger: 1 wallclock secs ( 1.04 usr + 0.00 sys = 1.04 CPU) @ 134.49/s (n=140) sf_bigger: 1 wallclock secs ( 1.09 usr + 0.00 sys = 1.09 CPU) @ 51.33/s (n=56) Rate sf_bigger msf_bigger sf_bigger 51.3/s -- -62% msf_bigger 134/s 162% -- #### exists $self->{data} ? keys %{$self->{data}} : wantarray ? () : undef; #### return unless exists $self->{data}; keys %{$self->{data}}; #### if (...) { ... } if (...) { ... } elsif (...) { ... } else { ... } #### my $min; my $max = $min = exists $self->{data} ? $self->{data}->{each %{$self->{data}}} : undef; #### package My::Stats::Freq; use strict; use constant MAXINT => 2**32; #! Could be bigger. Must be defined somewhere? use constant CLASS => 0; use constant THIS => 0; use constant ELEMENT => 1; use constant FREQUENCY => 1; my (%selves); sub new{ my $self = bless \rand, shift; $self->clear(); $self->add( @_ ); return $self; } sub clear{ $selves{$_[THIS]} = [{}, 0, MAXINT, 0]; return; }; use constant DATA => 0; use constant SUM => 1; use constant MIN => 2; use constant MAX => 3; sub add{ my $self = $selves{+shift}; return $self->[SUM] unless @_; my $ref = ref $_[0]; if ($ref eq 'HASH') { for my $key (keys %{$_[0]}) { $self->[SUM] += $_[0]->{$key}; $self->[DATA]{$key} += $_[0]->{$key}; } } elsif ($ref eq 'ARRAY') { $self->[DATA]{$_}++ for(@{$_[0]}); } else { $self->[DATA]{$_}++ for(@_); } _stats($self); return $self->[SUM]; } sub _stats { my $self = shift; @{$self}[SUM, MIN, MAX] = (0, MAXINT, 0); for(values %{$self->[DATA]}) { $self->[SUM] += $_; $self->[MAX] < $_ and $self->[MAX] = $_; $self->[MIN] > $_ and $self->[MIN] = $_; } } sub remove{ my $self = $selves{+shift}; return $self->[SUM] unless @_; while(my $next= shift) { my $ref = ref $next; push(@_, @{$next}), next if $ref eq 'ARRAY'; if ($ref eq 'HASH') { for (keys %{$next}) { $self->[DATA]{$_} -= $next->{$_}; $self->[SUM] -= $next->{$_}; } next; } next unless $self->[DATA]{$next} > 0; $self->[SUM]--; $self->[DATA]{$next}--; } _stats($self); $self->[SUM]; } sub remove_elements{ my $self = $selves{$_[THIS]}; $self->[SUM] -= delete $self->[DATA]{$_} for @_[1..$#_]; ($self->[MAX], $self->[MIN]) = (0, MAXINT); _stats(); $self->[SUM]; } sub max{ $selves{$_[THIS]}[MAX]; } sub min{ $selves{$_[THIS]}[MIN]; } sub sum{ $selves{$_[THIS]}[SUM]; } sub elements{ keys %{ $selves{$_[THIS]}[DATA] }; } sub frequencies{ values %{ $selves{$_[THIS]}[DATA] }; } sub as_hash{ %{ $selves{$_[THIS]}[DATA] }; } sub frequency{ $selves{$_[THIS]}[DATA]{$_[ELEMENT]}; } sub by_frequency{ grep{ $selves{$_[THIS]}[DATA]{$_} == $_[FREQUENCY]; } $_[THIS]->elements(); } sub proportion{ my $self = $selves{$_[THIS]}; return unless exists $self->[DATA]{$_[ELEMENT]}; $self->[DATA]{$_[ELEMENT]} / $self->[SUM]; } sub proportions{ my $self = $selves{$_[THIS]}; map{ $_ => $self->[DATA]{$_} / $self->[SUM] } $_[THIS]->elements(); } #! Should consider what happens if we are subclassed? sub clone{ __PACKAGE__->new( { $_[THIS]->as_hash() } ) }; return 1 if caller(); package main; local $\=$/, $, = $" = ', '; #!" my $msf = My::Stats::Freq->new( map{ chr 65+rand 26 } 1..100); #! 100 elements print $msf->max(), $msf->min(), $msf->sum(); print scalar $msf->elements, $msf->elements(); print scalar $msf->frequencies, $msf->frequencies(); my %hash = $msf->as_hash(); {local $\; while(my ($k,$v)=each%hash){ print "$k=>$v,"; } }; print''; print $msf->by_frequency( $msf->max() ); #! add 152 elements or maybe less if the random generator didn't it them all. print $msf->add( [$msf->elements()] ); print $msf->add( $msf->elements() ); print $msf->add( \%hash ); print scalar $msf->elements, $msf->elements(); print scalar $msf->frequencies, $msf->frequencies(); print $msf->min(), $msf->max(), $msf->sum(); print $msf->by_frequency( $msf->max() ); #! remove them again print $msf->remove( [$msf->elements()] ); print $msf->remove( $msf->elements() ); print $msf->remove( \%hash ); print $msf->elements(); print $msf->frequencies(); print $msf->min(), $msf->max(), $msf->sum(); print $msf->by_frequency( $msf->max() ); #! can we clone? Is the test sufficient? my $clone = $msf->clone(); my ($msf_stringy, $clone_stringy) = ("@{[ $msf->as_hash() ]}", "@{[ $clone->as_hash() ]}"); print $msf_stringy eq $clone_stringy ? 'Cloned OK' : 'Clone failed' . $/ . $msf_stringy . $/ .$clone_stringy; #! Test the proportion stuff though this can easily be derived by the user? print $msf->proportion( $msf->by_frequency( $msf->max() ) ); print $msf->proportions(); #! remove all the elements and check the stats. $msf->remove_elements( $msf->elements ); print $msf->min(), $msf->max(), $msf->sum(); print $msf->elements; print $msf->frequencies; #### #! perl -slw use strict; use Statistics::Frequency; use My::Stats::Freq; use Data::Dumper; use Benchmark qw( cmpthese ); my @data_small = qw( bob tom ); my @data_bigger = (qw[bob bob bob tom sally jim bob bob bob tom sally jim]) x 100; cmpthese( 1, { msf_small => \&msf_small, sf_small => \&sf_small, }); cmpthese( -1, { msf_bigger => \&msf_bigger, sf_bigger => \&sf_bigger, }); sub sf_small { my $f = Statistics::Frequency->new( @data_small ); my %f = reverse $f->frequencies; warn "sf broken" unless $f{$f->frequencies_max} eq 'bob'; } sub sf_bigger { my $f = Statistics::Frequency->new( @data_bigger ); my %f = reverse $f->frequencies; warn "sf broken" unless $f{$f->frequencies_max} eq 'bob'; } sub msf_small { my $f = My::Stats::Freq->new( @data_small ); my @max = $f->by_frequency( $f->max() ); warn "msf broken" unless "@max" eq 'tom bob'; } sub msf_bigger { my $f = My::Stats::Freq->new( @data_bigger ); my ($max) = $f->by_frequency( $f->max() ); warn "msf broken" unless $max eq 'bob'; }