C:\test>235765
Benchmark: timing 1 iterations of msf_small, sf_small...
msf_small: 0 wallclock secs ( 0.00 usr + 0.00 sys = 0.00 CPU)
sf broken at C:\test\235765.pl line 25.
sf_small: 0 wallclock secs ( 0.00 usr + 0.00 sys = 0.00 CPU)
Rate sf_small msf_small
sf_small 1000000000000000/s -- 0%
msf_small 1000000000000000/s 0% --
Benchmark: running msf_bigger, sf_bigger, each for at least 1 CPU seconds ...
msf_bigger: 1 wallclock secs ( 1.04 usr + 0.00 sys = 1.04 CPU) @ 134.49/s (n=140)
sf_bigger: 1 wallclock secs ( 1.09 usr + 0.00 sys = 1.09 CPU) @ 51.33/s (n=56)
Rate sf_bigger msf_bigger
sf_bigger 51.3/s -- -62%
msf_bigger 134/s 162% --
####
exists $self->{data} ? keys %{$self->{data}} : wantarray ? () : undef;
####
return unless exists $self->{data};
keys %{$self->{data}};
####
if (...) {
...
} if (...) {
...
} elsif (...) {
...
} else {
...
}
####
my $min;
my $max = $min = exists $self->{data} ? $self->{data}->{each %{$self->{data}}} : undef;
####
package My::Stats::Freq;
use strict;
use constant MAXINT => 2**32; #! Could be bigger. Must be defined somewhere?
use constant CLASS => 0;
use constant THIS => 0;
use constant ELEMENT => 1;
use constant FREQUENCY => 1;
my (%selves);
sub new{
my $self = bless \rand, shift;
$self->clear();
$self->add( @_ );
return $self;
}
sub clear{ $selves{$_[THIS]} = [{}, 0, MAXINT, 0]; return; };
use constant DATA => 0;
use constant SUM => 1;
use constant MIN => 2;
use constant MAX => 3;
sub add{
my $self = $selves{+shift};
return $self->[SUM] unless @_;
my $ref = ref $_[0];
if ($ref eq 'HASH') {
for my $key (keys %{$_[0]}) {
$self->[SUM] += $_[0]->{$key};
$self->[DATA]{$key} += $_[0]->{$key};
}
}
elsif ($ref eq 'ARRAY') {
$self->[DATA]{$_}++ for(@{$_[0]});
}
else {
$self->[DATA]{$_}++ for(@_);
}
_stats($self);
return $self->[SUM];
}
sub _stats {
my $self = shift;
@{$self}[SUM, MIN, MAX] = (0, MAXINT, 0);
for(values %{$self->[DATA]}) {
$self->[SUM] += $_;
$self->[MAX] < $_ and $self->[MAX] = $_;
$self->[MIN] > $_ and $self->[MIN] = $_;
}
}
sub remove{
my $self = $selves{+shift};
return $self->[SUM] unless @_;
while(my $next= shift) {
my $ref = ref $next;
push(@_, @{$next}), next if $ref eq 'ARRAY';
if ($ref eq 'HASH') {
for (keys %{$next}) {
$self->[DATA]{$_} -= $next->{$_};
$self->[SUM] -= $next->{$_};
}
next;
}
next unless $self->[DATA]{$next} > 0;
$self->[SUM]--;
$self->[DATA]{$next}--;
}
_stats($self);
$self->[SUM];
}
sub remove_elements{
my $self = $selves{$_[THIS]};
$self->[SUM] -= delete $self->[DATA]{$_} for @_[1..$#_];
($self->[MAX], $self->[MIN]) = (0, MAXINT);
_stats();
$self->[SUM];
}
sub max{ $selves{$_[THIS]}[MAX]; }
sub min{ $selves{$_[THIS]}[MIN]; }
sub sum{ $selves{$_[THIS]}[SUM]; }
sub elements{ keys %{ $selves{$_[THIS]}[DATA] }; }
sub frequencies{ values %{ $selves{$_[THIS]}[DATA] }; }
sub as_hash{ %{ $selves{$_[THIS]}[DATA] }; }
sub frequency{ $selves{$_[THIS]}[DATA]{$_[ELEMENT]}; }
sub by_frequency{
grep{ $selves{$_[THIS]}[DATA]{$_} == $_[FREQUENCY]; } $_[THIS]->elements();
}
sub proportion{
my $self = $selves{$_[THIS]};
return unless exists $self->[DATA]{$_[ELEMENT]};
$self->[DATA]{$_[ELEMENT]} / $self->[SUM];
}
sub proportions{
my $self = $selves{$_[THIS]};
map{ $_ => $self->[DATA]{$_} / $self->[SUM] } $_[THIS]->elements();
}
#! Should consider what happens if we are subclassed?
sub clone{ __PACKAGE__->new( { $_[THIS]->as_hash() } ) };
return 1 if caller();
package main;
local $\=$/, $, = $" = ', '; #!"
my $msf = My::Stats::Freq->new( map{ chr 65+rand 26 } 1..100); #! 100 elements
print $msf->max(), $msf->min(), $msf->sum();
print scalar $msf->elements, $msf->elements();
print scalar $msf->frequencies, $msf->frequencies();
my %hash = $msf->as_hash();
{local $\; while(my ($k,$v)=each%hash){ print "$k=>$v,"; } }; print'';
print $msf->by_frequency( $msf->max() );
#! add 152 elements or maybe less if the random generator didn't it them all.
print $msf->add( [$msf->elements()] );
print $msf->add( $msf->elements() );
print $msf->add( \%hash );
print scalar $msf->elements, $msf->elements();
print scalar $msf->frequencies, $msf->frequencies();
print $msf->min(), $msf->max(), $msf->sum();
print $msf->by_frequency( $msf->max() );
#! remove them again
print $msf->remove( [$msf->elements()] );
print $msf->remove( $msf->elements() );
print $msf->remove( \%hash );
print $msf->elements();
print $msf->frequencies();
print $msf->min(), $msf->max(), $msf->sum();
print $msf->by_frequency( $msf->max() );
#! can we clone? Is the test sufficient?
my $clone = $msf->clone();
my ($msf_stringy, $clone_stringy) = ("@{[ $msf->as_hash() ]}", "@{[ $clone->as_hash() ]}");
print $msf_stringy eq $clone_stringy
? 'Cloned OK'
: 'Clone failed' . $/ . $msf_stringy . $/ .$clone_stringy;
#! Test the proportion stuff though this can easily be derived by the user?
print $msf->proportion( $msf->by_frequency( $msf->max() ) );
print $msf->proportions();
#! remove all the elements and check the stats.
$msf->remove_elements( $msf->elements );
print $msf->min(), $msf->max(), $msf->sum();
print $msf->elements;
print $msf->frequencies;
####
#! perl -slw
use strict;
use Statistics::Frequency;
use My::Stats::Freq;
use Data::Dumper;
use Benchmark qw( cmpthese );
my @data_small = qw( bob tom );
my @data_bigger = (qw[bob bob bob tom sally jim bob bob bob tom sally jim]) x 100;
cmpthese( 1, {
msf_small => \&msf_small,
sf_small => \&sf_small,
});
cmpthese( -1, {
msf_bigger => \&msf_bigger,
sf_bigger => \&sf_bigger,
});
sub sf_small {
my $f = Statistics::Frequency->new( @data_small );
my %f = reverse $f->frequencies;
warn "sf broken" unless $f{$f->frequencies_max} eq 'bob';
}
sub sf_bigger {
my $f = Statistics::Frequency->new( @data_bigger );
my %f = reverse $f->frequencies;
warn "sf broken" unless $f{$f->frequencies_max} eq 'bob';
}
sub msf_small {
my $f = My::Stats::Freq->new( @data_small );
my @max = $f->by_frequency( $f->max() );
warn "msf broken" unless "@max" eq 'tom bob';
}
sub msf_bigger {
my $f = My::Stats::Freq->new( @data_bigger );
my ($max) = $f->by_frequency( $f->max() );
warn "msf broken" unless $max eq 'bob';
}