The code below doesn't add much to ivancho's very nice implementation++, except that I roll my own class to take care of the as-you-go computation of the desired statistics. (Good thing you didn't want the median!)
I thought it was a rare example of an OOP application that is both simple enough to be used as, say, a classroom illustration or a tutorial, and entirely useful "as is". Plus, it illustrates techniques that are being discussed in another thread.
# Streaming per-column statistics for a small CSV table.
#
# The table is read from the DATA handle one row at a time.  For every column
# whose header does not match /TIME|YEAR/i, a Stats accumulator keeps a running
# count, minimum, maximum, sum and sum of squares, so the rows themselves are
# never stored.  After the last row, one summary line is printed per column.
use strict;
use warnings;

my @indices;    # positions of the columns that get summarised
my @headers;    # column names, taken from the header row
my %stats;      # column name => Stats accumulator

while ( <DATA> ) {
    chomp;
    if ( /^Year/ ) {
        # Header row: remember the names and select the columns to track.
        @headers = split /,/;
        @indices = grep $headers[ $_ ] !~ /TIME|YEAR/i, 0..$#headers;
    }
    elsif ( /^\d{4},/ ) {
        # Data row (starts with a four-digit year): feed each selected field
        # to its column's accumulator, creating the accumulator on first use.
        my @data = split /,/;
        ( $stats{ $headers[ $_ ] } ||= Stats->new() )
            ->add_data( $data[ $_ ] ) for @indices;
    }
}

for my $header ( @headers[ @indices ] ) {
    # A column only has an accumulator once a data row has been seen.
    my $stats = $stats{ $header } or next;

    # The header is passed as an argument rather than interpolated into the
    # format, so a '%' in a column name cannot be read as a conversion.
    printf "%s: MIN:(%0.1f) MEAN:(%0.1f) MAX:(%0.1f) STDEV:(%0.1f) POINTS:(%d)\n",
        $header, $stats->min, $stats->mean, $stats->max, $stats->std_dev,
        $stats->count;
}

exit;

# Stats - running accumulator for count, min, max, mean and variance.
#
# Values are supplied one at a time through add_data(); only the count, the
# extremes, the sum and the sum of squares are kept.
package Stats;

# new( ... ) - construct an accumulator; any arguments are passed to init().
sub new {
    my $class = shift;
    my $self  = bless +{}, $class;
    $self->init( @_ );
    return $self;
}

BEGIN {
    # define getsets
    # Each field gets a combined getter/setter: with an argument it stores
    # and returns that value, without one it returns the current value.
    for my $field ( qw( count min max sum sum2 ) ) {
        no strict 'refs';
        *$field = sub {
            my $self = shift;
            return @_ ? ( $self->{ $field } = shift ) : $self->{ $field };
        };
    }

    # define accumulators
    # add_to_FIELD( $delta ) adds $delta to FIELD and returns the new value.
    for my $field ( qw( count sum sum2 ) ) {
        no strict 'refs';
        my $accum = "add_to_$field";
        *$accum = sub {
            my $self = shift;
            return $self->$field( $self->$field + shift );
        };
    }
}

# init() - zero the running totals.  min and max are left undefined until
# the first call to add_data().
sub init {
    my $self = shift;
    $self->count( 0 );
    $self->sum( 0 );
    $self->sum2( 0 );
    return;
}

# add_data( $value ) - fold one value into the running statistics.
sub add_data {
    my $self = shift;
    my $data = shift;
    $self->min( $data ) if !defined $self->min || $self->min > $data;
    $self->max( $data ) if !defined $self->max || $self->max < $data;
    $self->add_to_count( 1 );
    $self->add_to_sum( $data );
    $self->add_to_sum2( $data * $data );
    return;
}

# NOTE: the getters below use "return undef" on purpose.  They are called in
# list context inside printf's argument list, where a bare "return;" would
# yield an empty list and shift every later argument by one position.

# mean() - arithmetic mean, or undef when no data has been added.
sub mean {
    my $self = shift;
    return $self->count ? $self->sum / $self->count : undef;
}

# variance() - population variance, computed as E[x^2] - (E[x])^2; undef when
# no data has been added.
sub variance {
    my $self = shift;
    return undef unless $self->count;
    my $sigma2 = $self->sum2 / $self->count - $self->mean() ** 2;

    # Floating-point rounding can leave a tiny negative result; clamp it so
    # that std_dev() never takes the square root of a negative number.
    return $sigma2 < 0 ? 0 : $sigma2;
}

# std_dev() - square root of variance(); undef when no data has been added.
sub std_dev {
    my $self = shift;
    return undef unless $self->count;
    return sqrt $self->variance;
}

# unbiased_variance() - variance() scaled by n/(n-1); undef for fewer than
# two data points.
sub unbiased_variance {
    my $self = shift;
    return undef unless $self->count > 1;
    return $self->variance() * $self->count / ( $self->count - 1 );
}

# unbiased_std_dev() - square root of unbiased_variance(); undef for fewer
# than two data points.
sub unbiased_std_dev {
    my $self = shift;
    return undef unless $self->count > 1;
    return sqrt $self->unbiased_variance;
}

# Switch back to main so that the DATA handle read above is main::DATA.
package main;

__DATA__
Year,Time,foo,bar,baz,quux,frobozz
1977,17:03:49,1959,4001,2158,7099,4072
1981,22:26:37,1959,4003,8408,7914,2716
1989,01:22:43,1959,4005,814,5399,8547
__END__
foo: MIN:(1959.0) MEAN:(1959.0) MAX:(1959.0) STDEV:(0.0) POINTS:(3)
bar: MIN:(4001.0) MEAN:(4003.0) MAX:(4005.0) STDEV:(1.6) POINTS:(3)
baz: MIN:(814.0) MEAN:(3793.3) MAX:(8408.0) STDEV:(3308.9) POINTS:(3)
quux: MIN:(5399.0) MEAN:(6804.0) MAX:(7914.0) STDEV:(1047.7) POINTS:(3)
frobozz: MIN:(2716.0) MEAN:(5111.7) MAX:(8547.0) STDEV:(2491.4) POINTS:(3)
the lowliest monk
In reply to Re: Efficient use of memory
by tlm
in thread Efficient use of memory
by K_M_McMahon
| For: | Use: |
| --- | --- |
| `&` | `&amp;` |
| `<` | `&lt;` |
| `>` | `&gt;` |
| `[` | `&#91;` |
| `]` | `&#93;` |