# # Statistics::Descriptive::ByColumns module # Compute statistics on columns of data. # # Idea based on a program by Benjamin Kuperman # Rewritten in module form by Diego Zamboni # # June 6, 2001. package Statistics::Descriptive::ByColumns; use strict; use Carp; use Statistics::Descriptive; use vars qw(%default_params); =head1 NAME ByColumns - Compute statistic values on columns of data =head1 SYNOPSIS $s=Statistics::Descriptive::ByColumns->new(type => 'Full', columns = +> 5); $s->read_file("values.dat"); $s->add_row(5, 10, 23, 81, 150); $s->add_line("35 12 7 3.2 81"); $s->add_to_row(1, 38, 25, 41, 12, 99); $s->print; @objs=$s->get_objects; $c1=$s->get_object(1); =head1 DESCRIPTION This module is a wrapper around C<Statistics::Descriptive> to make it easier to store and get statistics on diferent columns of data. Each column is considered as a separate data set, and its values are added to a different C<Statistics::Descriptive> object that can later be obtained and queried. =cut %default_params = ( type => 'Full', columns => 3, ); # Create a new object, with the specified (or default) parameters. sub new { my $what=shift; my $class=ref($what)||$what; my $self={@_}; foreach (keys %default_params) { $self->{$_} = $default_params{$_} unless exists($self->{$_}); } # Check parameters croak "Invalid type parameter '$self->{type}'" unless $self->{type} eq 'Full' || $self->{type} eq 'Sparse'; croak "Invalid number of columns: $self->{columns}" if $self->{columns}<1; # Create the objects $self->{objs}=[ map { ($self->{type} eq 'Full')? Statistics::Descriptive::Full->new(): Statistics::Descriptive::Sparse->new(); } 1 .. $self->{columns} ]; bless $self, $class; return $self; } # Add a row of values. sub add_row { my $self=shift; my @vals=@_; if (@vals != $self->{columns}) { carp "Invalid number of columns: got ".scalar(@vals). ", need $self->{columns}"; return; } foreach my $o (@{$self->{objs}}) { $o->add_data(shift @vals); } return $self; } # Split (on whitespace) and add lines of text. sub add_line { my $self=shift; my @lines=@_; foreach (@lines) { chomp; my @vals=split; $self->add_row(@vals) or carp "Invalid line, ignored: $_\n"; } return $self; } # Add data to a specific column sub add_to_column { my $self=shift; my $col=shift; my @vals=@_; if ($col<0 || $col>=$self->{columns}) { carp "Invalid column number $col. Valid range: 0-".($self->{column +s}-1); return; } $self->{objs}->[$col]->add_data(@vals); return $self; } # Read data from a file. Argument is a filename or a handle. sub read_file { my $self=shift; my $fname=shift; my $h; if (!ref($fname)) { # It's a filename open IN, "<$fname" or do { carp "Error opening file '$fname': $!"; return }; $h=\*IN; } else { # It's a handle $h=$fname; } while (<$h>) { chomp; my @vals=split; $self->add_row(@vals) or carp "Invalid line $., ignored: $_\n"; } close IN; return $self; } # Get the Statistics::Descriptive objects sub get_objects { my $self=shift; return @{$self->{objs}}; } # Get a specific object sub get_object { my $self=shift; my $col=shift; if ($col<0 || $col>=$self->{columns}) { carp "Invalid column number $col. Valid range: 0-$self->{columns}" +; return; } return $self->{objs}->[$col]; } # Print the currently accumulated values. # Can be passed a handle to print to and a header sub print { my $self=shift; my $h=shift || \*STDOUT; my $hdr=shift; my $c=0; my $f=($self->{type} eq 'Full'); if ($hdr) { print "#" x length $hdr; print "####\n"; print "# $hdr #\n"; print "#" x length $hdr; print "####\n"; } foreach my $o (@{$self->{objs}}) { print $h "Column $c:\n"; printf $h "\t Number of items: %d\n", $o->count(); printf $h "\t Mean: %f\n", $o->mean(); printf $h "\t Median: %f\n", $o->median() if $f; printf $h "\t Mode: %f\n", ($o->mode()||0) if $f; printf $h "\t variance: %f\n", $o->variance(); printf $h "\tstandard_deviation: %f\n", $o->standard_deviation(); printf $h "\t min: %f\n", $o->min(); printf $h "\t max: %f\n", $o->max(); printf $h "\t Middle 50%% mean: %f\n", $o->trimmed_mean(.25,.25) + if $f; $c++; } } 1; __END__ #!perl -w # Usage: col-stats.pl numcols [file ...] use Statistics::Descriptive::ByColumns; my $n=shift @ARGV or die "Usage: $0 numcols [file ...]\n"; unless (@ARGV) { @ARGV=(\*STDIN); } foreach my $f (@ARGV) { my $s=Statistics::Descriptive::ByColumns->new(columns => $n); $s->read_file($f); $s->print(undef, ref($f)?undef:$f); }

In reply to Statistics::Descriptive::ByColumns by ZZamboni

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.