If you accumulate your float values as strings of packed values rather than as arrays, 100,000 doubles require only 800 KB instead of ~2.5 MB. Multiply that by the 36 vectors in your sample dataset and you reduce the memory requirement for processing 100,000 lines (35 MB) from 350 MB to 50 MB with no loss of performance. It should now run easily on your 128 MB machine without swapping.
Original (tweaked) + results
#!/usr/bin/perl -w
# Baseline version: reads a CSV report and prints MIN / MEAN / MAX / STDDEV
# and the point count for every column whose label does not match TIME or
# YEAR.  Each column is accumulated as a Perl array (one scalar per value),
# which is what makes this version memory-hungry.
use strict;
use Statistics::Basic::Mean;
use Statistics::Basic::StdDev;

print 'Start: ' . localtime() . "\n";

# %HoA:       column label            => array ref of that column's values
# %hash_keys: 1-based column position => column label
my( %HoA, %hash_keys );

my $filename = '224_APID003_report.csv';
open my $read_in, '<', $filename
    or die "I can't open $filename to read: $!\n";

while( <$read_in> ) {
    chomp;
    create_hash( $_ ) if /^Year/;       # header line: starts with "Year"
    pop_hash( $_ )    if /^\d{4},/;     # data line: starts with 4 digits and a comma
}
close $read_in;

# Report the columns in their original (numeric position) order.
for my $position ( sort { $a <=> $b } keys %hash_keys ) {
    my $label = $hash_keys{ $position };
    next if $label =~ m/(?:TIME|YEAR)/i;

    my $values = $HoA{ $label };
    my $count  = @{ $values };

    # Sort a copy and take both extremes with a slice, so a column holding
    # a single value reports that value as both MIN and MAX.
    my @sorted = sort { $a <=> $b } @{ $values };
    my( $low, $hi ) = @sorted[ 0, -1 ];

    my $mean   = Statistics::Basic::Mean->new( $values )->query;
    my $stddev = Statistics::Basic::StdDev->new( $values )->query;

    printf "%-17s MIN:(%8g) MEAN( %8g) MAX:(%8g) STDDEV:(%8g) POINTS(%5d)\n",
        $label, $low, $mean, $hi, $stddev, $count;
}

# create_hash( $header_line )
# Splits the comma-separated header line and records each column label
# under its 1-based position in %hash_keys, with an empty array in %HoA.
sub create_hash {
    my @columns = split /,/, shift;
    my $i = 0;
    for my $label ( @columns ) {
        $i++;
        $hash_keys{ $i } = $label;
        # Start with an empty array so a file with a header but no data
        # rows can still be dereferenced under strict.
        $HoA{ $label } = [];
    }
    return;
}

# pop_hash( $data_line )
# Splits one comma-separated data line and appends each field to the array
# of the column at the same position.
sub pop_hash {
    my @values = split /,/, shift;
    for my $position ( sort { $a <=> $b } keys %hash_keys ) {
        push @{ $HoA{ $hash_keys{ $position } } }, shift @values;
    }
    return;
}

print 'Stop: ' . localtime() . "\n";
print 'Check memory';
<STDIN>;    # wait for a line on STDIN so memory use can be checked before exit

__END__
P:\test>463483-org
Start: Sat Jun 4 11:46:43 2005
224_P003PVNO MIN:( 0) MEAN( 0) MAX:( 0) STDDEV:( 0) POINTS(99999)
224_P003PCKT MIN:( 0) MEAN( 0) MAX:( 0) STDDEV:( 0) POINTS(99999)
224_P003SHDF MIN:( 1) MEAN( 1) MAX:( 1) STDDEV:( 0) POINTS(99999)
224_P003ID MIN:( 3) MEAN( 3) MAX:( 3) STDDEV:( 0) POINTS(99999)
224_P003SEGF MIN:( 3) MEAN( 3) MAX:( 3) STDDEV:( 0) POINTS(99999)
224_P003SCNT MIN:( 11466) MEAN( 11468) MAX:( 11470) STDDEV:( 1.41421) POINTS(99999)
224_P003PLEN MIN:( 35) MEAN( 35) MAX:( 35) STDDEV:( 0) POINTS(99999)
224_MCDHANSGND MIN:( 0) MEAN( 0) MAX:( 0) STDDEV:( 0) POINTS(99999)
224_MCDH5VSVOLT MIN:( 4.96176) MEAN( 4.96176) MAX:( 4.96176) STDDEV:(6.51035e-013) POINTS(99999)
224_MCDH5VSCUR MIN:(0.496248) MEAN( 0.496248) MAX:(0.496248) STDDEV:(8.18678e-013) POINTS(99999)
224_MCDH33VSVOLT MIN:( 3.32078) MEAN( 3.32078) MAX:( 3.32078) STDDEV:(4.5306e-012) POINTS(99999)
224_MCDH33VSCUR MIN:( 0.09408) MEAN( 0.09408) MAX:( 0.09408) STDDEV:(6.10623e-015) POINTS(99999)
224_MCDH25VSVOLT MIN:( 2.51989) MEAN( 2.51989) MAX:( 2.51989) STDDEV:(3.87734e-012) POINTS(99999)
224_MCDH25VSCUR MIN:(0.037647) MEAN( 0.037647) MAX:(0.037647) STDDEV:(7.57797e-014) POINTS(99999)
224_PBUSURBVOLT MIN:( 6.9834) MEAN( 7.01162) MAX:( 7.01867) STDDEV:(0.0141077) POINTS(99999)
224_PEPURLCUR MIN:( 1.275) MEAN( 1.293) MAX:( 1.32) STDDEV:(0.014697) POINTS(99999)
224_PBATV1 MIN:(0.247917) MEAN( 0.247917) MAX:(0.247917) STDDEV:(3.57547e-013) POINTS(99999)
224_PBATFIVOLT MIN:( 5.83) MEAN( 5.83) MAX:( 5.83) STDDEV:(5.71099e-013) POINTS(99999)
224_PBATCUR MIN:( 0.045) MEAN( 0.045) MAX:( 0.045) STDDEV:(5.21319e-014) POINTS(99999)
224_PBATCPOL MIN:( 49) MEAN( 49) MAX:( 49) STDDEV:( 0) POINTS(99999)
224_PBATCPOLV MIN:( 0.735) MEAN( 0.735) MAX:( 0.735) STDDEV:(5.01155e-013) POINTS(99999)
224_PEPSAVOLT MIN:( 7.44399) MEAN( 7.44399) MAX:( 7.44399) STDDEV:(1.12834e-011) POINTS(99999)
224_PEPSACUR MIN:( 1.575) MEAN( 1.575) MAX:( 1.575) STDDEV:(2.43805e-013) POINTS(99999)
224_PBATHV1 MIN:(0.125373) MEAN( 0.125373) MAX:(0.125373) STDDEV:(1.98896e-013) POINTS(99999)
224_PBATHV2 MIN:(0.125373) MEAN( 0.125373) MAX:(0.125373) STDDEV:(1.98896e-013) POINTS(99999)
224_PBATHV3 MIN:(0.125373) MEAN( 0.125373) MAX:(0.125373) STDDEV:(1.98896e-013) POINTS(99999)
224_PBATHV4 MIN:(0.125373) MEAN( 0.125373) MAX:(0.125373) STDDEV:(1.98896e-013) POINTS(99999)
224_PBATHV5 MIN:(0.125373) MEAN( 0.125373) MAX:(0.125373) STDDEV:(1.98896e-013) POINTS(99999)
224_PBATHV6 MIN:(0.125373) MEAN( 0.125373) MAX:(0.125373) STDDEV:(1.98896e-013) POINTS(99999)
224_PEP5VBM MIN:( 5.27354) MEAN( 5.27354) MAX:( 5.27354) STDDEV:(7.62057e-012) POINTS(99999)
224_PBATV2 MIN:( 0) MEAN( 0) MAX:( 0) STDDEV:( 0) POINTS(99999)
224_AMAGCUR MIN:( 0.13726) MEAN( 0.137599) MAX:(0.138108) STDDEV:(0.000415434) POINTS(99999)
224_MCDH21VRVOLT MIN:( 2.09014) MEAN( 2.09014) MAX:( 2.09014) STDDEV:(4.21441e-013) POINTS(99999)
224_XRCVAGCGS MIN:(0.019593) MEAN( 0.019593) MAX:(0.019593) STDDEV:(1.28682e-014) POINTS(99999)
224_XRCVCLSTR MIN:(0.019593) MEAN( 0.019593) MAX:(0.019593) STDDEV:(1.28682e-014) POINTS(99999)
224_XRCVRFPS MIN:( 20.753) MEAN( 20.753) MAX:( 20.753) STDDEV:(2.24532e-011) POINTS(99999)
Stop: Sat Jun 4 11:47:45 2005
Check memory 354,620 kb
Modified + results
#!/usr/bin/perl -w
# Memory-lean version: reads a CSV report and prints MIN / MEAN / MAX /
# STDDEV and the point count for every column whose label does not match
# TIME or YEAR.  Each column is accumulated as one string of packed doubles
# and only unpacked into an array while that column is being reported.
use strict;
use Statistics::Basic::Mean;
use Statistics::Basic::StdDev;

print 'Start: ' . localtime() . "\n";

my $filename = '224_APID003_report.csv';
open my $read_in, '<', $filename
    or die "I can't open $filename to read: $!\n";

## Extracting the label values first and storing them in an array
## saves re-sorting the hash every time
## and removes the one-time-true conditional code from the loop
my $header = <$read_in>;
defined $header
    or die "$filename is empty; no header line to read.\n";
chomp $header;
my @hash_keys = split ',', $header;     # column labels, in file order

my %HoA;                                # column label => string of packed doubles
while( <$read_in> ) {
    chomp;
    pop_hash( $_ );
}
close $read_in;

for my $label ( @hash_keys ) {
    next if $label =~ m/(?:TIME|YEAR)/i;

    my @data  = unpack 'd*', $HoA{ $label };    ## Unpack each vector in turn
    my $count = @data;

    # Slice the sorted list directly: last element is the max, first the min.
    my( $hi, $low ) = ( sort { $a <=> $b } @data )[ -1, 0 ];

    my $mean   = Statistics::Basic::Mean->new( \@data )->query;
    my $stddev = Statistics::Basic::StdDev->new( \@data )->query;

    printf "%-17s MIN:(%8g) MEAN( %8g) MAX:(%8g) STDDEV:(%8g) POINTS(%5d)\n",
        $label, $low, $mean, $hi, $stddev, $count;
}

# pop_hash( $data_line )
# Splits one comma-separated data line and appends each field, converted to
# a number and packed as a double, to the string for the column at the same
# position.
sub pop_hash {
    no warnings 'numeric';  ## Avoid warnings from non numeric fields
    my @values = split /,/, shift;

    ## Note: Sorting unnecessary as we stored the keys in an array
    ## in their original order!
    for my $label ( @hash_keys ) {
        ## Accumulating data values as a string of packed doubles
        ## instead of an array of scalars
        ## reduces memory consumption
        $HoA{ $label } .= pack 'd', 0 + shift @values;
    }
    return;
}

print 'Stop: ' . localtime() . "\n";
print 'Check memory';
<STDIN>;    # wait for a line on STDIN so memory use can be checked before exit

__END__
P:\test>463483
Start: Sat Jun 4 11:49:02 2005
224_P003PVNO MIN:( 0) MEAN( 0) MAX:( 0) STDDEV:( 0) POINTS(99999)
224_P003PCKT MIN:( 0) MEAN( 0) MAX:( 0) STDDEV:( 0) POINTS(99999)
224_P003SHDF MIN:( 1) MEAN( 1) MAX:( 1) STDDEV:( 0) POINTS(99999)
224_P003ID MIN:( 3) MEAN( 3) MAX:( 3) STDDEV:( 0) POINTS(99999)
224_P003SEGF MIN:( 3) MEAN( 3) MAX:( 3) STDDEV:( 0) POINTS(99999)
224_P003SCNT MIN:( 11466) MEAN( 11468) MAX:( 11470) STDDEV:( 1.41421) POINTS(99999)
224_P003PLEN MIN:( 35) MEAN( 35) MAX:( 35) STDDEV:( 0) POINTS(99999)
224_MCDHANSGND MIN:( 0) MEAN( 0) MAX:( 0) STDDEV:( 0) POINTS(99999)
224_MCDH5VSVOLT MIN:( 4.96176) MEAN( 4.96176) MAX:( 4.96176) STDDEV:(6.51035e-013) POINTS(99999)
224_MCDH5VSCUR MIN:(0.496248) MEAN( 0.496248) MAX:(0.496248) STDDEV:(8.18678e-013) POINTS(99999)
224_MCDH33VSVOLT MIN:( 3.32078) MEAN( 3.32078) MAX:( 3.32078) STDDEV:(4.5306e-012) POINTS(99999)
224_MCDH33VSCUR MIN:( 0.09408) MEAN( 0.09408) MAX:( 0.09408) STDDEV:(6.10623e-015) POINTS(99999)
224_MCDH25VSVOLT MIN:( 2.51989) MEAN( 2.51989) MAX:( 2.51989) STDDEV:(3.87734e-012) POINTS(99999)
224_MCDH25VSCUR MIN:(0.037647) MEAN( 0.037647) MAX:(0.037647) STDDEV:(7.57797e-014) POINTS(99999)
224_PBUSURBVOLT MIN:( 6.9834) MEAN( 7.01162) MAX:( 7.01867) STDDEV:(0.0141077) POINTS(99999)
224_PEPURLCUR MIN:( 1.275) MEAN( 1.293) MAX:( 1.32) STDDEV:(0.014697) POINTS(99999)
224_PBATV1 MIN:(0.247917) MEAN( 0.247917) MAX:(0.247917) STDDEV:(3.57547e-013) POINTS(99999)
224_PBATFIVOLT MIN:( 5.83) MEAN( 5.83) MAX:( 5.83) STDDEV:(5.71099e-013) POINTS(99999)
224_PBATCUR MIN:( 0.045) MEAN( 0.045) MAX:( 0.045) STDDEV:(5.21319e-014) POINTS(99999)
224_PBATCPOL MIN:( 49) MEAN( 49) MAX:( 49) STDDEV:( 0) POINTS(99999)
224_PBATCPOLV MIN:( 0.735) MEAN( 0.735) MAX:( 0.735) STDDEV:(5.01155e-013) POINTS(99999)
224_PEPSAVOLT MIN:( 7.44399) MEAN( 7.44399) MAX:( 7.44399) STDDEV:(1.12834e-011) POINTS(99999)
224_PEPSACUR MIN:( 1.575) MEAN( 1.575) MAX:( 1.575) STDDEV:(2.43805e-013) POINTS(99999)
224_PBATHV1 MIN:(0.125373) MEAN( 0.125373) MAX:(0.125373) STDDEV:(1.98896e-013) POINTS(99999)
224_PBATHV2 MIN:(0.125373) MEAN( 0.125373) MAX:(0.125373) STDDEV:(1.98896e-013) POINTS(99999)
224_PBATHV3 MIN:(0.125373) MEAN( 0.125373) MAX:(0.125373) STDDEV:(1.98896e-013) POINTS(99999)
224_PBATHV4 MIN:(0.125373) MEAN( 0.125373) MAX:(0.125373) STDDEV:(1.98896e-013) POINTS(99999)
224_PBATHV5 MIN:(0.125373) MEAN( 0.125373) MAX:(0.125373) STDDEV:(1.98896e-013) POINTS(99999)
224_PBATHV6 MIN:(0.125373) MEAN( 0.125373) MAX:(0.125373) STDDEV:(1.98896e-013) POINTS(99999)
224_PEP5VBM MIN:( 5.27354) MEAN( 5.27354) MAX:( 5.27354) STDDEV:(7.62057e-012) POINTS(99999)
224_PBATV2 MIN:( 0) MEAN( 0) MAX:( 0) STDDEV:( 0) POINTS(99999)
224_AMAGCUR MIN:( 0.13726) MEAN( 0.137599) MAX:(0.138108) STDDEV:(0.000415434) POINTS(99999)
224_MCDH21VRVOLT MIN:( 2.09014) MEAN( 2.09014) MAX:( 2.09014) STDDEV:(4.21441e-013) POINTS(99999)
224_XRCVAGCGS MIN:(0.019593) MEAN( 0.019593) MAX:(0.019593) STDDEV:(1.28682e-014) POINTS(99999)
224_XRCVCLSTR MIN:(0.019593) MEAN( 0.019593) MAX:(0.019593) STDDEV:(1.28682e-014) POINTS(99999)
224_XRCVRFPS MIN:( 20.753) MEAN( 20.753) MAX:( 20.753) STDDEV:(2.24532e-011) POINTS(99999)
Stop: Sat Jun 4 11:50:03 2005
Check memory 52,808 kb
In reply to Re: Efficient use of memory ( 1/7th the memory requirement)
by BrowserUk
in thread Efficient use of memory
by K_M_McMahon
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |