in reply to Improving the Nested For Loop
#!/tools/bin/perl
## Update: as moritz suggested, PDL::Stats::Basic might provide the optimal
## solution. My response is using plain perl.
##
## Reads a tab-separated file in which each line is "key<TAB>v1<TAB>v2..."
## and prints, for every unordered pair of keys, one line of the form
##     <running count><TAB><key1>-<key2><TAB><Pearson correlation>
## The per-row mean and sum of squared deviations are computed once up front
## so the pairwise loop only has to accumulate the cross-product term.
##
## Usage: script.pl [file]      (file defaults to data.txt)

use strict;
use warnings;
use List::Util 'sum';

## Load the rows of $file.
## Returns ($probes, $keys): $probes->{$key} = { vals => [...], mean => m },
## and $keys is an array ref of the keys in first-seen order.
## Dies if the file cannot be opened or closed.
sub read_probes {
    my ($file) = @_;
    my (%probes, @keys);

    open my $fh, '<', $file
        or die "ERROR: cannot read file '$file': $!\n";

    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;          # strip LF or CRLF line endings
        next unless length $line;      # blank lines carry no data

        my ($key, @vals) = split /\t/, $line;

        # A row without values has no mean (sum() would return undef and
        # the division would be by zero), so it cannot be correlated.
        unless (defined $key && @vals) {
            warn "WARNING: skipping line $. of '$file': no values\n";
            next;
        }

        # Record each key once; a repeated key replaces the earlier row
        # instead of producing duplicate pairs in the output.
        if (exists $probes{$key}) {
            warn "WARNING: duplicate key '$key' at line $. of '$file';"
               . " keeping the later row\n";
        }
        else {
            push @keys, $key;
        }

        $probes{$key} = { vals => \@vals, mean => sum(@vals) / @vals };
    }
    close($fh) or die $!;

    return (\%probes, \@keys);
}

## Sum of squared deviations to the mean, for every key.
## Returns a hash ref: $cache->{$key} = sum over the row of (value - mean)**2.
sub sum_of_squares {
    my ($probes, $keys) = @_;
    my %ss_of;

    for my $key (@$keys) {
        my $mean = $probes->{$key}{mean};
        my $ss   = 0;
        $ss += ($_ - $mean) ** 2 for @{ $probes->{$key}{vals} };
        $ss_of{$key} = $ss;
    }

    return \%ss_of;
}

## Pearson correlation coefficient between the rows stored under $key1 and
## $key2.
##   $probes - hash ref: $probes->{$key} = { vals => [...], mean => m }
##   $cache  - hash ref: $cache->{$key}  = sum of squared deviations
## Returns the coefficient, or nothing (undef in scalar context) when it is
## not defined: the two rows differ in length, or at least one row has zero
## variance (which would otherwise be a division by zero).
sub correlation {
    my ($probes, $cache, $key1, $key2) = @_;

    my $arr1 = $probes->{$key1}{vals};
    my $arr2 = $probes->{$key2}{vals};
    return unless @$arr1 == @$arr2;

    my $denom = sqrt($cache->{$key1} * $cache->{$key2});
    return if $denom == 0;

    my $mean_x = $probes->{$key1}{mean};
    my $mean_y = $probes->{$key2}{mean};

    my $ssxy = 0;
    for my $i (0 .. $#$arr1) {
        $ssxy += ($arr1->[$i] - $mean_x) * ($arr2->[$i] - $mean_y);
    }

    return $ssxy / $denom;
}

## Script entry point: read the data and print the correlation of every
## pair of keys. Pairs whose correlation is undefined are printed as "NA".
sub main {
    my $file = @ARGV ? $ARGV[0] : 'data.txt';

    my ($probes, $keys) = read_probes($file);
    my $cache = sum_of_squares($probes, $keys);

    my $count = 1;
    for my $i (0 .. $#$keys) {
        for my $j ($i + 1 .. $#$keys) {
            my $cor = correlation($probes, $cache, @{$keys}[$i, $j]);
            $cor = 'NA' unless defined $cor;
            print $count++, "\t", "$keys->[$i]-$keys->[$j]\t$cor\n";
        }
    }

    return;
}

# Run only when executed as a script, so the subs can be loaded without
# touching the data file.
main() unless caller;
|
|---|