in reply to Re^4: extract relevent lines according to array
in thread extract relevent lines according to array
If the data can fit into memory, then I like to to that way rather than deal with these things. There of course many ways to do this, here is one:
#!/usr/bin/perl -w use strict; use Data::Dumper; my %data; #a hash of array my $chrom; while ( defined(my $line =<DATA>) ) { chomp ($line); if ($line =~ /chrom=(\w+)$/) {$chrom = $1; next;} push ( @{$data{$chrom}}, $line); } my @triples = ("chr1 9837 9840", #same as your @triples "chr1 99998 99999", #just different spacing "chr2 9838 9840"); #print Dumper \%data; # uncomment this line and see what it does # a very powerful tool foreach (@triples) { my ($chrom, $start, $stop) = split; my @values = get_values(\%data, $chrom, $start, $stop); if (!@values) { print "No values for $chrom tags found between ". "$start and $stop inclusive\n"; } else { print "mean for $chrom tags {$start..$stop} is ", average(\@values),"\n"; print " values were: @values\n"; } } sub get_values { my ($HoA_ref, $chrom, $start, $stop) = @_; my @result; foreach my $number_string (@{$HoA_ref->{$chrom}}) { my ($tag, $value) = split(/\s+/,$number_string); push (@result, $value) if ($tag >= $start and $tag <= $stop); } return @result; } sub average #your average (mean) routine # { my ($array_ref) = @_; my $sum; my $count = scalar @$array_ref; foreach (@$array_ref) { $sum += $_; } return $sum / $count; } =prints mean for chr1 tags {9837..9840} is 0.00725 values were: 0.010 0.008 0.007 0.004 No values for chr1 tags found between 99998 and 99999 inclusive mean for chr2 tags {9838..9840} is 0.033 values were: 0.038 0.017 0.044 =cut __DATA__ variableStep chrom=chr1 9837 0.010 9838 0.008 9839 0.007 9840 0.004 9841 0.002 9842 0.001 variableStep chrom=chr2 9837 0.090 9838 0.038 9839 0.017 9840 0.044 9841 0.052 9842 0.091
|
|---|