in reply to Help with Mean/Median for data analysis
use strict;
use warnings;

use Data::Dumper;
use List::Util qw(sum);

# Groups the rows of the embedded table by region ("chr:start-end") and, for
# each region, counts the rows whose cgtype is 'CG'.  For every coverage
# threshold it also counts the CG rows whose 'cov' exceeds the threshold and
# computes the mean and median of their 'methy' values.  One summary hash per
# region is dumped to STDOUT.

# Column names, in the order the whitespace-separated fields appear in a row.
my @COLUMNS = qw/chr1 start end pos1 pos2 cgtype cov methy strand/;

# A CG row contributes to the "cg>N" statistics when its 'cov' is strictly
# greater than N.
my @COVERAGE_THRESHOLDS = (0, 5, 10);

# Region key => array ref of row hashes belonging to that region.
my %data;

<DATA>;    # discard the header row

while (my $line = <DATA>) {
    chomp $line;
    next unless $line =~ /\S/;    # ignore blank lines

    # split ' ' splits on any run of whitespace and ignores leading blanks.
    my @fields = split ' ', $line;
    if (@fields < @COLUMNS) {
        warn "Skipping malformed data line: $line\n";
        next;
    }

    my %d;
    @d{@COLUMNS} = @fields;
    push @{ $data{"$d{'chr1'}:$d{'start'}-$d{'end'}"} }, \%d;
}

# Emit hash keys in sorted order so repeated runs produce identical dumps.
local $Data::Dumper::Sortkeys = 1;

# Walk the regions ordered by chromosome name, then start, then end.
for my $region (
    sort {
               $data{$a}[0]{'chr1'}  cmp $data{$b}[0]{'chr1'}
            || $data{$a}[0]{'start'} <=> $data{$b}[0]{'start'}
            || $data{$a}[0]{'end'}   <=> $data{$b}[0]{'end'}
    } keys %data
  )
{
    my $rows = $data{$region};

    my %d;
    $d{'cpg'}    = $region;
    $d{'length'} = scalar @{$rows};

    for my $row (@{$rows}) {
        next unless $row->{'cgtype'} eq 'CG';
        $d{'cg'}++;

        for my $n (@COVERAGE_THRESHOLDS) {
            next unless $row->{'cov'} > $n;
            $d{"cg>$n"}++;
            push @{ $d{"cg>${n}vals"} }, $row->{'methy'};
        }
    }

    for my $n (@COVERAGE_THRESHOLDS) {
        my $vals = $d{"cg>${n}vals"};

        if ($vals) {
            # Keep the values sorted in the summary, as the median needs them.
            my @sorted = sort { $a <=> $b } @{$vals};
            $d{"cg>${n}vals"}   = \@sorted;
            $d{"cg>${n}mean"}   = _mean(@sorted);
            $d{"cg>${n}median"} = _median(@sorted);
        }
        else {
            # No CG row exceeded this threshold in the region.
            $d{"cg>${n}mean"}   = 0;
            $d{"cg>${n}median"} = 0;
        }
    }

    ### DO SOMETHING WITH DATA
    print Dumper(\%d);
}

# Returns the arithmetic mean of the given numbers, or 0 for an empty list.
sub _mean {
    my @values = @_;
    return 0 unless @values;
    return sum(@values) / @values;
}

# Returns the median of the given numbers (in any order), or 0 for an empty
# list.  For an even count the two middle values are averaged.
sub _median {
    my @sorted = sort { $a <=> $b } @_;
    return 0 unless @sorted;

    my $mid = int(@sorted / 2);    # explicit truncation for odd counts
    return $sorted[$mid] if @sorted % 2;
    return ($sorted[ $mid - 1 ] + $sorted[$mid]) / 2;
}

__DATA__
chr     start   end     pos1    pos2    CGtype  Cov.    Methy   Strand
chr1    18598   19673   18676   18676   CHH     0       0       +
chr1    18598   19673   18689   18689   CG      2       0       +
chr1    18598   19673   18997   18997   CHH     0       0       +
chr1    18598   19673   19546   19546   CG      4       0       +
chr1    18598   19673   19671   19671   CHG     7       0       +
chr1    124987  125426  125001  125001  CHH     1       0       +
chr1    124987  125426  125226  125226  CG      0       0       +
chr1    124987  125426  125426  125426  CG      0       0       +
chr1    317653  318092  317653  317653  CHG     11      0       +
chr1    317653  318092  317795  317795  CHG     0       0       +
chr1    317653  318092  318090  318090  CHH     3       0       +
chr1    427014  428027  427025  427025  CHH     0       0       +
chr1    439136  440407  439687  439687  CHH     9       0       +
chr1    523082  523977  523167  523167  CG      0       0       +
chr1    534601  536512  535789  535789  CHH     1       0       +
chr1    703847  704410  703999  703999  CHH     0       0       +
chr1    752279  753308  753330  753330  CHH     0       0       +
chr2    5000    100000  100000  100000  CG      1       9       +
chr2    5000    100000  100000  100000  CG      3       2       +
chr2    5000    100000  100000  100000  CG      6       7       +
chr2    5000    100000  100000  100000  CG      9       11      +
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: Help with Mean/Median for data analysis
by Anonymous Monk on Dec 21, 2011 at 05:43 UTC | |
by TJPride (Pilgrim) on Dec 21, 2011 at 07:05 UTC |