#!/usr/bin/perl use strict; use Benchmark; my %methhash; my %methrange; my $t0 = Benchmark->new; open(IN1,"Methylation.gtf"); while () { chomp; if ( /^Gm10/ ) { my ( $bgn, $end, $methcount ) = (split /\t/ )[3,4,10]; $methhash{$_}{methcount} = $methcount; $methrange{$bgn}{$end} = $_; } } my @sorted_meth_starts = sort {$a<=>$b} keys %methrange; open(INPUT, "mc_11268_10.txt"); while() { next unless /^(\d+)\t(\d+)\t\S\t(\w+)/; # skip header line and any blank lines chomp; my ( $meth, $position, $strand ) = ( "Gm$1", $2, $3 ); my $indx = $#sorted_meth_starts; while ( $indx >= 0 and $position < $sorted_meth_starts[$indx] ) { $indx--; } next if ( $indx < 0 ); # skip if $position is lower than start of lowest range my $bgn = $sorted_meth_starts[$indx]; for my $end ( keys %{$methrange{$bgn}} ) { next if ( $position > $end ); # skip if position is outside this range my $match = $methrange{$bgn}{$end}; next if ( $match !~ /^$meth/ ); # skip if assembly doesn't match if ( $strand =~ /^(CH?G|CHH)$/ ) { $methhash{$match}{$strand}++; } } } open(FILEIT,">res_11268_10.txt"); print FILEIT join("\t",qw(Chromosome Gene Feature Start END CG_count_by_C CHG_count_by_C CHH_count_by_C)),"\n"; for my $ge (sort keys %methhash) { my ( $chh, $chg, $cg, $total_count ); if ( $methhash{$ge}{methcount} == 0 ) { $chh = $chg = $cg = "INF"; } else { my $denom = $methhash{$ge}{methcount}; $chh = sprintf( "%.3f", $methhash{$ge}{CHH} / $denom ); $chg = sprintf( "%.3f", $methhash{$ge}{CHG} / $denom ); $cg = sprintf( "%.3f", $methhash{$ge}{CG} / $denom ); $total_count = sprintf( "%.3f", ( $methhash{$ge}{CHH}+$methhash{$ge}{CHG}+$methhash{$ge}{CG}) / $denom ); } print FILEIT join( "\t", ( split /\t/, $ge )[0..4], $cg, $chg, $chh ),"\n"; } my $t1 = Benchmark->new; my $td = timediff($t1, $t0); print "the code took:",timestr($td),"\n";