#!/usr/bin/perl use warnings; use strict; use Getopt::Long; use List::Util qw(shuffle); my $k=6; my $window=31; my $bins=2*$window+$k; my %ktc; my $sequence_fname; my $wiggle_fname; my $permute=0; my $normalize_fname; my $out='default_out'; GetOptions ( "o=s" => \$out, "s=s" => \$sequence_fname, "wig=s" => \$wiggle_fname, "n=s" => \$normalize_fname, "p=i" => \$permute); ############initialize %ktc $out=$out.'counts.tab'; my $letters=[ qw(A C G T N) ]; my @words=&combin($letters,$k ); foreach(@words){ @{$ktc{$_}}= ((0)x$bins) ; } ########## ##########Load normalization file data open(NORM,"<$normalize_fname")||die "Cannot open normalization file\n"; my %normalize; while (){ my @a=split(/\t/,$_); $normalize{$a[0]}=$a[1]; } close NORM; ##########Load position counts my %counts; open (CTS,'<',$wiggle_fname)||die "Cannot open wiggle file\n"; my $cur_chr=''; my $pos=''; my $counts=''; while (){ if($_=~/variableStep chrom=(.*?)$/){ $cur_chr=$1; $counts{$cur_chr}={}; } else{ ($pos,$counts)=split(/\t/,$_); $counts{$cur_chr}{$pos}=$counts; } } close CTS; ####### #########Map counts to orfs my $test=0; if(!($permute)){ open(SEQ,'<',$sequence_fname)||die "Cannot open sequence file\n"; while (my @line=split (/\t/,)){ my $ct=0; my $start=$line[1]; if ($start < $line[2]){##Watson Strand until ($start > $line[2]-$window){ for(my $i=0;$i<$bins;++$i){ if(defined $normalize{$line[3]} && $normalize{$line[3]} > 0 && $counts{$line[0]}{$start-$window+$i}){ ${$ktc{substr($line[4],$ct,$k)}}[$i]+=$counts{$line[0]}{$start-$window+$i}/$normalize{$line[3]}; } else{ ++$test; } } ++$start; ++$ct; } } else{##Crick Strand until($start < $line[2]+$window){ for(my $i=0;$i<$bins;++$i){ if(defined $normalize{$line[3]} && $normalize{$line[3]} > 0 && $counts{$line[0]}{$start-$window+$i}){ ${$ktc{substr($line[4],$ct,$k)}}[$i]+=$counts{$line[0]}{$start-$window+$i}/$normalize{$line[3]}; } else{ ++$test; } } --$start; ++$ct; } } } foreach my $key(keys %ktc){ if($key=~/N/){ next; } print $key,"\t", "@{$ktc{$key}}\n"; } } else{ for(my $r=0;$r<$permute;++$r){#for all permutation iterations foreach my $key(keys %counts){#for each chromosome my @values = shuffle(values %{$counts{$key}});#make a array of values $counts{$key}{$_} = shift @values for keys %{$counts{$key}};#permute the values to a read position in the chromosome } foreach(@words){ @{$ktc{$_}}= ((0)x$bins) ; } open(SEQ,'<',$sequence_fname)||die "Cannot open sequence file\n";#open the sequence file while (my @line=split (/\t/,)){#for each sequence record the nmer information my $ct=0; my $start=$line[1]; if ($start < $line[2]){##Watson Strand until ($start > $line[2]-$window){ for(my $i=0;$i<$bins;++$i){ if(defined $normalize{$line[3]} && $normalize{$line[3]} > 0 && $counts{$line[0]}{$start-$window+$i}){ ${$ktc{substr($line[4],$ct,$k)}}[$i]+=$counts{$line[0]}{$start-$window+$i}/$normalize{$line[3]}; } else{ ++$test; } } ++$start; ++$ct; } } else{##Crick Strand until($start < $line[2]+$window){ for(my $i=0;$i<$bins;++$i){ if(defined $normalize{$line[3]} && $normalize{$line[3]} > 0 && $counts{$line[0]}{$start-$window+$i}){ ${$ktc{substr($line[4],$ct,$k)}}[$i]+=$counts{$line[0]}{$start-$window+$i}/$normalize{$line[3]}; } else{ ++$test; } } --$start; ++$ct; } } } close SEQ; open(OUT,">>$out")||die "Cannot open $out\n"; foreach my $key(keys %ktc){ if($key=~/N/){#skip if the nmer contains an N # $ktc{$key}=((0)x$bins);#reset the hash next; } print OUT $key,"\t","@{$ktc{$key}}\n";#print result # $ktc{$key}=((0)x$bins);#reset hash } close OUT; } } sub combin{ my ($data, $q) = @_; return if $q < 1; my $results = $data; while (--$q) { my @new; for my $letter (@$data) { push @new, map { $letter . $_ } @$results; } # end for $letter in @$data $results = \@new; } # end while --$q is not 0 return @$results; } # end ordered_combinations ####

Filename/home/jgardin/data/profiling_data/hiseq_07-29/Sample_gap_FT/nmer_mapping_quant.pl
StatementsExecuted 74725779 statements in 63.3s
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
77578351268ms268msmain::::CORE:readline main::CORE:readline (opcode)
79820921162ms162msmain::::CORE:match main::CORE:match (opcode)
81921116.1ms16.1msmain::::CORE:print main::CORE:print (opcode)
1116.68ms13.0msmain::::BEGIN@3 main::BEGIN@3
1116.31ms6.31msmain::::combin main::combin
1111.41ms1.44msmain::::BEGIN@2 main::BEGIN@2
111234µs254µsmain::::BEGIN@2.1 main::BEGIN@2.1
111219µs466µsmain::::BEGIN@4 main::BEGIN@4
641146µs146µsmain::::CORE:open main::CORE:open (opcode)
641107µs107µsmain::::CORE:close main::CORE:close (opcode)
131112µs12µsmro::::method_changed_in mro::method_changed_in (xsub)
13118µs8µsutf8::::is_utf8 utf8::is_utf8 (xsub)
13115µs5µsInternals::::SvREADONLYInternals::SvREADONLY (xsub)
0000s0smain::::RUNTIME main::RUNTIME
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1#!/usr/bin/perl
241.55ms41.70ms
# spent 254µs (234+19) within main::BEGIN@2.1 which was called: # once (234µs+19µs) by main::RUNTIME at line 2 # spent 1.44ms (1.41+25µs) within main::BEGIN@2 which was called: # once (1.41ms+25µs) by main::RUNTIME at line 2
use warnings; use strict;
# spent 1.44ms making 1 call to main::BEGIN@2 # spent 254µs making 1 call to main::BEGIN@2.1 # spent 7µs making 1 call to warnings::import # spent 2µs making 1 call to strict::import
3280µs214.2ms
# spent 13.0ms (6.68+6.28) within main::BEGIN@3 which was called: # once (6.68ms+6.28ms) by main::RUNTIME at line 3
use Getopt::Long;
# spent 13.0ms making 1 call to main::BEGIN@3 # spent 1.26ms making 1 call to Getopt::Long::import
42784µs2504µs
# spent 466µs (219+247) within main::BEGIN@4 which was called: # once (219µs+247µs) by main::RUNTIME at line 4
use List::Util qw(shuffle);
# spent 466µs making 1 call to main::BEGIN@4 # spent 38µs making 1 call to Exporter::import
51500nsmy $k=6;
61100nsmy $window=31;
71900nsmy $bins=2*$window+$k;
81100nsmy %ktc;
91100nsmy $sequence_fname;
101100nsmy $wiggle_fname;
111200nsmy $permute=0;
121100nsmy $normalize_fname;
131500nsmy $out='default_out';
1413µs15µsGetOptions (
# spent 5µs making 1 call to Getopt::Long::GetOptions
15 "o=s" => \$out,
16 "s=s" => \$sequence_fname,
17 "wig=s" => \$wiggle_fname,
18 "n=s" => \$normalize_fname,
19 "p=i" => \$permute);
20
21############initialize %ktc
221500ns$out=$out.'counts.tab';
2311µsmy $letters=[ qw(A C G T N) ];
2411.02ms16.31msmy @words=&combin($letters,$k );
# spent 6.31ms making 1 call to main::combin
2515µsforeach(@words){
261562556.3ms @{$ktc{$_}}= ((0)x$bins) ;
27}
28##########
29##########Load normalization file data
30142µs129µsopen(NORM,"<$normalize_fname")||die "Cannot open normalization file\n";
# spent 29µs making 1 call to main::CORE:open
311400nsmy %normalize;
32118µs113µswhile (<NORM>){
# spent 13µs making 1 call to main::CORE:readline
3380664.62ms my @a=split(/\t/,$_);
34806620.3ms80662.49ms $normalize{$a[0]}=$a[1];
# spent 2.49ms making 8066 calls to main::CORE:readline, avg 309ns/call
35}
36114µs110µsclose NORM;
# spent 10µs making 1 call to main::CORE:close
37##########Load position counts
381200nsmy %counts;
3919µs16µsopen (CTS,'<',$wiggle_fname)||die "Cannot open wiggle file\n";
# spent 6µs making 1 call to main::CORE:open
401400nsmy $cur_chr='';
411200nsmy $pos='';
421300nsmy $counts='';
4318µs16µswhile (<CTS>){
# spent 6µs making 1 call to main::CORE:readline
447669592.81s1533918389ms if($_=~/variableStep chrom=(.*?)$/){
# spent 242ms making 766959 calls to main::CORE:readline, avg 316ns/call # spent 147ms making 766959 calls to main::CORE:match, avg 191ns/call
451639µs $cur_chr=$1;
461624µs $counts{$cur_chr}={};
47 }
48 else{
49766943591ms ($pos,$counts)=split(/\t/,$_);
50766943646ms $counts{$cur_chr}{$pos}=$counts;
51 }
52}
53112µs16µsclose CTS;
# spent 6µs making 1 call to main::CORE:close
54#######
55#########Map counts to orfs
561500nsmy $test=0;
571456msif(!($permute)){
58 open(SEQ,'<',$sequence_fname)||die "Cannot open sequence file\n";
59 while (my @line=split (/\t/,<SEQ>)){
60 my $ct=0;
61 my $start=$line[1];
62 if ($start < $line[2]){##Watson Strand
63 until ($start > $line[2]-$window){
64 for(my $i=0;$i<$bins;++$i){
65 if(defined $normalize{$line[3]} && $normalize{$line[3]} > 0 && $counts{$line[0]}{$start-$window+$i}){
66 ${$ktc{substr($line[4],$ct,$k)}}[$i]+=$counts{$line[0]}{$start-$window+$i}/$normalize{$line[3]};
67 }
68 else{
69 ++$test;
70 }
71 }
72
73 ++$start;
74 ++$ct;
75 }
76 }
77 else{##Crick Strand
78 until($start < $line[2]+$window){
79 for(my $i=0;$i<$bins;++$i){
80 if(defined $normalize{$line[3]} && $normalize{$line[3]} > 0 && $counts{$line[0]}{$start-$window+$i}){
81 ${$ktc{substr($line[4],$ct,$k)}}[$i]+=$counts{$line[0]}{$start-$window+$i}/$normalize{$line[3]};
82 }
83 else{
84 ++$test;
85 }
86 }
87 --$start;
88 ++$ct;
89 }
90 }
91 }
92 foreach my $key(keys %ktc){
93 if($key=~/N/){
94 next;
95 }
96 print $key,"\t", "@{$ktc{$key}}\n";
97 }
98}
99else{
10017µs for(my $r=0;$r<$permute;++$r){#for all permutation iterations
101219µs foreach my $key(keys %counts){#for each chromosome
10232443ms3219.7ms my @values = shuffle(values %{$counts{$key}});#make a array of values
# spent 19.7ms making 32 calls to List::Util::shuffle, avg 616µs/call
103321.24s $counts{$key}{$_} = shift @values for keys %{$counts{$key}};#permute the values to a read position in the chromosome
104 }
10526µs foreach(@words){
10631250197ms @{$ktc{$_}}= ((0)x$bins) ;
107 }
108268µs246µs open(SEQ,'<',$sequence_fname)||die "Cannot open sequence file\n";#open the sequence file
# spent 46µs making 2 calls to main::CORE:open, avg 23µs/call
109245.9ms75623.0ms while (my @line=split (/\t/,<SEQ>)){#for each sequence record the nmer information
# spent 23.0ms making 756 calls to main::CORE:readline, avg 30µs/call
110754237µs my $ct=0;
111754249µs my $start=$line[1];
1127541.42ms if ($start < $line[2]){##Watson Strand
113754394µs until ($start > $line[2]-$window){
114111244449.7s for(my $i=0;$i<$bins;++$i){
115 if(defined $normalize{$line[3]} && $normalize{$line[3]} > 0 && $counts{$line[0]}{$start-$window+$i}){
116 ${$ktc{substr($line[4],$ct,$k)}}[$i]+=$counts{$line[0]}{$start-$window+$i}/$normalize{$line[3]};
117 }
118 else{
119689588866.27s ++$test;
120 }
121 }
122111244466.2ms ++$start;
1231112444330ms ++$ct;
124 }
125 }
126 else{##Crick Strand
127 until($start < $line[2]+$window){
128 for(my $i=0;$i<$bins;++$i){
129 if(defined $normalize{$line[3]} && $normalize{$line[3]} > 0 && $counts{$line[0]}{$start-$window+$i}){
130 ${$ktc{substr($line[4],$ct,$k)}}[$i]+=$counts{$line[0]}{$start-$window+$i}/$normalize{$line[3]};
131 }
132 else{
133 ++$test;
134 }
135 }
136 --$start;
137 ++$ct;
138 }
139 }
140 }
141225µs215µs close SEQ;
# spent 15µs making 2 calls to main::CORE:close, avg 7µs/call
142276µs265µs open(OUT,">>$out")||die "Cannot open $out\n";
# spent 65µs making 2 calls to main::CORE:open, avg 32µs/call
143210.8ms foreach my $key(keys %ktc){
1443125075.0ms3125015.8ms if($key=~/N/){#skip if the nmer contains an N
# spent 15.8ms making 31250 calls to main::CORE:match, avg 504ns/call
145 # $ktc{$key}=((0)x$bins);#reset the hash
146230583.82ms next;
147 }
1488192358ms819216.1ms print OUT $key,"\t","@{$ktc{$key}}\n";#print result
# spent 16.1ms making 8192 calls to main::CORE:print, avg 2µs/call
149# $ktc{$key}=((0)x$bins);#reset hash
150 }
1512103µs276µs close OUT;
# spent 76µs making 2 calls to main::CORE:close, avg 38µs/call
152 }
153}
154
155
156
# spent 6.31ms within main::combin which was called: # once (6.31ms+0s) by main::RUNTIME at line 24
sub combin{
1571500ns my ($data, $q) = @_;
158
1591200ns return if $q < 1;
160
1611200ns my $results = $data;
162
1631400ns while (--$q) {
1645400ns my @new;
16552µs for my $letter (@$data) {
166254.50ms push @new, map { $letter . $_ } @$results;
167 } # end for $letter in @$data
168
169592µs $results = \@new;
170 } # end while --$q is not 0
171
17211.73ms return @$results;
173} # end ordered_combinations
 
# spent 5µs within Internals::SvREADONLY which was called 13 times, avg 392ns/call: # 13 times (5µs+0s) by constant::import at line 132 of constant.pm, avg 392ns/call
sub Internals::SvREADONLY; # xsub
# spent 107µs within main::CORE:close which was called 6 times, avg 18µs/call: # 2 times (76µs+0s) by main::RUNTIME at line 151, avg 38µs/call # 2 times (15µs+0s) by main::RUNTIME at line 141, avg 7µs/call # once (10µs+0s) by main::RUNTIME at line 36 # once (6µs+0s) by main::RUNTIME at line 53
sub main::CORE:close; # opcode
# spent 162ms within main::CORE:match which was called 798209 times, avg 203ns/call: # 766959 times (147ms+0s) by main::RUNTIME at line 44, avg 191ns/call # 31250 times (15.8ms+0s) by main::RUNTIME at line 144, avg 504ns/call
sub main::CORE:match; # opcode
# spent 146µs within main::CORE:open which was called 6 times, avg 24µs/call: # 2 times (65µs+0s) by main::RUNTIME at line 142, avg 32µs/call # 2 times (46µs+0s) by main::RUNTIME at line 108, avg 23µs/call # once (29µs+0s) by main::RUNTIME at line 30 # once (6µs+0s) by main::RUNTIME at line 39
sub main::CORE:open; # opcode
# spent 16.1ms within main::CORE:print which was called 8192 times, avg 2µs/call: # 8192 times (16.1ms+0s) by main::RUNTIME at line 148, avg 2µs/call
sub main::CORE:print; # opcode
# spent 268ms within main::CORE:readline which was called 775783 times, avg 345ns/call: # 766959 times (242ms+0s) by main::RUNTIME at line 44, avg 316ns/call # 8066 times (2.49ms+0s) by main::RUNTIME at line 34, avg 309ns/call # 756 times (23.0ms+0s) by main::RUNTIME at line 109, avg 30µs/call # once (13µs+0s) by main::RUNTIME at line 32 # once (6µs+0s) by main::RUNTIME at line 43
sub main::CORE:readline; # opcode
# spent 12µs within mro::method_changed_in which was called 13 times, avg 885ns/call: # 13 times (12µs+0s) by constant::import at line 147 of constant.pm, avg 885ns/call
sub mro::method_changed_in; # xsub
# spent 8µs within utf8::is_utf8 which was called 13 times, avg 615ns/call: # 13 times (8µs+0s) by constant::import at line 123 of constant.pm, avg 615ns/call
sub utf8::is_utf8; # xsub