in reply to Speeding up stalled script
Having integrated comments from fellow monks I would suggest four additional changes:
use strict; use warnings; my $start_time = time; my $input1 = shift @ARGV; my $input2 = shift @ARGV; my %dstrbtn_hash; open(IN1, '<', $input1) or die "Can't read source file $input1 : $!\n" +; while(<IN1>){ chomp; $dstrbtn_hash{ length($_) }++ unless />/; } close IN1; open(IN2, '<', $input2) or die "Can't read source file $input2 : $!\n" +; my @source; while(<IN2>){ # ignore header my $seq = <IN2>; chomp $seq; my $len = length( $seq ); push @source, [ $seq, $len ]; # keep length alongside with header } close IN2; my $filename = $input1.$input2."_extracted_seqs.fasta"; open (OUT, '>', $filename) or die "Can't write to file $filename : $!\ +n"; my $header_count = 1; while( my ($key, $freq) = each %dstrbtn_hash) { my $size = $key - 3; for my $iteration (1..$freq) { my $temp_source_seq; my @cand; EXTRACT: { @cand = map { $_->[0] } grep { $_->[1] >= $size } @source; + # filter sequences long enough die "No long enough sequence found in $input2\n" unless @c +and; my $chosen=int(rand(@cand)); $temp_source_seq = $cand[$chosen]; } START: { my $temp_source_seq_len = length ($temp_source_seq); my $random_start_coord = int(rand($temp_source_seq_len-$si +ze)); # substract $size to avoid loop my $extracted_seq = substr($temp_source_seq, $random_start +_coord, $size); print OUT ">".$header_count."extracted_seq", "\n"; print OUT $extracted_seq, "\n"; $header_count++; } } } close OUT; my $end_time = time; my $duration = ($end_time - $start_time)/60; print "Thank you for your patience, this Perl script operation has com +pleted, it took $duration minutes, good bye!", " \n";
UPDATE: Couple of changes pointed out by karlgoethebier. Thanks!
|
---|
Replies are listed 'Best First'. | |
---|---|
Re^2: Speeding up stalled script
by onlyIDleft (Scribe) on Feb 04, 2015 at 02:51 UTC |