in reply to Speed up DNA dotplot
Try this. It produces identical results to your posted code in less than half the time:
#! perl -s
# DNA dot-plot benchmark.
#
# Generates two random 1000-base sequences (fixed seed, so every run uses
# the same data), slides a window of $nWindow bases over both, and writes a
# 0/1 identity matrix to ID_matrix.txt: one row per window position in the
# first sequence, one column per window position in the second.  A cell is
# 1 when the two windows differ in at most $maxMisses positions, else 0.
# The elapsed time of the comparison loop is printed to STDOUT.
use strict;
use warnings;

use Math::Random::MT qw[ rand srand ];
use Time::HiRes qw[ time ];

# Fixed seed: the generated sequences are reproducible between runs.
srand 1;

### CREATE TWO SAMPLE DNA SEQUENCES ###
my @nucleotides = ( 'A', 'T', 'G', 'C' );
my $seq1 = join '', map { $nucleotides[ rand 4 ] } 1 .. 1000;
my $seq2 = join '', map { $nucleotides[ rand 4 ] } 1 .. 1000;

### SETTINGS FOR THE DOTPLOT ###
my $nWindow   = 5;
my $maxMisses = 1;

my $out_file = 'ID_matrix.txt';
open my $out, '>', $out_file
    or die "Cannot open '$out_file' for writing: $!";

# Last valid window start offset in each sequence.
my $nWindow1 = ( length $seq1 ) - $nWindow;
my $nWindow2 = ( length $seq2 ) - $nWindow;

my $time_start = time;

for my $off1 ( 0 .. $nWindow1 ) {
    my $sub1 = substr $seq1, $off1, $nWindow;

    for my $off2 ( 0 .. $nWindow2 ) {
        my $sub2 = substr $seq2, $off2, $nWindow;

        # String XOR yields a NUL byte wherever the two windows hold the
        # same character; tr counts those NULs (without changing the
        # string), so window size minus that count is the mismatch count.
        my $misses = $nWindow - ( ( $sub1 ^ $sub2 ) =~ tr[\0][\0] );

        print {$out} ( $misses > $maxMisses ? 0 : 1 );
    }

    print {$out} "\n";
}

my $time_end  = time;
my $time_used = $time_end - $time_start;

# Buffered write errors only surface at close, so check it.
close $out
    or die "Cannot close '$out_file': $!";

print "Time used: $time_used seconds.\n";

__END__
c:\test>junk9    ## original
Time used: 4.12800002098084 seconds.
Press any key to continue . . .

c:\test>914283    ## This code
Time used: 1.87000012397766 seconds.

c:\test>dir ID*
14/07/2011  12:19    994,008 ID_matrix.ref
14/07/2011  15:24    994,008 ID_matrix.txt

c:\test>diff ID_matrix.ref ID_matrix.txt    ## sanity check
|
|---|