in reply to Speed up DNA dotplot
use strict;
use warnings;

# Demonstration parameters: two random DNA sequences of $MAX bases are
# compared window by window, allowing up to $MAX_MISMATCH mismatching
# positions inside each window of $WINDOW_SIZE bases.
my $MAX          = 10_000;
my $WINDOW_SIZE  = 5;
my $MAX_MISMATCH = 1;

my $seq1 = _random_sequence($MAX);
my $seq2 = _random_sequence($MAX);

# Return a random string of $length characters drawn from A, T, G and C.
sub _random_sequence {
    my ($length) = @_;
    my @bases = qw(A T G C);
    return join '', map { $bases[ int rand @bases ] } 1 .. $length;
}

# Build a textual dot plot of $seq1 against $seq2.
#
# Parameters:
#   $seq1, $seq2 - the sequences to compare
#   $window      - number of characters in each compared window
#   $mismatch    - number of window positions that are allowed to differ
#
# Returns one string holding one line per full window of $seq1 (start
# offsets 0 .. length($seq1) - $window).  Each line holds one character
# per full window of $seq2: '1' when the two windows agree within the
# allowed number of mismatches, '0' otherwise.  A sequence shorter than
# the window contributes no rows (for $seq1) or no columns (for $seq2).
sub with_regexp {
    my ($seq1, $seq2, $window, $mismatch) = @_;

    # Start offset of the last *full* window in each sequence.
    my $last_start1 = length($seq1) - $window;
    my $last_start2 = length($seq2) - $window;

    my $retval = '';
    for my $start (0 .. $last_start1) {
        my $pattern = build_regexp(substr($seq1, $start, $window), $mismatch);

        # Compile once per row; \G anchors the match at pos($seq2).
        my $regex = qr/\G(?:$pattern)/;

        for my $offset (0 .. $last_start2) {
            # Assigning pos() makes the /g match start exactly at $offset.
            pos($seq2) = $offset;
            $retval .= $seq2 =~ m/$regex/gc ? 1 : 0;
        }
        $retval .= "\n";
    }

    return $retval;
}

# Expand $window into every pattern obtained by replacing exactly
# min($mismatch, length $window) of its characters with the regex
# wildcard '.'.  A pattern with k wildcards also matches windows that
# differ in fewer than k positions, so together the patterns accept
# "at most $mismatch mismatches".
#
# The characters of $window are copied into the patterns unescaped, so
# the window is expected to contain plain letters only.
#
# Returns the list of pattern strings.
sub build_parts {
    my ($window, $mismatch) = @_;

    my $length = length $window;

    # Clamp to the valid range; a negative count would otherwise never
    # reach the base cases below and recurse without end.
    $mismatch = 0       if $mismatch < 0;
    $mismatch = $length if $mismatch > $length;

    return $window       if $mismatch == 0;
    return '.' x $length if $mismatch == $length;

    # Here 0 < $mismatch < $length, so $window has at least two characters.
    my $first = substr $window, 0, 1;
    my $rest  = substr $window, 1;

    return (
        # Keep the first character: all wildcards go into the rest.
        (map { $first . $_ } build_parts($rest, $mismatch)),
        # Wildcard the first character: one fewer wildcard for the rest.
        (map { '.' . $_ } build_parts($rest, $mismatch - 1)),
    );
}

# Join the patterns from build_parts() into one alternation.  Takes the
# same arguments as build_parts() and returns the regex source string,
# e.g. build_regexp('AT', 1) gives '(?:A.)|(?:.T)'.
sub build_regexp {
    return join '|', map { '(?:' . $_ . ')' } build_parts(@_);
}

# Print the full dot plot only when this file is run as a script, so the
# subs above can be loaded without triggering the large comparison.
print with_regexp($seq1, $seq2, $WINDOW_SIZE, $MAX_MISMATCH) unless caller;
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: Speed up DNA dotplot
by happy.barney (Friar) on Jul 14, 2011 at 09:35 UTC |