# DNA dot plot via approximate regular-expression matching.
#
# Every window of the first sequence is turned into a regular expression
# that tolerates a bounded number of mismatching positions; that expression
# is then tried at every window position of the second sequence.  The result
# is printed as one line of 0/1 characters per window of the first sequence.

use strict;
use warnings;

my $MAX          = 10_000;    # length of each random demo sequence
my $WINDOW_SIZE  = 5;         # number of characters compared per dot
my $MAX_MISMATCH = 1;         # mismatching positions tolerated per window

# with_regexp($seq1, $seq2, $window, $mismatch)
#
# Builds the dot plot of $seq1 against $seq2 and returns it as one string.
# Each line (terminated by "\n") belongs to one window start in $seq1; each
# character of a line belongs to one window start in $seq2 and is 1 when
# the two windows differ in at most $mismatch positions, 0 otherwise.
# Every full window of both sequences is covered, i.e. starts
# 0 .. length - $window.  Returns '' when $seq1 is shorter than $window.
sub with_regexp {
    my ($seq1, $seq2, $window, $mismatch) = @_;

    my $last_start1 = length($seq1) - $window;
    my $last_start2 = length($seq2) - $window;

    my $retval = '';
    for my $start (0 .. $last_start1) {
        my $alternation
            = build_regexp(substr($seq1, $start, $window), $mismatch);

        # Compile once per row.  \G pins the match to pos($seq2), so each
        # test below looks at exactly one window of $seq2.
        my $regex = qr/\G(?:$alternation)/;

        for my $offset (0 .. $last_start2) {
            pos($seq2) = $offset;
            $retval .= $seq2 =~ $regex ? 1 : 0;
        }
        $retval .= "\n";
    }

    return $retval;
}

# build_parts($window, $mismatch)
#
# Returns the list of patterns obtained by replacing $mismatch positions of
# $window with the regex wildcard '.', in every possible combination.
# $mismatch is clamped to the length of $window; with $mismatch == 0 the
# window itself is the only pattern.  The characters of $window are copied
# into the patterns unescaped.
sub build_parts {
    my ($window, $mismatch) = @_;

    my $length = length $window;
    $mismatch = $length if $mismatch > $length;

    return $window unless $mismatch;
    return '.' x $length if $length == $mismatch;

    # Either the first character has to match literally and all wildcards
    # go into the rest, or the first character is itself a wildcard and the
    # rest gets one wildcard fewer.
    my ($first, $rest) = split //, $window, 2;
    my $wildcard = '.';
    my @literal_first  = map { $first . $_ } build_parts($rest, $mismatch);
    my @wildcard_first = map { $wildcard . $_ } build_parts($rest, $mismatch - 1);

    return (@literal_first, @wildcard_first);
}

# build_regexp($window, $mismatch)
#
# Returns the source text of a regular expression: the patterns from
# build_parts(), each wrapped in a non-capturing group and joined with '|'.
sub build_regexp {
    my ($window, $mismatch) = @_;

    return join '|',
        map { sprintf '(?:%s)', $_ } build_parts($window, $mismatch);
}

# Demo driver: prints the dot plot of two random sequences of $MAX bases.
sub main {
    my @bases = qw(A T G C);
    my $seq1  = join '', map { $bases[ int rand @bases ] } 1 .. $MAX;
    my $seq2  = join '', map { $bases[ int rand @bases ] } 1 .. $MAX;

    print with_regexp($seq1, $seq2, $WINDOW_SIZE, $MAX_MISMATCH);
    return;
}

# Run the demo only when executed as a script, so the subs above can be
# loaded from another file without starting the full-size computation.
main() unless caller;

1;
In reply to Re: Speed up DNA dotplot
by happy.barney
in thread Speed up DNA dotplot
by Microcebus
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |