#!/usr/bin/perl -w use strict; use English; # For pre 5.6 users. use vars qw(%patterns @lengths $buf); my $usage =<) { chomp; $patterns{$_} = 0; } close PATS; # Determine pattern lengths. my %lengths; $lengths{length($_)}++ for keys %patterns; @lengths = sort {$a<=>$b} keys %lengths; my $minlength = $lengths[0]; my $maxlength = $lengths[-1]; # Prepare to sift through the DNA data. open(DNA, $ARGV[1]) or die "Couldn't open $ARGV[1]: $!"; # We will be reading one character at a time. $INPUT_RECORD_SEPARATOR = \1; # Read data, check for matches, resize our buffer. while (my $c = ) { next if $c eq "\n"; $buf .= $c; next if length($buf) < $maxlength; check_for_matches(); $buf = substr($buf,1); } # We need to continue checking for matches near the end. while ($buf = substr($buf,1) and length($buf) >= $minlength) { check_for_matches(); } # Finally, we output the patterns which matched more than once. for (sort keys %patterns) { print "$_\n" if $patterns{$_} > 1; }