#! perl -slw
# Fuzzy-match a set of fixed-length strings against every window of every
# sequence line, reporting windows that differ in at most $FUZZY bytes.
#
# Usage: script [-FUZZY=n] fuzzy_file sequence_file
#   fuzzy_file     one candidate string per line (expected to be 25 bytes)
#   sequence_file  one sequence per line
#   -FUZZY=n       maximum number of mismatching bytes (default 2); parsed
#                  by the -s switch on the shebang line
#
# Matches are printed to STDOUT; progress and summary go to STDERR.
use strict;
use warnings;
use bytes;

$| = 1;

our $FUZZY ||= 2;

# Length of each candidate string and of each comparison window.
my $LEN = 25;

die "Usage: $0 [-FUZZY=n] fuzzy_file sequence_file\n" unless @ARGV >= 2;

# Load the candidate strings as hash keys (duplicates collapse).
open my $fuz_fh, '<', $ARGV[ 0 ] or die "$ARGV[ 0 ] : $!";
my %fuz;
while( my $line = <$fuz_fh> ) {
    chomp $line;
    $fuz{ $line } = '';
}
close $fuz_fh;

my $loaded = keys %fuz;
warn "Loaded $loaded ${LEN}-ers";

open my $seq_fh, '< :raw', $ARGV[ 1 ] or die "$ARGV[ 1 ] : $!";

my $totalLen   = 0;
my $fuzzyComps = 0;

while( my $seq = <$seq_fh> ) {
    chomp $seq;
    $totalLen += length $seq;

    # Sequences shorter than $LEN yield an empty range and are skipped.
    for my $offset ( 0 .. length( $seq ) - $LEN ) {
        # Reference to the window avoids copying it for every candidate.
        my $ssref = \substr( $seq, $offset, $LEN );
        printf STDERR "\rProcessing sequence %5d offset %05d", $., $offset;

        for my $fuz ( keys %fuz ) {
            $fuzzyComps++;

            # XOR the two strings: equal bytes become "\0". Counting the
            # NULs with tr gives the number of matching positions, so
            # $m is the number of mismatching bytes.
            my $m = $LEN - ( $fuz ^ $$ssref ) =~ tr[\0][\0];

            if( $m <= $FUZZY ) {
                ## This stores the lineno/offset/fuzziness where each 25-er matched
                ## in a compact form for further process; sorting etc.
                # $fuz{ $fuz } .= pack 'nnn', $., $offset, $m;

                ## Or just print out the data to a file.
                print "Matched '$fuz' -v- '", $$ssref,
                      "' in line: $. @ $offset with fuzziness: ", $m;
            }
        }
    }
}

# $. still holds the line count of the sequence file until it is closed.
warn "\n\nProcessed $. sequences";
# Guard against an empty sequence file (would otherwise divide by zero).
warn "Average length: ", ( $. ? $totalLen / $. : 0 );
warn "Total fuzzy comparisons: ", $fuzzyComps;

close $seq_fh;