#! perl -slw use strict; use Time::HiRes qw[ time ]; sub mem{ my( $mem ) = `tasklist /nh /fo csv /fi \"pid eq $$\"` =~ m[\"([\d,]+\s+.)\"$]; $mem } our $FUZZ ||= 2; our $KEYLEN ||= 25; our $MEM ||= 0; die "Need two files" unless @ARGV == 2; my $start = time(); open FRAGS, '<', $ARGV[ 0 ] or die "$ARGV[ 0 ] : $!"; chomp( my @fragments = ); close FRAGS; # initialization code my $chunks = join ")(", map "."x ( int( $_ * $KEYLEN / ( $FUZZ + 1 ) ) - int( ( $_ - 1 ) * $KEYLEN / ( $FUZZ + 1 ) ) ) , 1 .. $FUZZ + 1; my $qr = qr/(?=(($chunks)))./; my @filters; for my $frag ( @fragments ) { my @m = $frag =~ $qr or die "something's horribly wrong, $frag =~ $qr "; push @{ $filters[$_]{ $m[ $_ ] } }, $frag for 1..$#m; } open SEQ, '<', $ARGV[ 1 ] or die "$ARGV[ 1 ] : $!"; my $empty = []; my $count = 0; # search code while( my $seq = ) { while( $seq =~ /$qr/g ) { my %uniq; for ( map @{ $filters[ $_ - 1 ]{ substr $seq, $-[$_], $+[$_]-$-[$_] } || $empty }, 2..$#- ) { if ( ( $1 ^ $_ ) =~ tr/\0//c <= $FUZZ && !$uniq{ $_ }++ ) { ++$count; printf "line:$. offset:%d fuzz: %d '%s'\n", $-[ 0 ], ( ( $1 ^ $_ ) =~ tr/\0//c ), $_; } } } } my $elapsed = time() - $start; close SEQ; warn "Found $count matches in (secs): ", $elapsed, ( $MEM ? ' Mem: ' . mem() : () ), $/;