#! perl -slw
use strict;
use Fuzzy::Matcher::DFA;
use Time::HiRes qw[ time ];

# Fuzzy-match driver: loads fragments (one per line) from the first file into
# a Fuzzy::Matcher::DFA matcher, then searches every line of the second file
# and prints each hit. A summary (match count, elapsed seconds and optionally
# memory use) is written to STDERR.
#
# Usage (the -s switch on the shebang line turns -NAME=value into $NAME):
#   script -FUZZ=2 -KEYLEN=25 -MEM fragments_file sequences_file

# Return the memory-usage column reported by the Windows `tasklist` command
# for the current process (digits/commas, whitespace, and one trailing
# character). Returns undef if the output does not match.
sub mem {
    my( $mem ) = `tasklist /nh /fo csv /fi \"pid eq $$\"`
        =~ m[\"([\d,]+\s+.)\"$];
    return $mem;
}

# Command-line tunables (set via -s switches); defaults apply when unset.
our $FUZZ   ||= 2;     # first argument passed to the matcher constructor
our $KEYLEN ||= 25;    # second argument passed to the matcher constructor
our $MEM    ||= 0;     # when true, append memory usage to the summary

die "Need two files" unless @ARGV == 2;

my $start = time();

my $matcher = Fuzzy::Matcher::DFA->new( $FUZZ, $KEYLEN );

# Load the fragments, one per line.
open my $frags, '<', $ARGV[ 0 ] or die "$ARGV[ 0 ] : $!";
while( my $fragment = <$frags> ) {
    chomp $fragment;
    $matcher->fuzz_store( $fragment );
}
close $frags;

$matcher->prepare;

# Search each sequence line.
open my $seq, '<', $ARGV[ 1 ] or die "$ARGV[ 1 ] : $!";
my $count = 0;
while( my $sequence = <$seq> ) {
    chomp $sequence;

    # The result is consumed as a flat list of triples, printed below as
    # offset, fuzz and matched text -- presumably that is what
    # fuzz_search returns; verify against the module.
    my $results = $matcher->fuzz_search( $sequence );
    $count += @$results / 3;

    # $. is the line number of the handle most recently read ($seq).
    # printf does not append $\ (set by -l), hence the explicit "\n".
    printf "line:$. offset:%d fuzz:%d '%s'\n", splice @$results, 0, 3
        while @$results;
}

# Elapsed time is taken before closing the handle, as in the original.
my $elapsed = time() - $start;
close $seq;

warn "Found $count matches in (secs): ", $elapsed,
    ( $MEM ? ' Mem: ' . mem() : () ), $/;