if( $fuz <= $FUZZY and $offset1+$offset2+$keyLen<=$seqLen) { #### package Fuzzy::Matcher::Xor2; use strict; use warnings; use Fuzzy::Matcher; use vars qw/$VERSION @ISA/; @ISA=qw(Fuzzy::Matcher); $VERSION=0.01; # Original implementation by [BrowserUk] # bugfixes and modified to fit Fuzzy::Matcher interface by [demerphq] sub fuzz_search { my ($self,$seq)=@_; use bytes; my $FUZZY = $self->{fuzz}; my $seqLen = length $seq; my $keyLen = $self->{strlen}; my ($masked,$pos); my @matches; for my $key ( @{$self->{str_array}} ) { my $mask = $key x ( int( $seqLen / $keyLen ) + 1 ); my $maskLen = length $mask; my $minZeros = chr( 0 ) x int( $keyLen / ( $FUZZY + 1 ) ); my $minZlen = length $minZeros; for my $offset1 ( 0 .. $keyLen-1 ) { $masked = $mask ^ substr( $seq, $offset1, $maskLen ); $pos = 0; while( $pos = 1+index $masked, $minZeros, $pos ) { $pos--; my $offset2 = $pos - ($pos % $keyLen ); my $fuz = $keyLen - ( substr( $masked, $offset2, $keyLen ) =~ tr[\0][\0] ); if( $fuz <= $FUZZY and $offset1+$offset2+$keyLen<=$seqLen) { push @matches,$offset1 + $offset2,$fuz,$key; #printf "\tFuzzy matched key:'$key' -v- '%s' in line: " # . "%2d @ %6d (%6d+%6d) with fuzziness: %d\n", # substr( $seq, $offset1 + $offset2, $keyLen ), # $., , $offset1, $offset2, $fuz; } $pos = $offset2 + $keyLen; } } } return @matches; } 1; #### package Fuzzy::Matcher; use strict; use warnings; use Carp qw(croak confess); use vars qw/$VERSION/; $VERSION=0.01; # This is a base class for fuzzy matchers to inherit. # Its where stuff that will be common to all matchers # is located. It also defines the interface that all # matchers will have to follow. # # Usage Sample: # # my $matcher=$class->new(2,25); # $matcher->fuzz_store($_) for @words; # $matcher->prepare() # for my $strings (@strings) { # my @res=$matcher->fuzz_search($strings); # } # # ---------------------------------------------------- # Constructor CLASS->new($fuzz,$strlen) # # Takes the amount of fuzz to use for matching # and the length of the strings to be matched. # # Should not be overriden. # sub new { my $class = shift; my $fuzz = shift; my $strlen = shift; my $self=bless { fuzz => $fuzz||0, strlen => $strlen, },$class; croak "Failed build!" unless $self; $self->_init(@_); return $self; } # # $obj->_init() # # This is a hook for subclass to override without # having to override the default object creation # process. It is called in void context before the # object is returned to the user with any args # remaining after the default ($fuzz,$strlen) # # By default it is a No-Op. # sub _init { } # # $obj->fuzz_store($string) # # Store a string into the object for fuzzy matching # later. # # Default behaviour is to build a hash of stored strings # for dupe checking and a corresponding array of strings. # The array is named fuzz_strings and the hash is named # str_hash. # # sub fuzz_store { my ($self,$str)=@_; push @{$self->{str_array}},$str unless $self->{str_hash}{$str}++; } # # $obj->prepare($string) # # If necessary a subclass may define this sub so # that any actions that need to occur after # adding the words but before search starts. # # By default it deletes the str_hash entry from the object to # preserve memory. # sub prepare { my ($self,$str)=@_; delete $self->{str_hash}; } # # $obj->fuzz_search($string) # # Search a string for results and return # a list of matches. The list will be # of triples so that the first match returns: # my ($match_ofs,$chars_diff,$string_matched)= # $obj->fuzz_search($string) # # Must be overriden # sub fuzz_search { confess((caller(0))[3],"() method must be overriden in ". ref($_[0])); } 1;