if( $fuz <= $FUZZY and $offset1+$offset2+$keyLen<=$seqLen) {
####
package Fuzzy::Matcher::Xor2;
use strict;
use warnings;
use Fuzzy::Matcher;
use vars qw/$VERSION @ISA/;
@ISA=qw(Fuzzy::Matcher);
$VERSION=0.01;
# Original implementation by [BrowserUk]
# bugfixes and modified to fit Fuzzy::Matcher interface by [demerphq]
sub fuzz_search {
my ($self,$seq)=@_;
use bytes;
my $FUZZY = $self->{fuzz};
my $seqLen = length $seq;
my $keyLen = $self->{strlen};
my ($masked,$pos);
my @matches;
for my $key ( @{$self->{str_array}} ) {
my $mask = $key x ( int( $seqLen / $keyLen ) + 1 );
my $maskLen = length $mask;
my $minZeros = chr( 0 ) x int( $keyLen / ( $FUZZY + 1 ) );
my $minZlen = length $minZeros;
for my $offset1 ( 0 .. $keyLen-1 ) {
$masked = $mask ^ substr( $seq, $offset1, $maskLen );
$pos = 0;
while(
$pos = 1+index $masked, $minZeros, $pos
) {
$pos--;
my $offset2 = $pos - ($pos % $keyLen );
my $fuz = $keyLen -
( substr( $masked, $offset2, $keyLen ) =~ tr[\0][\0] );
if( $fuz <= $FUZZY and $offset1+$offset2+$keyLen<=$seqLen) {
push @matches,$offset1 + $offset2,$fuz,$key;
#printf "\tFuzzy matched key:'$key' -v- '%s' in line: "
# . "%2d @ %6d (%6d+%6d) with fuzziness: %d\n",
# substr( $seq, $offset1 + $offset2, $keyLen ),
# $., , $offset1, $offset2, $fuz;
}
$pos = $offset2 + $keyLen;
}
}
}
return @matches;
}
1;
####
package Fuzzy::Matcher;
use strict;
use warnings;
use Carp qw(croak confess);
use vars qw/$VERSION/;
$VERSION=0.01;
# This is a base class for fuzzy matchers to inherit.
# Its where stuff that will be common to all matchers
# is located. It also defines the interface that all
# matchers will have to follow.
#
# Usage Sample:
#
# my $matcher=$class->new(2,25);
# $matcher->fuzz_store($_) for @words;
# $matcher->prepare()
# for my $strings (@strings) {
# my @res=$matcher->fuzz_search($strings);
# }
#
# ----------------------------------------------------
# Constructor CLASS->new($fuzz,$strlen)
#
# Takes the amount of fuzz to use for matching
# and the length of the strings to be matched.
#
# Should not be overriden.
#
sub new {
my $class = shift;
my $fuzz = shift;
my $strlen = shift;
my $self=bless {
fuzz => $fuzz||0,
strlen => $strlen,
},$class;
croak "Failed build!" unless $self;
$self->_init(@_);
return $self;
}
#
# $obj->_init()
#
# This is a hook for subclass to override without
# having to override the default object creation
# process. It is called in void context before the
# object is returned to the user with any args
# remaining after the default ($fuzz,$strlen)
#
# By default it is a No-Op.
#
sub _init { }
#
# $obj->fuzz_store($string)
#
# Store a string into the object for fuzzy matching
# later.
#
# Default behaviour is to build a hash of stored strings
# for dupe checking and a corresponding array of strings.
# The array is named fuzz_strings and the hash is named
# str_hash.
#
#
sub fuzz_store {
my ($self,$str)=@_;
push @{$self->{str_array}},$str
unless $self->{str_hash}{$str}++;
}
#
# $obj->prepare($string)
#
# If necessary a subclass may define this sub so
# that any actions that need to occur after
# adding the words but before search starts.
#
# By default it deletes the str_hash entry from the object to
# preserve memory.
#
sub prepare {
my ($self,$str)=@_;
delete $self->{str_hash};
}
#
# $obj->fuzz_search($string)
#
# Search a string for results and return
# a list of matches. The list will be
# of triples so that the first match returns:
# my ($match_ofs,$chars_diff,$string_matched)=
# $obj->fuzz_search($string)
#
# Must be overriden
#
sub fuzz_search {
confess((caller(0))[3],"() method must be overriden in ".
ref($_[0]));
}
1;