package Fuzzy::Matcher::Ysth; use strict; use warnings; use Fuzzy::Matcher; use vars qw/$VERSION @ISA/; @ISA=qw(Fuzzy::Matcher); $VERSION=0.01; # Original implementation by [ysth] # modified to fit Fuzzy::Matcher interface by [demerphq] sub prepare { my ($self)=@_; my $fuzz=$self->{fuzz}; my $length=$self->{strlen}; die "still hardcoded for fuzz 2" if $fuzz != 2; delete $self->{str_hash}; my $fuzz_strings = delete $self->{str_array}; my $chunks = join ")(", map "."x (int($_*$length/($fuzz+1))-int(($_-1)*$length/($fuzz+1))), 1..$fuzz+1; my @filters; my $qr = qr/(?=(($chunks)))./; for my $frag (@$fuzz_strings) { my @m = $frag =~ $qr or die "something's horribly wrong, $frag =~ $qr "; push @{$filters[$_]{$m[$_]}}, $frag for 1..$#m; } $self->{ysth_filters} = \@filters; $self->{ysth_qr} = $qr; } sub fuzz_search { my ($self, $seq) = @_; die "Unprepared to fuzz_search!" unless $self->{ysth_filters}; my $fuzz=$self->{fuzz}; my @filters = @{$self->{ysth_filters}}; my $qr = $self->{ysth_qr}; my @matches; my $empty = []; while ($seq =~ /$qr/g) { my %uniq; for (map @{$filters[$_-1]{substr $seq, $-[$_], $+[$_]-$-[$_]}||$empty}, 2..$#-) { # print "trying $1 against $_ len $length fuzz $fuzz seq $seq\n"; # die; if ( ($1 ^ $_) =~ tr/\0//c <= $fuzz && !$uniq{$_}++) { push @matches, $-[0], ($1 ^ $_) =~ tr/\0//c, $_; } } } return \@matches; } 1;