Some variations on a Text::LevenshteinXS distance theme. The substr version (d) might be best (i.e., fastest), but I haven't Benchmark-ed anything. All these solutions find overlapping matches. All examples are of distance 1 (Update: but other distances could be used). (Sorry for any line-wrap in the output.)
#!/usr/bin/perl
#
# Four variations on one theme: find every substring of a string that lies
# within a given Levenshtein distance of a pattern.  Only substrings of the
# same length as the pattern are considered, and overlapping matches are
# all reported.
#
# Originally posted as a command-line one-liner:
#     perl -wMstrict -le "..."
# The -w and -Mstrict switches are replaced by the pragmas below, and the
# -l switch by the assignment to $\.

use strict;
use warnings;

use Text::LevenshteinXS qw(distance);

# Same effect as -l for this script: print() appends a newline.
# printf() is not affected by $\, which is why each report line is
# terminated with an explicit  print ''.
local $\ = "\n";

my $pattern = 'JEJE';
my $string  = 'EJKJUJHJDJEJEJEDEJOJOJJJAHJHJSHJEFEJUJEJUJKIJS';

my @matches = match_near_a(1, $pattern, \$string);
printf 'a: ';
printf q{'%s' at %2d }, @$_ for @matches;
print '';

@matches = match_near_b(1, $pattern, \$string);
printf 'b: ';
# Constant format string: the matched text is data, never a format, so a
# '%' in the input cannot be misread as a conversion specification.
printf q{'%s' }, $_ for @matches;
print '';

@matches = match_near_c(1, $pattern, \$string);
printf 'c: ';
printf q{'%s' at %2d }, @$_ for @matches;
print '';

@matches = match_near_d(1, $pattern, \$string);
printf 'd: ';
printf q{'%s' at %2d }, @$_ for @matches;
print '';

# match_near_a($dist, $p, $sr)
#
# Regex code-block version.
#   $dist - maximum Levenshtein distance accepted
#   $p    - pattern to compare against
#   $sr   - reference to the string to search
# Returns a list of [ substring, offset ] array refs.
sub match_near_a {
    my ($dist, $p, $sr) = @_;

    my $len = length $p;

    # The code block below pushes onto a package variable, localized so
    # every call starts with an empty list.
    # NOTE(review): presumably a package variable is used instead of a
    # lexical because of closure problems with lexicals inside regex code
    # blocks on older perls -- confirm before changing to 'my'.
    local our @m;

    # Kept from the original: the pattern combines an interpolated
    # variable with a code block.
    use re 'eval';

    # Each candidate is captured, tested in the code block, and then
    # (*FAIL) forces a backtrack so the engine moves on to the next start
    # position.  The match as a whole never succeeds; only the side
    # effect on @m matters.
    $$sr =~ m{
        (.{$len})
        (?{ push @m, [ $^N, $-[1] ] if distance($^N, $p) <= $dist })
        (*FAIL)
    }xms;

    return @m;
}

# match_near_b($dist, $p, $sr)
#
# Lookahead-capture version.  Same parameters as match_near_a, but
# returns only the matching substrings (no offsets).
sub match_near_b {
    my ($dist, $p, $sr) = @_;

    my $len = length $p;

    # Capturing inside a zero-width lookahead lets //g yield overlapping
    # candidates, one per start position.
    return grep { distance($_, $p) <= $dist }
        $$sr =~ m{ (?= (.{$len})) }xmsg;
}

# match_near_c($dist, $p, $sr)
#
# Lookahead-capture version driven by a while loop, so the offset of each
# candidate is available.  Same parameters and return shape as
# match_near_a.
sub match_near_c {
    my ($dist, $p, $sr) = @_;

    my $len = length $p;
    my @matches;

    while ($$sr =~ m{ (?= (.{$len})) }xmsg) {
        push @matches, [ $1, $-[1] ] if distance($1, $p) <= $dist;
    }

    return @matches;
}

# match_near_d($dist, $p, $sr)
#
# substr version: slides a window of the pattern's length across the
# string.  Same parameters and return shape as match_near_a.
sub match_near_d {
    my ($dist, $p, $sr) = @_;

    my $len        = length $p;
    my $max_offset = length($$sr) - $len;

    # If the string is shorter than the pattern, $max_offset is negative,
    # the range is empty, and nothing is returned.
    return map {
        my $ss = substr $$sr, $_, $len;
        distance($ss, $p) <= $dist ? [ $ss, $_ ] : ();
    } 0 .. $max_offset;
}

__END__

Output:

a: 'JDJE' at  7 'JEJE' at  9 'JEJE' at 11 'JEDE' at 13 'JEFE' at 31 'JUJE' at 35 'JEJU' at 37
b: 'JDJE' 'JEJE' 'JEJE' 'JEDE' 'JEFE' 'JUJE' 'JEJU'
c: 'JDJE' at  7 'JEJE' at  9 'JEJE' at 11 'JEDE' at 13 'JEFE' at 31 'JUJE' at 35 'JEJU' at 37
d: 'JDJE' at  7 'JEJE' at  9 'JEJE' at 11 'JEDE' at 13 'JEFE' at 31 'JUJE' at 35 'JEJU' at 37
In reply to Re: approximate regular expression
by AnomalousMonk
in thread approximate regular expression
by jrblas
| For: | Use: | ||
| & | &amp;amp; | ||
| < | &amp;lt; | ||
| > | &amp;gt; | ||
| [ | &amp;#91; | ||
| ] | &amp;#93; |