Anonymous Monk has asked for the wisdom of the Perl Monks concerning the following question:
Hi Perlmonks,
I use the following perl library for determining the edit transcript between two strings but now I want to do the same based on words and not characters. Is there any way to modify it and gain from that? Please note that I don't need the levenshtein distance but the path.
Thanks,
sub EditTranscript { my $str = shift; my $str2 = shift; my $dist; my $transcript; for (my $i = 0; $i <= length($str); $i++) { $dist->[$i]->[0] = $i; $transcript->[$i]->[0] = "D"; } for (my $i = 0; $i <= length($str2); $i++) { $dist->[0]->[$i] = $i; $transcript->[0]->[$i] = "I"; } my $cost; for (my $i = 1; $i <= length($str); $i++) { for (my $j = 1; $j <= length($str2); $j++) { if (substr($str,$i-1,1) eq substr($str2,$j-1,1)) { $cost = 0; } else { $cost = 1; } $dist->[$i]->[$j] = Min($dist->[$i-1]->[$j] + 1, $dist->[$i]->[$j-1] + 1, $dist->[$i-1]->[$j-1] + $cost); if ($dist->[$i]->[$j] eq $dist->[$i]->[$j-1] + 1) { $transcript->[$i]->[$j] = "I"; } if ($dist->[$i]->[$j] eq $dist->[$i-1]->[$j]+1) { $transcript->[$i]->[$j] = "D"; } if ($dist->[$i]->[$j] eq $dist->[$i-1]->[$j-1] + $cost) { if ($cost eq 0) { $transcript->[$i]->[$j] = "-"; } else { $transcript->[$i]->[$j] = "S"; } } } } my $st = Traceback($transcript,length($str),length($str2)); $st = scalar reverse $st; return $st; } sub Traceback { my $transcript = shift; my $i = shift; my $j = shift; my $string; while ($i > 0 || $j > 0) { if (defined $transcript->[$i]->[$j]) { $string .= $transcript->[$i]->[$j]; } last if (!defined $transcript->[$i]->[$j]); # to keep us from getting caught in loops if ($transcript->[$i]->[$j] eq "S" || $transcript->[$i]->[$j] +eq "-") { $i-- if ($i > 0); $j-- if ($j > 0); } elsif ($transcript->[$i]->[$j] eq "I") { $j-- if ($j > 0); } else { $i-- if ($i > 0); } } return $string; } sub Min { my @list = @_; @list = sort {$a <=> $b} @list; return shift @list; }
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: word based levenstein distance path
by pme (Monsignor) on May 20, 2015 at 13:39 UTC | |
by Anonymous Monk on May 21, 2015 at 08:59 UTC | |
by AnomalousMonk (Archbishop) on May 21, 2015 at 09:30 UTC | |
by Anonymous Monk on May 21, 2015 at 10:37 UTC | |
|
Re: word based levenstein distance path
by hdb (Monsignor) on May 20, 2015 at 14:12 UTC |