#!/usr/bin/perl
# http://perlmonks.org/?node_id=1129709
#
# Build a dictionary of word/phrase substitutions from pairs of sentences.
#
# The DATA section is read two lines at a time.  Each pair is split into
# whitespace-separated words and diffed with Algorithm::Diff.  Every run of
# words that differs between two common words is recorded as one mapping
# from the first sentence's phrase to the second sentence's phrase, so a
# phrase may map to several replacements ("one to many").  The resulting
# hash is printed as YAML.

use strict;
use warnings;

use Algorithm::Diff qw(traverse_sequences);
use YAML;

# Words seen only in the first (@old) or only in the second (@new) sentence
# since the last word the two sentences had in common.
my (@old, @new);

# $dictionary{"old phrase"}{"new phrase"} = number of times that
# substitution was seen.  Phrases are the words joined by single spaces.
my %dictionary;

# Record the pending run of differing words, if any, and reset the buffers.
# A pure deletion or insertion is stored with an empty string on the other
# side.  Does nothing when both buffers are empty.
sub addtodictionary {
    return unless @old || @new;

    $dictionary{"@old"}{"@new"}++;
    @old = ();
    @new = ();
    return;
}

while (my $first = <DATA>) {

    # With an odd number of input lines the last sentence has no partner;
    # it is then compared against the literal words "unmatched line".
    my $second = <DATA> // 'unmatched line';

    my @from = split ' ', $first;
    my @to   = split ' ', $second;

    # The callbacks receive the index into @from first and the index
    # into @to second.
    traverse_sequences(
        \@from,
        \@to,
        {
            # A common word ends the current run of differences.
            MATCH     => sub { addtodictionary() },
            DISCARD_A => sub {
                my ($from_index) = @_;
                push @old, $from[$from_index];
            },
            DISCARD_B => sub {
                my (undef, $to_index) = @_;
                push @new, $to[$to_index];
            },
        },
    );

    # Flush a difference that runs to the end of the sentences.
    addtodictionary();
}

print Dump(\%dictionary);

__DATA__
This apple is colored red.
This apple is coloured red.
I need to buy a round trip ticket.
I need to buy a return ticket.
Jack rode the elevator to the top floor.
Jack took the lift to the top floor.
The script produces the following output (note that this mapping *can* be "one to many"):
---
colored:
  coloured: 1
elevator:
  lift: 1
rode:
  took: 1
round trip:
  return: 1
In reply to Re: Comparing strings to extract the different words/expressions
by Anonymous Monk
in thread Comparing strings to extract the different words/expressions
by TravelAddict
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |