my @sentences = map { [ split ] } <>;
####
my @uniq;
$uniq[ $_ ] = { map{ $_ => 1} @{ $sentences[ $_ ] } } for 0 .. $#sentences;
####
my( @aFragments, @bFragments );
for my $ai ( 0 .. $#sentences ) {
my $sa = $sentences[ $ai ];
my $ua = $uniq[ $ai ];
for my $bi ( $ai+1 .. $#sentences ) {
my $sb = $sentences[ $bi ];
my $ub = $uniq[ $bi ];
## process sentence[ $ai ] against sentences[ $bi ]
}
}
####
sub fragmentSentence {
my( $sa, $ub ) = @_;
return reduce{
exists $ub->{ $b }
? push @{ $a->[ $#$a ] }, $b
: push @{ $a }, []
;
$a;
} [[]], @{ $sa };
}
####
## if there are no common words between the sentences, exit early
next unless first{ exists $ua->{ $_ } } keys %{ $ub };
## fragment both sentences, discarding one-word fragments
## and sorting them by number of words, longest first.
@aFragments = sort{ @$b <=> @$a } grep{ @$_ > 1 }
@{ fragmentSentence( $sa, $ub ) };
@bFragments = sort{ @$b <=> @$a } grep{ @$_ > 1 }
@{ fragmentSentence( $sb, $ua ) };
## Exit early if either sentence has no remaining fragments
next unless @aFragments and @bFragments;
my @best;
## For each A fragment
for my $aFrag ( @aFragments ) {
## early exit if this fragment is shorter than the best so far
next if @$aFrag <= @best;
## for each B fragment
for my $bFrag ( @bFragments ) {
## Ditto early exit
next if @$bFrag <= @best;
## Finally, perform the LCS algorithm
my @lcws = lcws( $aFrag, $bFrag );
## And save if its the longest yet seen
@best = @lcws if @lcws > @best
}
}
## Skip if we didn't find one
next unless @best;
## Output the sentence numbers and the best LCWS we found.
printf "( %4d / %4d )=>[ %s ]\n", $ai, $bi, join( ' ', @best );
####
#! perl -slw
use strict;
#use List::Util qw[ reduce ];
our $MAX ||= 0;
$|++;
## The pure perl verion of List::Util reduce() and first() subs included here
## because the XS versions leak like a sieve in this application.
sub reduce (&@) {
my $code = shift;
no strict 'refs';
return shift unless @_ > 1;
my $caller = caller;
local(*{$caller."::a"}) = \my $a;
local(*{$caller."::b"}) = \my $b;
$a = shift;
foreach (@_) {
$b = $_;
$a = &{$code}();
}
$a;
}
sub first (&@) {
my $code = shift;
my $x;
$x = &$code() and return $x for @_;
}
sub fragmentSentence {
my( $sa, $ub ) = @_;
return reduce{
exists $ub->{ $b }
? push @{ $a->[ $#$a ] }, $b
: push @{ $a }, []
;
$a;
} [[]], @{ $sa };
}
sub lcws { ## longest common word sequence
my( $a, $b ) = @_;
( $a, $b ) = ( $b, $a ) if @{ $a } > @{ $b };
my $aString = "@{ $a }";
my @best;
for my $start ( 0 .. $#{ $b } - 1 ) {
for my $length ( reverse 1 .. $#{ $b } - $start ) {
last if $length < @best;
if( 1 + index $aString, qq[ @{$b}[$start..$start+$length] ] ) {
@best = @{ $b }[ $start .. $start + $length ];
}
}
}
return @best;
}
my @sentences = map { [ split ] } <>;
my @uniq;
$uniq[ $_ ] = { map{ $_ => 1} @{ $sentences[ $_ ] } } for 0 .. $#sentences;
my( @aFragments, @bFragments );
for my $ai ( 0 .. ( $MAX || $#sentences ) ) {
my $sa = $sentences[ $ai ];
my $ua = $uniq[ $ai ];
for my $bi ( $ai+1 .. ( $MAX || $#sentences ) ) {
my $sb = $sentences[ $bi ];
my $ub = $uniq[ $bi ];
next unless first{ exists $ua->{ $_ } } keys %{ $ub };
@aFragments = sort{ @$b <=> @$a } grep{ @$_ > 1 }
@{ fragmentSentence( $sa, $ub ) };
@bFragments = sort{ @$b <=> @$a } grep{ @$_ > 1 }
@{ fragmentSentence( $sb, $ua ) };
next unless @aFragments and @bFragments;
my @best;
for my $aFrag ( @aFragments ) {
next if @$aFrag <= @best;
for my $bFrag ( @bFragments ) {
next if @$bFrag <= @best;
my @lcws = lcws( $aFrag, $bFrag );
@best = @lcws if @lcws > @best
}
}
next unless @best;
printf "( %4d / %4d )=>[ %s ]\n", $ai, $bi, join( ' ', @best );
}
}