my @sentences = map { [ split ] } <>; #### my @uniq; $uniq[ $_ ] = { map{ $_ => 1} @{ $sentences[ $_ ] } } for 0 .. $#sentences; #### my( @aFragments, @bFragments ); for my $ai ( 0 .. $#sentences ) { my $sa = $sentences[ $ai ]; my $ua = $uniq[ $ai ]; for my $bi ( $ai+1 .. $#sentences ) { my $sb = $sentences[ $bi ]; my $ub = $uniq[ $bi ]; ## process sentence[ $ai ] against sentences[ $bi ] } } #### sub fragmentSentence { my( $sa, $ub ) = @_; return reduce{ exists $ub->{ $b } ? push @{ $a->[ $#$a ] }, $b : push @{ $a }, [] ; $a; } [[]], @{ $sa }; } #### ## if there are no common words between the sentences, exit early next unless first{ exists $ua->{ $_ } } keys %{ $ub }; ## fragment both sentences, discarding one-word fragments ## and sorting them by number of words, longest first. @aFragments = sort{ @$b <=> @$a } grep{ @$_ > 1 } @{ fragmentSentence( $sa, $ub ) }; @bFragments = sort{ @$b <=> @$a } grep{ @$_ > 1 } @{ fragmentSentence( $sb, $ua ) }; ## Exit early if either sentence has no remaining fragments next unless @aFragments and @bFragments; my @best; ## For each A fragment for my $aFrag ( @aFragments ) { ## early exit if this fragment is shorter than the best so far next if @$aFrag <= @best; ## for each B fragment for my $bFrag ( @bFragments ) { ## Ditto early exit next if @$bFrag <= @best; ## Finally, perform the LCS algorithm my @lcws = lcws( $aFrag, $bFrag ); ## And save if its the longest yet seen @best = @lcws if @lcws > @best } } ## Skip if we didn't find one next unless @best; ## Output the sentence numbers and the best LCWS we found. printf "( %4d / %4d )=>[ %s ]\n", $ai, $bi, join( ' ', @best ); #### #! perl -slw use strict; #use List::Util qw[ reduce ]; our $MAX ||= 0; $|++; ## The pure perl verion of List::Util reduce() and first() subs included here ## because the XS versions leak like a sieve in this application. sub reduce (&@) { my $code = shift; no strict 'refs'; return shift unless @_ > 1; my $caller = caller; local(*{$caller."::a"}) = \my $a; local(*{$caller."::b"}) = \my $b; $a = shift; foreach (@_) { $b = $_; $a = &{$code}(); } $a; } sub first (&@) { my $code = shift; my $x; $x = &$code() and return $x for @_; } sub fragmentSentence { my( $sa, $ub ) = @_; return reduce{ exists $ub->{ $b } ? push @{ $a->[ $#$a ] }, $b : push @{ $a }, [] ; $a; } [[]], @{ $sa }; } sub lcws { ## longest common word sequence my( $a, $b ) = @_; ( $a, $b ) = ( $b, $a ) if @{ $a } > @{ $b }; my $aString = "@{ $a }"; my @best; for my $start ( 0 .. $#{ $b } - 1 ) { for my $length ( reverse 1 .. $#{ $b } - $start ) { last if $length < @best; if( 1 + index $aString, qq[ @{$b}[$start..$start+$length] ] ) { @best = @{ $b }[ $start .. $start + $length ]; } } } return @best; } my @sentences = map { [ split ] } <>; my @uniq; $uniq[ $_ ] = { map{ $_ => 1} @{ $sentences[ $_ ] } } for 0 .. $#sentences; my( @aFragments, @bFragments ); for my $ai ( 0 .. ( $MAX || $#sentences ) ) { my $sa = $sentences[ $ai ]; my $ua = $uniq[ $ai ]; for my $bi ( $ai+1 .. ( $MAX || $#sentences ) ) { my $sb = $sentences[ $bi ]; my $ub = $uniq[ $bi ]; next unless first{ exists $ua->{ $_ } } keys %{ $ub }; @aFragments = sort{ @$b <=> @$a } grep{ @$_ > 1 } @{ fragmentSentence( $sa, $ub ) }; @bFragments = sort{ @$b <=> @$a } grep{ @$_ > 1 } @{ fragmentSentence( $sb, $ua ) }; next unless @aFragments and @bFragments; my @best; for my $aFrag ( @aFragments ) { next if @$aFrag <= @best; for my $bFrag ( @bFragments ) { next if @$bFrag <= @best; my @lcws = lcws( $aFrag, $bFrag ); @best = @lcws if @lcws > @best } } next unless @best; printf "( %4d / %4d )=>[ %s ]\n", $ai, $bi, join( ' ', @best ); } }