use strict;
use warnings;

use Test::More;

# Each entry is: [ test name, first string, second string, expected result ].
# The expected result is the list all_new() must return for the two strings.
my @test_data = (
    [
        'set 1',
        'SALMWN DE EGENNHSEN TON BOOZ EK THS RAXAB BOOZ DE EGENNHSEN TON WBHD EK THS ROUQ WBHD DE EGENNHSEN TON IESSAI',
        'SALMWN DE EGENNHSEN TON BOES EK THS RAXAB BOES DE EGENNHSEN TON IWBHD EK THS ROUQ IWBHD DE EGENNHSEN TON IESSAI',
        [
            'SALMWN DE EGENNHSEN TON ',
            'DE EGENNHSEN TON IESSAI ',
            'EK THS RAXAB ',
            'DE EGENNHSEN TON ',
            'EK THS ROUQ '
        ]
    ],
    [
        'set 2',
        'IOUDAS DE EGENNHSEN TON FARES KAI TON ZARA EK THS QAMAR FARES DE EGENNHSEN TON ESRWM ESRWM DE EGENNHSEN TON ARAM',
        'IOUDAS DE EGENNHSEN TON FARES KAI TON ZARA EK THS QAMAR FARES DE EGENNHSEN TON ESRWM ESRWM DE EGENNHSEN TON ARAM',
        [
            'IOUDAS DE EGENNHSEN TON FARES KAI TON ZARA EK THS QAMAR FARES DE EGENNHSEN TON ESRWM ESRWM DE EGENNHSEN TON ARAM '
        ]
    ],
    [
        'set 3',
        'PASAI OUN AI GENEAI APO ABRAAM EWS DABID GENEAI DEKATESSARES KAI APO DABID EWS THS METOIKESIAS BABULWNOS GENEAI DEKATESSARES KAI APO THS METOIKESIAS BABULWNOS EWS TOU XRISTOU GENEAI DEKATESSARES',
        'PASAI OUN AI GENEAI APO ABRAAM EWS DAUID GENEAI DEKATESSARES KAI APO DAUID EWS THS METOIKESIAS BABULWNOS GENEAI DEKATESSARES KAI APO THS METOIKESIAS BABULWNOS EWS TOU XRISTOU GENEAI DEKATESSARES',
        [
            'EWS THS METOIKESIAS BABULWNOS GENEAI DEKATESSARES KAI APO THS METOIKESIAS BABULWNOS EWS TOU XRISTOU GENEAI DEKATESSARES ',
            'PASAI OUN AI GENEAI APO ABRAAM EWS ',
            'GENEAI DEKATESSARES KAI APO '
        ]
    ],
);

plan 'tests' => scalar @test_data;

foreach my $test (@test_data) {
    my ( $name, $str1, $str2, $wanted ) = @{$test};
    my @result = all_new( $str1, $str2 );
    is_deeply( \@result, $wanted, $name );
}

# all_new( $str1, $str2 )
#
# Finds the word sequences that $str1 and $str2 have in common.
#
# Both strings are split into words on whitespace.  Every maximal run of
# consecutive equal words is a candidate.  Candidates are visited longest
# first (ties: earlier position in $str1, then earlier position in $str2).
# A candidate is accepted only if it still contains at least one unclaimed
# word on BOTH sides; accepting it claims those words.  A rejected candidate
# claims nothing, so it cannot hide a later candidate that would have been
# accepted -- this is what makes the result independent of processing order
# among equally ranked candidates.
#
# Returns a list of strings, one per accepted candidate.  Each string is the
# candidate's unclaimed words, every word followed by a single space (so each
# string ends with a space).  The list is ordered by run length (longest
# first), then by start position in $str1.  Returns an empty list when the
# strings share no word.
sub all_new {
    my ( $str1, $str2 ) = @_;

    # split ' ' (unlike /\s+/) does not yield an empty first word when the
    # string starts with whitespace.
    my @s1 = split ' ', $str1;
    my @s2 = split ' ', $str2;

    # Each run is [ s1_start, s1_end, s2_start, s2_end ], inclusive indices.
    my @ordered = sort {
             ( $b->[1] - $b->[0] ) <=> ( $a->[1] - $a->[0] )
          || $a->[0] <=> $b->[0]
          || $a->[2] <=> $b->[2]
    } _common_runs( \@s1, \@s2 );

    my %used1  = ();    # word indices of @s1 already claimed
    my %used2  = ();    # word indices of @s2 already claimed
    my @picked = ();

    for my $run (@ordered) {
        my ( $from1, $to1, $from2, $to2 ) = @{$run};

        my @free1 = grep { !$used1{$_} } $from1 .. $to1;
        my @free2 = grep { !$used2{$_} } $from2 .. $to2;

        # Reject without claiming anything on either side.
        next if !@free1 || !@free2;

        @used1{@free1} = (1) x @free1;
        @used2{@free2} = (1) x @free2;

        my $text1 = join q{}, map { "$s1[$_] " } @free1;
        my $text2 = join q{}, map { "$s2[$_] " } @free2;

        # Keep the shorter of the two remaining texts.  'wc' is the width of
        # the full run (word count minus one); 'site' is always the start of
        # the run in the first string.
        my $record =
          ( length($text1) <= length($text2) )
          ? { str => $text1, wc => $to1 - $from1, site => $from1 }
          : { str => $text2, wc => $to2 - $from2, site => $from1 };
        push @picked, $record;
    }

    return map { $_->{str} }
      sort { $b->{wc} <=> $a->{wc} || $a->{site} <=> $b->{site} } @picked;
}

# _common_runs( \@s1, \@s2 )
#
# Returns every maximal run of consecutive equal words shared by the two word
# lists, as [ s1_start, s1_end, s2_start, s2_end ] (inclusive indices), in the
# order the runs are found while scanning @s2 (outer) against @s1 (inner).
sub _common_runs {
    my ( $s1, $s2 ) = @_;

    my $last_i = $#{$s2};
    my $last_j = $#{$s1};

    my @runs     = ();
    my @prev_row = ();    # run lengths ending at each cell of row $i - 1

    for my $i ( 0 .. $last_i ) {
        my @row = ();

        for my $j ( 0 .. $last_j ) {

            # Length of the run ending at the upper-left neighbour; there is
            # no such neighbour in the first row or first column.
            my $diag = ( $i > 0 && $j > 0 ) ? $prev_row[ $j - 1 ] : 0;

            if ( $s1->[$j] eq $s2->[$i] ) {
                my $len = $diag + 1;
                $row[$j] = $len;

                # A run that reaches the last row or column cannot grow any
                # further, so it is recorded here.  This check also covers
                # cells in the first row/column, so a one-word match sitting
                # on an edge (e.g. two one-word strings) is not lost.
                if ( $i == $last_i || $j == $last_j ) {
                    push @runs, [ $j - $len + 1, $j, $i - $len + 1, $i ];
                }
            }
            else {
                $row[$j] = 0;

                # A mismatch ends the run that finished at the upper-left
                # neighbour, if there was one.
                if ($diag) {
                    push @runs, [ $j - $diag, $j - 1, $i - $diag, $i - 1 ];
                }
            }
        }

        @prev_row = @row;
    }

    return @runs;
}

# ####
# Reference dump (Data::Dumper style) of the accepted records for 'set 1',
# in the order all_new() returns their 'str' values:
#
# $VAR1 = [
#     { 'site' => 0,  'str' => 'SALMWN DE EGENNHSEN TON ', 'wc' => 3 },
#     { 'site' => 17, 'str' => 'DE EGENNHSEN TON IESSAI ', 'wc' => 3 },
#     { 'site' => 5,  'str' => 'EK THS RAXAB ',            'wc' => 2 },
#     { 'site' => 9,  'str' => 'DE EGENNHSEN TON ',        'wc' => 2 },
#     { 'site' => 13, 'str' => 'EK THS ROUQ ',             'wc' => 2 }
# ];