#!/usr/bin/perl
#
# For every line of the first file, report the longest run of consecutive
# words ("lcssw": longest common substring, measured in words) that it
# shares with any line of the second file.
#
# Usage: <script> FILE1 FILE2
#
# Both files are read as UTF-8 text, one string per line.  For each line of
# FILE1 that shares at least one word with some line of FILE2, one line of
# the form "<words> is the lcssw of <line>" is printed to STDOUT.

use strict;
use warnings;
use utf8;

use String::LCSS_XS qw( lcss );

# Input is decoded from UTF-8 on read, so it has to be encoded again on
# write; without this layer Perl emits "Wide character" warnings and writes
# Latin-1 range characters as raw single bytes.
binmode(STDOUT, ':encoding(UTF-8)');

my $f1 = shift;
my $f2 = shift;
die "usage: $0 FILE1 FILE2\n" unless defined $f1 && defined $f2;

my @array1 = read_lines($f1);
my @array2 = read_lines($f2);

for my $source (@array1) {
    my $best_subseq       = "";
    my $best_subseq_words = 0;
    my $best_subseq_chars = 0;
    my $found             = 0;

    for my $target (@array2) {
        my $subseq    = lcssw($source, $target);
        my $num_words = count_words($subseq);
        my $num_chars = count_chars($subseq);

        # NOTE(review): a candidate only replaces the current best when it
        # is strictly larger in BOTH word count and character count, so a
        # candidate with more words but no more characters is rejected.
        # Kept as in the original; confirm this is the intended ranking.
        if ($num_words > $best_subseq_words && $num_chars > $best_subseq_chars) {
            $best_subseq       = $subseq;
            $best_subseq_words = $num_words;
            $best_subseq_chars = $num_chars;
            $found             = 1;
        }
    }

    if ($found == 1) {
        print "$best_subseq is the lcssw of $source\n";
    }
}

# read_lines($path)
# Opens $path as UTF-8 text and returns its lines as a list, with the
# trailing newline removed from each.  Dies if the file cannot be opened.
sub read_lines {
    my ($path) = @_;

    open(my $fh, "<:encoding(UTF-8)", $path)
        or die "can't open file '$path' $!";
    chomp(my @lines = <$fh>);
    close($fh);

    return @lines;
}

# lcssw($s1, $s2)
# Returns the longest run of consecutive words common to $s1 and $s2, as a
# single string with the words joined by one space.  Returns "" when the
# two strings share no word.
#
# Words are the matches of /\w+/.  Each distinct word is mapped to a unique
# single character so that the character-based lcss() from String::LCSS_XS
# effectively compares whole words; the result is then mapped back.
sub lcssw {
    my ($s1, $s2) = @_;

    my $next_code = 0;
    my %code_of;    # word => one-character stand-in

    # $text aliases $s1 and then $s2, so each is replaced in place by its
    # encoded form (one character per word, in order).
    for my $text ($s1, $s2) {
        $text = join '', map {
            $code_of{$_} = chr(++$next_code) if !exists $code_of{$_};
            $code_of{$_};
        } $text =~ /\w+/g;
    }

    # A string without any word cannot share a word with anything.
    return "" if $s1 eq "" || $s2 eq "";

    # Scalar context: lcss() yields just the common substring.
    my $lcss = lcss($s1, $s2);
    $lcss = "" if !defined $lcss;

    # Codes are unique per word, so the mapping can simply be inverted.
    my %word_of = reverse %code_of;

    # /s is required because one of the stand-in characters is "\n".
    return join ' ', @word_of{ $lcss =~ /./sg };
}

# count_words($line)
# Returns the number of whitespace-separated words in $line.
sub count_words {
    my $line = shift;

    # split ' ' ignores leading whitespace, unlike split /\s+/, which would
    # produce (and count) an empty leading field.
    my @text_words = split ' ', $line;
    return scalar @text_words;
}

# count_chars($line)
# Returns the number of characters in $line.
sub count_chars {
    my $line = shift;
    return length $line;
}