use strict;
use warnings;
use Text::Fuzzy;
use Data::Dumper;

# Fuzzy lookup of a short list of strings against a large reference list of
# n-grams (one per line in $referenceNgrams). A string counts as a "good
# match" when at least one reference line with the same number of
# space-separated words lies within a small edit distance of it.

# Strings whose presence in the reference list is being checked.
my @stringsToMatch = ("extra-articular arthrodesis", "malnutritions");
my @goodmatches;

# Sensitivity: a reference line matches when its edit distance to the string
# is strictly below this limit.
my $distanceLimit = 2;

# The (potentially huge) reference file.
my $referenceNgrams = "EN.txt";
print "Loading n-grams from $referenceNgrams\n";
open my $inFH, '<:encoding(UTF-8)', $referenceNgrams
    or die "Cannot open $referenceNgrams: $!";

# Bucket the reference lines by word count while reading, so each line's
# word count is computed once instead of once per string to match, and each
# lookup only scans candidates of the right length. File order is preserved
# inside every bucket.
my %corpusByWordCount;
while (my $corpusElement = <$inFH>) {
    chomp $corpusElement;
    # tr/ // with an empty replacement only counts spaces; nothing is modified.
    my $elementsIncorpusElement = 1 + ($corpusElement =~ tr/ //);
    push @{ $corpusByWordCount{$elementsIncorpusElement} }, $corpusElement;
}
close $inFH;

# Matching with Text::Fuzzy.
foreach my $stringToMatch (@stringsToMatch) {
    print "Working on $stringToMatch\n";

    # Only reference lines with the same number of words are compared.
    my $elementsInstringToMatch = 1 + ($stringToMatch =~ tr/ //);
    my $candidates = $corpusByWordCount{$elementsInstringToMatch}
        or next;

    # One matcher per string to match; it does not depend on the candidate,
    # so it is built outside the inner loop.
    my $tf = Text::Fuzzy->new($stringToMatch);

    foreach my $corpusElement (@{$candidates}) {
        if ($tf->distance($corpusElement) < $distanceLimit) {
            push @goodmatches, $stringToMatch;
            last;    # the first close-enough reference line is sufficient
        }
    }
}

print "Good matches:\n";
print Dumper @goodmatches;