use strict;
use warnings;

# Word-overlap report for tab-separated records.
#
# Input is read line by line from the files named on the command line, or
# from STDIN when none are given. Each line is split on tabs; the first
# column is ignored, and every word of the second column is looked up in the
# third column. A word counts as a hit when it occurs in the third column as
# a case-insensitive, literal substring. Hits are printed to the currently
# selected output handle and numbered per input line.
#
# Words shorter than MIN_WORD_LENGTH are never reported; words shorter than
# FULL_WORD_LENGTH are reported with a "(probable)" tag.
use constant {
    MIN_WORD_LENGTH  => 3,
    FULL_WORD_LENGTH => 5,
};

# Returns a copy of $phrase with double quotes, hyphens, slashes and commas
# turned into spaces (so they act as word separators) and with parentheses
# removed.
sub normalize_phrase {
    my ($phrase) = @_;

    $phrase =~ tr{"/,-}{ };    # trailing '-' is a literal hyphen, not a range
    $phrase =~ tr{()}{}d;

    return $phrase;
}

# Returns the list of report lines for every word of $phrase that occurs in
# $target. Both arguments are expected to be normalized already.
sub matching_words {
    my ($phrase, $target) = @_;

    my @reports;
    my $match_count = 1;    # numbered per record, so it lives outside the loop

    for my $word (split ' ', $phrase) {
        my $word_length = length $word;
        next if $word_length < MIN_WORD_LENGTH;

        # Look for the word inside the target phrase (not the reverse), and
        # quote it so regex metacharacters in the data are taken literally.
        next if $target !~ /\Q$word\E/i;

        if ($word_length < FULL_WORD_LENGTH) {
            push @reports, "Match $match_count (probable): $word\n";
        }
        else {
            push @reports, "Match $match_count: $word \n";
        }
        $match_count++;
    }

    return @reports;
}

# Streams the input and prints the report lines of each record. Lines that
# do not provide both a second and a third column are skipped.
sub main {
    while (my $line = <>) {
        $line =~ s/\r?\n\z//;    # keep the line ending out of the last column

        my (undef, $phrase, $target) = split /\t/, $line;
        next if !defined $phrase || !defined $target;

        my @reports = matching_words(
            normalize_phrase($phrase),
            normalize_phrase($target),
        );
        print @reports;
    }

    return;
}

# Run only when executed as a script, so the subs can also be loaded and
# called from other code.
main() unless caller;

#### Sample input
# NOTE(review): the script splits on tabs, so real input presumably has a tab
# after the ID and between the two phrases; the whitespace below is as found.
# MIP_00001 Chromosomal replication initiator protein dnaA chromosomal replication initiationprotein
# MIP_00002 DNA polymerase III subunit beta DNA polymerase III subunit beta
# MIP_00003 DNA replication and repair protein recF recombination protein F
# MIP_00004 Hypothetical protein hypothetical protein Rv0004
# MIP_00006 DNA gyrase subunit B DNA gyrase subunit B