# Compare two sentences token by token.
#
# Each line is split into tokens (markup tags, whitespace runs, words, single
# non-word characters). Every token of one line that is not found anywhere in
# the other line (case-insensitively) is run through a word-bounded,
# case-insensitive substitution on its own line.
#
# Reads:  $line1, $line2
# Writes: @tokens1, @tokens2 (the raw, unescaped tokens), $line1, $line2
#
# NOTE(review): the substitution replaces each match with $1, i.e. with the
# matched text itself, so as written both lines come out unchanged. Presumably
# highlight markup around $1 was lost at some point -- confirm, and restore it
# in _rewrite_unshared_tokens if so.

# Split a sentence into tokens. The capturing group makes split() return the
# separators (the real tokens) together with the empty strings between them.
sub _split_into_tokens {
    my ($text) = @_;
    return () unless defined $text;
    return split(/((?:<[^>]+>)+|(?:\s)+|(?:\w[A-Za-z'-]*\w*)+|(?:\W|\P{IsWord})|(?:\p{IsDigit}))/, $text);
}

# Return the tokens that are neither empty nor punctuation-only and that do
# not occur in $other_line. Repeated tokens (compared case-insensitively) are
# reported once, in order of first appearance.
sub _tokens_missing_from {
    my ($other_line, @tokens) = @_;
    $other_line = '' unless defined $other_line;

    my %seen;
    my @missing;
    for my $token (@tokens) {
        next if $token eq '';

        # Filter on the RAW token. Filtering after escaping let '.', '(', '?'
        # etc. through, because their escaped forms contain a backslash.
        next if $token =~ /^(?:[ .:;'"}{\]\[\(\)!\?\*\+\-])+$/;

        # Matching is case-insensitive, so tokens differing only in case
        # behave identically; handle each of them once.
        next if $seen{ lc $token }++;

        # quotemeta escapes every regex metacharacter, including '\', '|',
        # '^' and '$', so any token is matched literally.
        my $pattern = quotemeta $token;

        # No /g here: a scalar-context //g match resumes from the position of
        # the previous successful match and would miss earlier occurrences.
        push @missing, $token unless $other_line =~ m/$pattern/i;
    }
    return @missing;
}

# Apply the word-bounded substitution to $line for every token of @tokens that
# is missing from $other_line, and return the resulting line.
sub _rewrite_unshared_tokens {
    my ($line, $other_line, @tokens) = @_;
    for my $token (_tokens_missing_from($other_line, @tokens)) {
        my $pattern = quotemeta $token;
        $line =~ s~\b($pattern)\b~$1~gi;
    }
    return $line;
}

# Split the sentences into tokens for individual comparison.
@tokens1 = _split_into_tokens($line1);
@tokens2 = _split_into_tokens($line2);

# Order matters: $line1 is rewritten first, and the tokens of $line2 are then
# checked against that rewritten $line1.
$line1 = _rewrite_unshared_tokens($line1, $line2, @tokens1);
$line2 = _rewrite_unshared_tokens($line2, $line1, @tokens2);