# Split both sentences into tokens for individual comparison.  The whole
# pattern sits inside one capturing group, so split() returns the matched
# pieces themselves (tag runs, whitespace runs, words, single non-word
# characters, digits) interleaved with the text between matches; empty
# strings are skipped during comparison.
my $token_splitter = qr/((?:<[^>]+>)+|(?:\s)+|(?:\w[A-Za-z'-]*\w*)+|(?:\W|\P{IsWord})|(?:\p{IsDigit}))/;
@tokens1 = split /$token_splitter/, $line1;
@tokens2 = split /$token_splitter/, $line2;

# $line1 is processed against $line2 first; $line2 is then processed against
# the resulting $line1 (same order as the two original loops).
_mark_unshared_tokens(\$line1, $line2, @tokens1);
_mark_unshared_tokens(\$line2, $line1, @tokens2);

# Apply the "token not present in the other line" substitution to one line.
#
# Arguments:
#   $line_ref   - reference to the line that is rewritten in place
#   $other_line - the line in which each token is looked up (not modified)
#   @tokens     - the tokens of the line behind $line_ref; received by value,
#                 so the caller's token array is left unescaped and unchanged
#
# Returns the list of tokens that were not found in $other_line, in the order
# they were encountered.
sub _mark_unshared_tokens {
    my ($line_ref, $other_line, @tokens) = @_;

    # Tokens made up solely of these punctuation characters are ignored.
    my $punctuation_only = qr/^(?:[ .:;'"}{\]\[\(\)!\?\*\+\-])+$/;
    my @unshared;

    for my $tok (@tokens) {
        next if $tok eq '';

        # Test the raw token: once a token is escaped, "." becomes "\." and
        # would no longer match the punctuation class above.
        next if $tok =~ $punctuation_only;

        # quotemeta escapes every regex metacharacter.  A hand-written list
        # is easy to leave incomplete: an unescaped "\" token makes the match
        # die with "Trailing \ in regex", and an unescaped "|" token turns
        # into an empty alternation that matches any line.
        my $literal = quotemeta $tok;

        # No /g here: in scalar context m//g resumes from pos() of the
        # previous successful match, so a token occurring earlier in
        # $other_line than the last hit would wrongly be reported as missing.
        # NOTE(review): this is a substring lookup (no \b), while the
        # substitution below is word-bounded - confirm that is intended.
        next if $other_line =~ m/$literal/i;

        push @unshared, $tok;

        # NOTE(review): the replacement is just $1, so as written every match
        # is replaced by itself and the line text does not change.  Presumably
        # highlight markup was meant to surround $1 - confirm and restore.
        ${$line_ref} =~ s~\b($literal)\b~$1~gi;
    }

    return @unshared;
}