# comparator($str1, $str2)
#
# Splits both strings into tokens (runs of markup tags, runs of whitespace,
# words, single non-word characters), walks the two token lists with
# traverse_sequences(), and rebuilds each string with the tokens that occur
# only on that side wrapped in a start/end marker pair.
#
# Parameters:
#   $str1 - the original text (undef is treated as the empty string)
#   $str2 - the revised text  (undef is treated as the empty string)
#
# Returns the two-element list ($original, $revised).
sub comparator {
    my ($str1, $str2) = @_;
    $str1 = '' unless defined $str1;
    $str2 = '' unless defined $str2;

    my $original = '';
    my $revised  = '';

    # One compiled tokeniser shared by both inputs. The capturing group makes
    # split() return the separators themselves, which are the tokens we want.
    my $token_re = qr/((?:<[^>]+>)+|(?:\s)+|(?:\w[A-Za-z'-]*\w*)+|(?:\W|\P{IsWord})|(?:\p{IsDigit}))/;

    # split() also returns the (empty) fields that lie between adjacent
    # separators. Drop them: a discarded empty token would otherwise be
    # emitted as a marker pair wrapped around nothing.
    my @from = grep { length $_ } split($token_re, $str1);
    my @to   = grep { length $_ } split($token_re, $str2);

    # Marker strings placed around tokens unique to the original (O) and to
    # the revised (R) text: S = start, E = end.
    my $OS = qq||;
    my $OE = qq| |;
    my $RS = qq||;
    my $RE = qq| |;

    # Tokens containing punctuation or whitespace are passed through
    # unwrapped; everything else is surrounded by the given markers.
    # NOTE(review): this also leaves words such as "don't" unmarked because
    # of the apostrophe -- confirm that is intended.
    my $render = sub {
        my ($token, $start, $end) = @_;
        return $token if $token =~ m/(?:\p{IsPunct})|(?:\s)/;
        return $start . $token . $end;
    };

    # Callbacks receive (index into @from, index into @to).
    traverse_sequences(
        \@from,
        \@to,
        {
            MATCH => sub {
                my $token = $from[ $_[0] ];
                $original .= $token;
                $revised  .= $token;
            },
            DISCARD_A => sub {
                $original .= $render->($from[ $_[0] ], $OS, $OE);
            },
            DISCARD_B => sub {
                $revised .= $render->($to[ $_[1] ], $RS, $RE);
            },
        }
    );

    return ($original, $revised);
} #END SUB comparator