# comparator($str1, $str2)
#
# Produces a token-level comparison of two strings.
#
# Both strings are split into tokens (runs of markup tags, runs of
# whitespace, words, single non-word characters) and the two token lists
# are handed to traverse_sequences(), which is not defined in this block
# (presumably Algorithm::Diff's traverse_sequences -- confirm against the
# file's imports).
#
# Returns a two-element list ($original, $revised):
#   $original - $str1 rebuilt from its tokens, where tokens missing from
#               $str2 are wrapped in $OS/$OE unless they contain
#               punctuation or whitespace;
#   $revised  - $str2 rebuilt from its tokens, where tokens missing from
#               $str1 are wrapped in $RS/$RE under the same rule.
#
# An undefined argument is treated as the empty string.
sub comparator {
    my $str1 = shift @_;
    my $str2 = shift @_;

    # Avoid "uninitialized value" warnings from split on a missing argument.
    $str1 = '' unless defined $str1;
    $str2 = '' unless defined $str2;

    my $original = '';
    my $revised  = '';

    # One tokeniser for both inputs.  The outer capture group makes split
    # return the matched tokens themselves.
    my $token_re = qr/((?:<[^>]+>)+|(?:\s)+|(?:\w[A-Za-z'-]*\w*)+|(?:\W|\P{IsWord})|(?:\p{IsDigit}))/;

    # Every character is matched by some alternative above, so the fields
    # *between* tokens are always empty strings.  Drop them: otherwise they
    # take part in the diff and a discarded empty field is emitted as a
    # bare start/end marker pair with nothing inside.
    my @from = grep { defined($_) && length($_) } split($token_re, $str1);
    my @to   = grep { defined($_) && length($_) } split($token_re, $str2);

    # Start/end markers placed around changed tokens on each side.
    # NOTE(review): the start markers are empty and the end markers are a
    # single space here; these look like placeholders for markup -- confirm.
    my $OS = qq||;
    my $OE = qq| |;
    my $RS = qq||;
    my $RE = qq| |;

    # Returns the token unchanged when it contains punctuation or
    # whitespace, otherwise the token wrapped in the given markers.
    # NOTE(review): the pattern is unanchored, so a word containing an
    # apostrophe or hyphen is also left unwrapped -- confirm this is intended.
    my $mark = sub {
        my ($token, $start, $end) = @_;
        return ($token =~ m/(?:\p{IsPunct})|(?:\s)/)
            ? $token
            : $start . $token . $end;
    };

    # Callbacks: the first argument indexes @from and the last argument
    # indexes @to.
    traverse_sequences(
        \@from, \@to,
        {
            # Token present on both sides: copy it through untouched.
            MATCH => sub {
                my $token = $from[ $_[0] ];
                $original .= $token;
                $revised  .= $token;
            },
            # Token only in the first string.
            DISCARD_A => sub {
                $original .= $mark->($from[ $_[0] ], $OS, $OE);
            },
            # Token only in the second string.
            DISCARD_B => sub {
                $revised .= $mark->($to[ $_[-1] ], $RS, $RE);
            },
        }
    );

    return ($original, $revised);
} #END SUB comparator