use Algorithm::Diff qw(traverse_sequences);

#----------------------------------------------------------------------------------
# C O M P A R A T O R
#----------------------------------------------------------------------------------
# comparator($str1, $str2)
#
# Token-level comparison of two strings.
#
# Both strings are split on a space or one of , . : ; " ? ! - (the delimiters
# are kept as tokens of their own) and the two token lists are walked with
# Algorithm::Diff::traverse_sequences:
#   * tokens present in both strings are copied to both outputs unchanged;
#   * tokens only in $str1 are added to the "original" output between $OS/$OE;
#   * tokens only in $str2 are added to the "revised" output between $RS/$RE.
# A token containing a newline is emitted as "\n\n" instead of its text.
#
# Parameters:
#   $str1 - the original text (undef is treated as the empty string)
#   $str2 - the revised text  (undef is treated as the empty string)
#
# Returns the two-element list ($original, $revised).
#
# NOTE(review): in this copy of the file the four markers are empty strings,
# so no highlighting markup is emitted. It looks as if markup tags were
# stripped from the string/regex literals at some point -- confirm the
# intended open/close tags (and the newline literals) against the
# authoritative version of the file.
sub comparator {
    my $str1 = shift @_;
    my $str2 = shift @_;

    $str1 = '' unless defined $str1;
    $str2 = '' unless defined $str2;

    my $original = '';
    my $revised  = '';

    # The capturing group makes split() return the delimiters as tokens too.
    my $delimiter = qr/([ ,.:;"?!-])/;
    my @from = split $delimiter, $str1;
    my @to   = split $delimiter, $str2;

    # Open/close markers for removed ($OS/$OE) and added ($RS/$RE) tokens.
    my $OS = q||;
    my $OE = q||;
    my $RS = q||;
    my $RE = q||;

    # Every callback is invoked as (index into @from, index into @to).
    traverse_sequences(
        \@from,
        \@to,
        {
            MATCH => sub {
                my ($from_index) = @_;
                my $token = $from[$from_index];
                if ($token =~ /\n/) {
                    $original .= "\n\n";
                    $revised  .= "\n\n";
                }
                else {
                    $original .= $token;
                    $revised  .= $token;
                }
            },
            DISCARD_A => sub {
                my ($from_index) = @_;
                _comparator_append_change(\$original, $from[$from_index], $OS, $OE);
            },
            DISCARD_B => sub {
                my (undef, $to_index) = @_;
                _comparator_append_change(\$revised, $to[$to_index], $RS, $RE);
            },
        }
    );

    # Move a leading newline to the end. Without /s this only matches when
    # the text after that newline contains no further embedded newline.
    $original =~ s{^(\n)(.*+)$}{$2$1};

    # NOTE(review): the first capture group is empty here, so this
    # substitution never changes $revised; it presumably mirrored the one
    # above before its contents were lost -- confirm.
    $revised =~ s{^()(.*+)$}{$2$1};

    return ($original, $revised);
} #END SUB comparator

# Append one changed (removed or added) token to the buffer behind
# $buffer_ref, wrapping it in $open/$close.
#
# When the buffer ends with a close marker followed by whitespace, the token
# is folded into that preceding marked run instead of starting a new one:
# punctuation replaces the whitespace, any other token keeps a single space
# in front of it. If there is no such run to fold into, the token is always
# appended as a new marked run, so it is never dropped and no close marker is
# emitted without a matching open marker.
sub _comparator_append_change {
    my ($buffer_ref, $token, $open, $close) = @_;

    # split() yields empty tokens between adjacent delimiters; there is
    # nothing to mark for those.
    return if $token eq '';

    if ($token =~ /\n/) {
        ${$buffer_ref} .= "\n\n";
        return;
    }

    # Folding is only possible when there is a close marker to look for.
    if (length($close) && ${$buffer_ref} =~ /\s$/) {
        my $merged;
        if ($token =~ /[,.:;"?!-]/) {
            $merged = ${$buffer_ref} =~ s{(?:\Q$close\E)+\s+$}{$token$close};
        }
        else {
            $merged = ${$buffer_ref} =~ s{\Q$close\E\s$}{ $token$close};
        }
        return if $merged;
    }

    ${$buffer_ref} .= $open . $token . $close;
    return;
}