#!/usr/bin/perl -w use strict; require Algorithm::Diff; my $oldText= "was this a test?"; my @oldChunks= $oldText =~ /(\s+|\w+|[^\w\s]+)/g; my @oldIdxs= grep $oldChunks[$_] =~ /\S/, 0..$#oldChunks; my @oldWords= @oldChunks[@oldIdxs]; push @oldIdxs, 0+@oldChunks; my $newText= "this is a test"; my @newChunks= $newText =~ /(\s+|\w+|[^\w\s]+)/g; my @newIdxs= grep $newChunks[$_] =~ /\S/, 0..$#newChunks; my @newWords= @newChunks[@newIdxs]; push @newIdxs, 0+@newChunks; { my @old= @oldChunks[ 0 .. $oldIdxs[0]-1 ]; my @new= @newChunks[ 0 .. $newIdxs[0]-1 ]; Show( \@old, \@new ); } my $diff= Algorithm::Diff->new( \@oldWords, \@newWords ); while( $diff->Next() ) { if( ! $diff->Same() ) { my( @old, @new ); @old= @oldChunks[ $oldIdxs[$diff->Min(1)] .. $oldIdxs[$diff->Max(1)+1]-1 ] if $diff->Range(1); @new= @newChunks[ $newIdxs[$diff->Min(2)] .. $newIdxs[$diff->Max(2)+1]-1 ] if $diff->Range(2); Show( \@old, \@new ); } else { # This is the complex case, because the non-whitespace # parts are the same but the whitespace may differ: my $oldIdx= $diff->Min(1); for my $newIdx ( $diff->Range(2) ) { Show( [], [ $newChunks[$newIdxs[$newIdx]] ] ); my @old= @oldChunks[ $oldIdxs[$oldIdx]+1 .. $oldIdxs[$oldIdx+1]-1 ]; my @new= @newChunks[ $newIdxs[$newIdx]+1 .. $newIdxs[$newIdx+1]-1 ]; Show( \@old, \@new ); $oldIdx++; } } } sub Show { my $oldText= join '', @{shift @_}; my $newText= join '', @{shift @_}; print "$oldText" if '' ne $oldText && $oldText ne $newText; print $newText; }