## full recursive script using CPAN Algorithm::Diff :

#!/usr/bin/perl
#
# Benchmark: word-level diff of paired *.htm* files found under two
# directory trees, using Algorithm::Diff. The whole comparison is repeated
# 10 times and the elapsed wall-clock time is printed to STDERR.
#
# Usage: script source_path_1 source_path_2
#
# NOTE(review): this listing and the diffutils listing below appear to be two
# independent scripts (each has its own shebang and preamble); presumably each
# is meant to be saved to its own file -- confirm.

use strict;
use warnings;
use lib "/Users/allasso/AWS/utility/cpan/lib/perl5/site_perl";
require Algorithm::Diff;
use Time::HiRes qw( time );

# Run a command without going through the shell (so paths containing spaces
# or shell metacharacters are passed through verbatim) and return its output
# lines. The exit status is deliberately ignored.
sub run_command {
    my @command = @_;
    open(my $pipe, '-|', @command)
        or die "unable to run $command[0]: $!\n";
    my @lines = <$pipe>;
    close($pipe);
    return @lines;
}

# Return the whole content of $path as one string, or undef if the file
# cannot be opened.
sub slurp_file {
    my ($path) = @_;
    open(my $fh, '<', $path) or return;
    my $content = do { local $/; <$fh> };
    close($fh);
    return defined $content ? $content : '';
}

die "usage: $0 source_path_1 source_path_2\n" if @ARGV < 2;

my($source_path_1, $source_path_2) = @ARGV;

# Strip trailing slashes from both roots.
$source_path_1 =~ s@\x2f*$@@;
$source_path_2 =~ s@\x2f*$@@;

# Files are paired by position in the two lists. Sorting makes that pairing
# independent of the order in which find happens to walk each tree.
my @src_list_1 = sort(run_command('find', $source_path_1, '-name', '*.htm*'));
my @src_list_2 = sort(run_command('find', $source_path_2, '-name', '*.htm*'));
chomp(@src_list_1);
chomp(@src_list_2);

my $time = time();

# Ten identical passes, purely to get a measurable elapsed time.
for my $count (0..9) {

    my $list_cnt = 0;

    for my $file_src_1 (@src_list_1) {

        my $file_src_2 = $src_list_2[$list_cnt++];
        if (! defined $file_src_2) {
            print "no matching source file 2 for: $file_src_1\n";
            next;
        }

        my $filestring_1 = slurp_file($file_src_1);
        if (! defined $filestring_1) {
            print "unable to open source file 1: $file_src_1\n";
            next;
        }

        my $filestring_2 = slurp_file($file_src_2);
        if (! defined $filestring_2) {
            print "unable to open source file 2: $file_src_2\n";
            next;
        }

        # Put every whitespace-separated word on its own line so the diff
        # works word by word.
        $filestring_1 =~ s@\s+@\n@g;
        $filestring_2 =~ s@\s+@\n@g;

        ## begin CPAN algorithm:
        my @seq1 = split(/\n/, $filestring_1);
        my @seq2 = split(/\n/, $filestring_2);

        my $diff = Algorithm::Diff->new( \@seq1, \@seq2 );

        $diff->Base( 1 );   # Return line numbers, not indices
        while( $diff->Next() ) {
            next if $diff->Same();
            my $sep = '';
            if( ! $diff->Items(2) ) {
                printf "%d,%dd%d\n", $diff->Get(qw( Min1 Max1 Max2 ));
            }
            elsif( ! $diff->Items(1) ) {
                printf "%da%d,%d\n", $diff->Get(qw( Max1 Min2 Max2 ));
            }
            else {
                $sep = "---\n";
                printf "%d,%dc%d,%d\n", $diff->Get(qw( Min1 Max1 Min2 Max2 ));
            }
            # The items come from split() and carry no newline of their own,
            # so one is added on both sides of the hunk.
            print "< $_\n" for $diff->Items(1);
            print $sep;
            print "> $_\n" for $diff->Items(2);
        }
        ## end CPAN algorithm
    }
}

# Elapsed seconds, cut to the first five characters of the number.
my $time_4sig = time() - $time + .005;
$time_4sig =~ s@^(.....).*@$1@;
print STDERR "\n\net: ".$time_4sig."\n";

exit;

## full recursive script using diffutils :

#!/usr/bin/perl
#
# Benchmark: word-level diff of paired *.htm* files found under two
# directory trees, using the external diff(1) program. The whole comparison
# is repeated 10 times and the elapsed wall-clock time is printed to STDERR.
#
# Usage: script source_path_1 source_path_2

use strict;
use warnings;
use lib "/Users/allasso/AWS/utility/cpan/lib/perl5/site_perl";
require Algorithm::Diff;
use File::Temp qw( tempdir );
use Time::HiRes qw( time );

# Run a command without going through the shell (so paths containing spaces
# or shell metacharacters are passed through verbatim) and return its output
# lines. The exit status is deliberately ignored: diff exits with 1 whenever
# the inputs differ.
sub run_command {
    my @command = @_;
    open(my $pipe, '-|', @command)
        or die "unable to run $command[0]: $!\n";
    my @lines = <$pipe>;
    close($pipe);
    return @lines;
}

# Return the whole content of $path as one string, or undef if the file
# cannot be opened.
sub slurp_file {
    my ($path) = @_;
    open(my $fh, '<', $path) or return;
    my $content = do { local $/; <$fh> };
    close($fh);
    return defined $content ? $content : '';
}

# Overwrite $path with $content. Returns true on success, false if the file
# could not be opened or fully written.
sub write_file {
    my ($path, $content) = @_;
    open(my $fh, '>', $path) or return 0;
    print {$fh} $content;
    return close($fh) ? 1 : 0;
}

die "usage: $0 source_path_1 source_path_2\n" if @ARGV < 2;

my($source_path_1, $source_path_2) = @ARGV;

# Strip trailing slashes from both roots.
$source_path_1 =~ s@\x2f*$@@;
$source_path_2 =~ s@\x2f*$@@;

# Files are paired by position in the two lists. Sorting makes that pairing
# independent of the order in which find happens to walk each tree.
my @src_list_1 = sort(run_command('find', $source_path_1, '-name', '*.htm*'));
my @src_list_2 = sort(run_command('find', $source_path_2, '-name', '*.htm*'));
chomp(@src_list_1);
chomp(@src_list_2);

# Scratch files live in a private directory that is removed at exit, instead
# of under fixed, predictable names directly in /tmp.
my $tmp_dir    = tempdir( CLEANUP => 1 );
my $tmp_file_1 = "$tmp_dir/diff_1";
my $tmp_file_2 = "$tmp_dir/diff_2";

my $time = time();

# Ten identical passes, purely to get a measurable elapsed time.
for my $count (0..9) {

    my $list_cnt = 0;

    for my $file_src_1 (@src_list_1) {

        my $file_src_2 = $src_list_2[$list_cnt++];
        if (! defined $file_src_2) {
            print "no matching source file 2 for: $file_src_1\n";
            next;
        }

        my $filestring_1 = slurp_file($file_src_1);
        if (! defined $filestring_1) {
            print "unable to open source file 1: $file_src_1\n";
            next;
        }

        my $filestring_2 = slurp_file($file_src_2);
        if (! defined $filestring_2) {
            print "unable to open source file 2: $file_src_2\n";
            next;
        }

        # Put every whitespace-separated word on its own line so the diff
        # works word by word.
        $filestring_1 =~ s@\s+@\n@g;
        $filestring_2 =~ s@\s+@\n@g;

        ## begin diffutils algorithm:
        if (! write_file($tmp_file_1, $filestring_1)) {
            print "unable to open temporary file\n";
            next;
        }
        if (! write_file($tmp_file_2, $filestring_2)) {
            print "unable to open temporary file\n";
            next;
        }

        #print "$file_src_1 ::: $file_src_1\n";

        print run_command('diff', '--suppress-common-lines', '-y',
                          $tmp_file_1, $tmp_file_2);
        ## end diffutils algorithm
    }
}

# Elapsed seconds, cut to the first five characters of the number.
my $time_4sig = time() - $time + .005;
$time_4sig =~ s@^(.....).*@$1@;
print STDERR "\n\net: ".$time_4sig."\n";

exit;