CPAN Algorithm::Diff method:
time to compare same two files 1000 times:
40.28 sec
39.68 sec
39.97 sec
40.17 sec
39.70 sec
39.71 sec
39.61 sec
39.82 sec
39.71 sec
39.60 sec
avg. = 39.83 sec
diffutils method:
time to compare same two files 1000 times:
11.68 sec
11.72 sec
11.63 sec
11.69 sec
11.78 sec
11.62 sec
11.62 sec
11.78 sec
11.66 sec
11.62 sec
avg. = 11.68 sec
####
CPAN Algorithm::Diff method:
time to compare 105 file pairs, 10 times:
926.7 sec
924.2 sec
time to compare 105 file pairs, 1 time:
91.02 sec
91.09 sec
91.09 sec
91.10 sec
90.93 sec
91.19 sec
93.42 sec
91.58 sec
avg time for a single comparison of 105 file pairs (mean over all 28 passes, including the two 10x runs):
92.23 secs
diffutils method:
time to compare 105 file pairs, 10 times:
16.76 sec
16.65 sec
16.67 sec
16.68 sec
16.85 sec
16.71 sec
16.72 sec
16.80 sec
16.92 sec
16.84 sec
avg. time for single comparison of 105 file pairs (mean of the ten 10x runs, divided by 10):
1.676 secs
####
repeatedly compare same two files 1000 times:
average times:
Algorithm::Diff 39.83 sec
diffutils 11.68 sec
compare 105 different pairs of files 1 time:
average times:
Algorithm::Diff 92.23 sec
diffutils 1.676 sec
####
## this is the framework:
## One of the two code snippets below is
## substituted for ### DIFF ALGORITHM HERE..
#!/usr/bin/perl
# Benchmark framework, single-pair variant.
#
# Usage: script FILE1 FILE2
#
# Reads both files, puts every whitespace-separated token on its own line,
# then runs the diff snippet substituted at the marker below 1000 times and
# reports the elapsed wall-clock time on STDERR.
use strict;
use warnings;
use lib "/Users/allasso/AWS/utility/cpan/lib/perl5/site_perl";
require Algorithm::Diff;
use Time::HiRes qw( time );

# Read the whole of $path into one string.
# $label only names the file in the error message.
# Returns the content, or undef (after a warning) if the file can't be opened.
sub slurp_file {
    my ($path, $label) = @_;
    my $fh;
    if (! open($fh, '<', $path)) {
        warn "unable to open $label: $path: $!\n";
        return;
    }
    local $/;    # slurp mode; the old separator comes back on scope exit
    my $content = <$fh>;
    close($fh);
    return defined $content ? $content : '';
}

my($source_path_1, $source_path_2) = @ARGV;
die "usage: $0 FILE1 FILE2\n"
    unless defined $source_path_1 && defined $source_path_2;

my $filestring_1 = slurp_file($source_path_1, 'source file 1');
my $filestring_2 = slurp_file($source_path_2, 'source file 2');

# Timing a diff against missing input would produce a meaningless number.
exit 1 unless defined $filestring_1 && defined $filestring_2;

# Collapse each run of whitespace into a single newline: one token per line.
$filestring_1 =~ s@\s+@\n@g;
$filestring_2 =~ s@\s+@\n@g;

my $time = time();
for my $count (0..999) {
### DIFF ALGORITHM HERE..
}

# Four significant digits, properly rounded whatever the magnitude.
my $time_4sig = sprintf('%.4g', time() - $time);
print STDERR "\n\net: ".$time_4sig."\n";
exit;
## this is the CPAN Algorithm::Diff code:
## Expects $filestring_1 and $filestring_2 (newline-separated items) to be in
## scope, and prints a classic diff-style report of their differences to STDOUT.

# split() drops the newlines, so every item printed below needs its own "\n".
my @seq1 = split(/\n/, $filestring_1);
my @seq2 = split(/\n/, $filestring_2);
my $diff = Algorithm::Diff->new( \@seq1, \@seq2 );
$diff->Base( 1 );    # Return line numbers, not indices
while( $diff->Next() ) {
    next if $diff->Same();
    my $sep = '';
    if( ! $diff->Items(2) ) {
        # Nothing on the right-hand side: a deletion hunk.
        printf "%d,%dd%d\n",
            $diff->Get(qw( Min1 Max1 Max2 ));
    } elsif( ! $diff->Items(1) ) {
        # Nothing on the left-hand side: an addition hunk.
        printf "%da%d,%d\n",
            $diff->Get(qw( Max1 Min2 Max2 ));
    } else {
        # Items on both sides: a change hunk, with a divider between them.
        $sep = "---\n";
        printf "%d,%dc%d,%d\n",
            $diff->Get(qw( Min1 Max1 Min2 Max2 ));
    }
    # One item per output line; without the "\n" consecutive "<" items
    # (and the following hunk header) would run together on a single line.
    print "< $_\n" for $diff->Items(1);
    print $sep;
    print "> $_\n" for $diff->Items(2);
}
## this is the diffutils code:
## Expects $filestring_1, $filestring_2, $source_path_1 and $source_path_2 to
## be in scope. Writes both strings to scratch files under /tmp, then prints
## the side-by-side output of the external diff command to STDOUT.

# Stop on any write failure: diffing stale scratch files left over from an
# earlier iteration would silently report the wrong comparison.
open(my $tmp_fh_1, '>', '/tmp/diff_774885959483_1')
    or die "unable to open temporary file: $!\n";
print {$tmp_fh_1} $filestring_1;
close($tmp_fh_1)
    or die "unable to write temporary file: $!\n";

open(my $tmp_fh_2, '>', '/tmp/diff_774885959483_2')
    or die "unable to open temporary file: $!\n";
print {$tmp_fh_2} $filestring_2;
close($tmp_fh_2)
    or die "unable to write temporary file: $!\n";

print "$source_path_1 ::: $source_path_2\n";
# diff's exit status is deliberately not checked here.
print `diff --suppress-common-lines -y /tmp/diff_774885959483_1 /tmp/diff_774885959483_2`;
####
#!/usr/bin/perl
# Benchmark framework, directory variant.
#
# Usage: script DIR1 DIR2
#
# Lists the *.htm* files below each directory, pairs them up by position in
# the sorted listings, and for each pair reads both files, puts every
# whitespace-separated token on its own line and runs the diff snippet
# substituted at the marker below. The whole set is processed 10 times and
# the elapsed wall-clock time is reported on STDERR.
use strict;
use warnings;
use lib "/Users/allasso/AWS/utility/cpan/lib/perl5/site_perl";
require Algorithm::Diff;
use Time::HiRes qw( time );

# Return the sorted list of *.htm* paths that find reports below $dir.
sub find_html_files {
    my ($dir) = @_;
    # List-form pipe open bypasses the shell, so spaces or shell
    # metacharacters in $dir cannot change the command being run.
    open(my $find, '-|', 'find', $dir, '-name', '*.htm*')
        or die "unable to run find on $dir: $!\n";
    my @paths = <$find>;
    close($find);
    chomp @paths;
    # find's output order is not guaranteed; sorting makes the positional
    # pairing deterministic, and lines up equal relative paths when both
    # trees hold the same set of files.
    my @sorted = sort @paths;
    return @sorted;
}

# Read the whole of $path into one string.
# $label only names the file in the error message.
# Returns the content, or undef (after a warning) if the file can't be opened.
sub slurp_file {
    my ($path, $label) = @_;
    my $fh;
    if (! open($fh, '<', $path)) {
        warn "unable to open $label: $path: $!\n";
        return;
    }
    local $/;    # slurp mode; the old separator comes back on scope exit
    my $content = <$fh>;
    close($fh);
    return defined $content ? $content : '';
}

my($source_path_1, $source_path_2) = @ARGV;
die "usage: $0 DIR1 DIR2\n"
    unless defined $source_path_1 && defined $source_path_2;

# Strip trailing slashes from both roots.
$source_path_1 =~ s@\x2f*$@@;
$source_path_2 =~ s@\x2f*$@@;

my @src_list_1 = find_html_files($source_path_1);
my @src_list_2 = find_html_files($source_path_2);
warn "file counts differ: ".@src_list_1." vs ".@src_list_2."\n"
    if @src_list_1 != @src_list_2;

my $time = time();
for my $count (0..9) {
    my $list_cnt = 0;
    for my $file_src_1 (@src_list_1) {
        my $file_src_2 = $src_list_2[$list_cnt++];
        next unless defined $file_src_2;    # second tree has fewer files

        my $filestring_1 = slurp_file($file_src_1, 'source file 1');
        my $filestring_2 = slurp_file($file_src_2, 'source file 2');
        next unless defined $filestring_1 && defined $filestring_2;

        # Collapse each run of whitespace into a single newline.
        $filestring_1 =~ s@\s+@\n@g;
        $filestring_2 =~ s@\s+@\n@g;
### DIFF ALGORITHM HERE..
    }
}

# Four significant digits, properly rounded whatever the magnitude.
my $time_4sig = sprintf('%.4g', time() - $time);
print STDERR "\n\net: ".$time_4sig."\n";
exit;
####
## full recursive script using CPAN Algorithm::Diff :
#!/usr/bin/perl
# Usage: script DIR1 DIR2
#
# Lists the *.htm* files below each directory, pairs them up by position in
# the sorted listings, and prints a classic diff-style report for each pair
# using Algorithm::Diff, comparing whitespace-separated tokens rather than
# original lines. The whole set is processed 10 times and the elapsed
# wall-clock time is reported on STDERR.
use strict;
use warnings;
use lib "/Users/allasso/AWS/utility/cpan/lib/perl5/site_perl";
require Algorithm::Diff;
use Time::HiRes qw( time );

# Return the sorted list of *.htm* paths that find reports below $dir.
sub find_html_files {
    my ($dir) = @_;
    # List-form pipe open bypasses the shell, so spaces or shell
    # metacharacters in $dir cannot change the command being run.
    open(my $find, '-|', 'find', $dir, '-name', '*.htm*')
        or die "unable to run find on $dir: $!\n";
    my @paths = <$find>;
    close($find);
    chomp @paths;
    # find's output order is not guaranteed; sorting makes the positional
    # pairing deterministic, and lines up equal relative paths when both
    # trees hold the same set of files.
    my @sorted = sort @paths;
    return @sorted;
}

# Read the whole of $path into one string.
# $label only names the file in the error message.
# Returns the content, or undef (after a warning) if the file can't be opened.
sub slurp_file {
    my ($path, $label) = @_;
    my $fh;
    if (! open($fh, '<', $path)) {
        warn "unable to open $label: $path: $!\n";
        return;
    }
    local $/;    # slurp mode; the old separator comes back on scope exit
    my $content = <$fh>;
    close($fh);
    return defined $content ? $content : '';
}

my($source_path_1, $source_path_2) = @ARGV;
die "usage: $0 DIR1 DIR2\n"
    unless defined $source_path_1 && defined $source_path_2;

# Strip trailing slashes from both roots.
$source_path_1 =~ s@\x2f*$@@;
$source_path_2 =~ s@\x2f*$@@;

my @src_list_1 = find_html_files($source_path_1);
my @src_list_2 = find_html_files($source_path_2);
warn "file counts differ: ".@src_list_1." vs ".@src_list_2."\n"
    if @src_list_1 != @src_list_2;

my $time = time();
for my $count (0..9) {
    my $list_cnt = 0;
    for my $file_src_1 (@src_list_1) {
        my $file_src_2 = $src_list_2[$list_cnt++];
        next unless defined $file_src_2;    # second tree has fewer files

        my $filestring_1 = slurp_file($file_src_1, 'source file 1');
        my $filestring_2 = slurp_file($file_src_2, 'source file 2');
        next unless defined $filestring_1 && defined $filestring_2;

        # Collapse each run of whitespace into a single newline.
        $filestring_1 =~ s@\s+@\n@g;
        $filestring_2 =~ s@\s+@\n@g;

        ## begin CPAN algorithm:
        # split() drops the newlines, so every item printed below needs
        # its own "\n".
        my @seq1 = split(/\n/, $filestring_1);
        my @seq2 = split(/\n/, $filestring_2);
        my $diff = Algorithm::Diff->new( \@seq1, \@seq2 );
        $diff->Base( 1 );    # Return line numbers, not indices
        while( $diff->Next() ) {
            next if $diff->Same();
            my $sep = '';
            if( ! $diff->Items(2) ) {
                # Nothing on the right-hand side: a deletion hunk.
                printf "%d,%dd%d\n",
                    $diff->Get(qw( Min1 Max1 Max2 ));
            } elsif( ! $diff->Items(1) ) {
                # Nothing on the left-hand side: an addition hunk.
                printf "%da%d,%d\n",
                    $diff->Get(qw( Max1 Min2 Max2 ));
            } else {
                # Items on both sides: a change hunk, with a divider.
                $sep = "---\n";
                printf "%d,%dc%d,%d\n",
                    $diff->Get(qw( Min1 Max1 Min2 Max2 ));
            }
            # One item per output line; without the "\n" consecutive "<"
            # items (and the next hunk header) would run together.
            print "< $_\n" for $diff->Items(1);
            print $sep;
            print "> $_\n" for $diff->Items(2);
        }
        ## end CPAN algorithm
    }
}

# Four significant digits, properly rounded whatever the magnitude.
my $time_4sig = sprintf('%.4g', time() - $time);
print STDERR "\n\net: ".$time_4sig."\n";
exit;
## full recursive script using diffutils :
#!/usr/bin/perl
# Usage: script DIR1 DIR2
#
# Lists the *.htm* files below each directory, pairs them up by position in
# the sorted listings, and for each pair writes the whitespace-normalized
# contents to two scratch files under /tmp and prints the side-by-side output
# of the external diff command. The whole set is processed 10 times and the
# elapsed wall-clock time is reported on STDERR.
use strict;
use warnings;
use lib "/Users/allasso/AWS/utility/cpan/lib/perl5/site_perl";
require Algorithm::Diff;    # not used by this variant; kept from the shared framework
use Time::HiRes qw( time );

# Return the sorted list of *.htm* paths that find reports below $dir.
sub find_html_files {
    my ($dir) = @_;
    # List-form pipe open bypasses the shell, so spaces or shell
    # metacharacters in $dir cannot change the command being run.
    open(my $find, '-|', 'find', $dir, '-name', '*.htm*')
        or die "unable to run find on $dir: $!\n";
    my @paths = <$find>;
    close($find);
    chomp @paths;
    # find's output order is not guaranteed; sorting makes the positional
    # pairing deterministic, and lines up equal relative paths when both
    # trees hold the same set of files.
    my @sorted = sort @paths;
    return @sorted;
}

# Read the whole of $path into one string.
# $label only names the file in the error message.
# Returns the content, or undef (after a warning) if the file can't be opened.
sub slurp_file {
    my ($path, $label) = @_;
    my $fh;
    if (! open($fh, '<', $path)) {
        warn "unable to open $label: $path: $!\n";
        return;
    }
    local $/;    # slurp mode; the old separator comes back on scope exit
    my $content = <$fh>;
    close($fh);
    return defined $content ? $content : '';
}

# Replace the contents of the scratch file $path with $content.
# Dies on failure: diffing a stale scratch file left over from an earlier
# pair would silently report the wrong comparison.
sub write_scratch_file {
    my ($path, $content) = @_;
    open(my $fh, '>', $path)
        or die "unable to open temporary file: $!\n";
    print {$fh} $content;
    close($fh)
        or die "unable to write temporary file: $!\n";
    return;
}

my($source_path_1, $source_path_2) = @ARGV;
die "usage: $0 DIR1 DIR2\n"
    unless defined $source_path_1 && defined $source_path_2;

# Strip trailing slashes from both roots.
$source_path_1 =~ s@\x2f*$@@;
$source_path_2 =~ s@\x2f*$@@;

my @src_list_1 = find_html_files($source_path_1);
my @src_list_2 = find_html_files($source_path_2);
warn "file counts differ: ".@src_list_1." vs ".@src_list_2."\n"
    if @src_list_1 != @src_list_2;

my $time = time();
for my $count (0..9) {
    my $list_cnt = 0;
    for my $file_src_1 (@src_list_1) {
        my $file_src_2 = $src_list_2[$list_cnt++];
        next unless defined $file_src_2;    # second tree has fewer files

        my $filestring_1 = slurp_file($file_src_1, 'source file 1');
        my $filestring_2 = slurp_file($file_src_2, 'source file 2');
        next unless defined $filestring_1 && defined $filestring_2;

        # Collapse each run of whitespace into a single newline.
        $filestring_1 =~ s@\s+@\n@g;
        $filestring_2 =~ s@\s+@\n@g;

        ## begin diffutils algorithm:
        write_scratch_file('/tmp/diff_774885959483_1', $filestring_1);
        write_scratch_file('/tmp/diff_774885959483_2', $filestring_2);
        #print "$file_src_1 ::: $file_src_2\n";
        # diff's exit status is deliberately not checked here.
        print `diff --suppress-common-lines -y /tmp/diff_774885959483_1 /tmp/diff_774885959483_2`;
        ## end diffutils algorithm
    }
}

# Four significant digits, properly rounded whatever the magnitude.
my $time_4sig = sprintf('%.4g', time() - $time);
print STDERR "\n\net: ".$time_4sig."\n";
exit;