Category: Utility
Author/Contact Info John M. Dlugosz www.dlugosz.com/tools
Description: I got un-understandable errors from the FC.exe that comes with Windows, so I wrote my own simple file compare program that does understandable reporting on error checking (which is the whole reason for running such a program!). I figured that would be reusable, so I started generalizing to a module.

This is different from File::Compare, which does a line-by-line comparison. This module, in contrast, does not assume the data is organized as lines, but reads huge chunks and compares in large gulps and doesn't care about line ending characters.

It is designed to be derived from to provide progress indocator and results within a larger program, as needed. It has support logic to provide a canned file-compare program with just a single call.

Note: website has installer and sample program.
package Dlugosz::CompareFiles;

require 5.005_62;
use strict;
use warnings;
use Carp;
use File::Spec;
use FileHandle;

require Exporter;
our @ISA = qw(Exporter);
our %EXPORT_TAGS = ( 'all' => [ qw( compare_files ) ] );
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
our @EXPORT = qw();  #nothing exported by default.
our $VERSION = '0.01';

our $default_chunksize= 10*1024*1024;

sub compare_files
 {
 my $x= __PACKAGE__ ->new();
 my $flag= $x->easy_arguments (@_);
 if ($flag == 2) {
    # compare directories
    }
 elsif ($flag == 1) {
    # compare files
    # TO-DO >> if both files have names, compare size first.
    return $x->compare_handles();
    }
 else {
    # this should never happen.  easy_arguments itself carp's if not 1
+ or 2.
    carp "Unknown or unsupported operation";
    }
 }

sub new
 {
 my $class= shift;
 carp __PACKAGE__ . "::new takes package name as argument, only."  if 
+ref ($class);
 my $self= bless 
    {
       chunksize => $default_chunksize        # default chunk size for
+ reading
    }, $class;
 return $self;
 }

sub _digest_arg
 {
 my $argument= shift;
 my %info;
 if (ref($argument)) {
    # assume it's a handle.
    $info{handle}= $argument;
    $info{type}= 1;
    $info{I_close_handle}= 0;  # I didn't open it.
    }
 else {
    $argument= File::Spec->canonpath ($argument);  #specifically, remo
+ves a trailing (back?)slash, necessary for -d to work under Win32.
    # caveat: if removing the trailing slash turns it into the name of
+ an existing file, than it *should* be an error,
    # since adding the slash means you meant it to be a directory.  Th
+is won't catch that -- no standard way to access
    # the delimiter character to check for it.
    if (-d $argument) {
       $info{type}= 4;  # directory
       $info{directory}= $argument;
       }
    else {
       # assume it's the name of a file
       $info{type}= 2;  #file name
       $info{filename}= $argument;
       }
    }
 return \%info;
 }

sub _open
 {
 my $p= shift;
 unless (exists $p->{handle}) {
    $p->{handle}= new FileHandle;
    open $p->{handle}, "< $p->{filename}"
       or croak qq(Can't open file "$p->{filename}", ($!)($^E));
    binmode $p->{handle};
    $p->{I_close_handle}= 1;
    }
 }
 
sub easy_arguments
 {
 my $self= shift;
 croak "no arguments specified"  if 0 == @_;
 croak "too many arguments specified"  if 2 < @_;
 $self->{leftarg}= _digest_arg (shift @_);
 $self->{rightarg}= _digest_arg ((shift @_)||'.');
 my $mode= $self->{leftarg}{type} | $self->{rightarg}{type};
 if ($mode == 5) {
    croak "Cannot compare a handle against a directory name";
    }
 elsif ($mode == 4) { return 2; }  #compare directories
 elsif ($mode == 1) { return 1; }  # two open handles -- easy.
 if ($mode == 6) {
    # imply the same file name (as the other argument) in the specifie
+d directory.
    my ($left,$right)= ($self->{leftarg}, $self->{rightarg});
    ($left,$right)= ($right.$left)  if $left->{type} != 2;  # file nam
+e on left, dir only on right
    my ($volume, $path, $name)= File::Spec->splitpath ($left->{filenam
+e});
    $right->{filename}= File::Spec->catfile ($right->{directory}, $nam
+e);
    }
 # open 
 _open ($self->{leftarg});
 _open ($self->{rightarg});
 return 1;
 }

sub compare_handles
 {
 my $self= shift;
 my ($buf1, $buf2);
 my $offset = 0;
 my $progress_count= 0;

 for (;;) {
    my $sizeread1= read ($self->{leftarg}{handle}, $buf1, $self->{chun
+ksize});
    my $sizeread2= read ($self->{rightarg}{handle}, $buf2, $self->{chu
+nksize});
    if ($sizeread1 != $sizeread2) {
       $self->report ('readsize', $offset, $buf1, $buf2);
       return undef;  # failed to match
       }
    if ($sizeread1 == 0) {
       $self->report ('match');
       return 1;
       }
    # compare here
    if ($buf1 ne $buf2) {
       $self->report ('nonmatch', $offset, $buf1, $buf2);
       return undef;  # failed to match
       }
    # increment location
    $offset += $sizeread1;
    $self->progress (++$progress_count, $offset);
    }
 return 1;  # status for OK.
 }

sub compare_directories
 {
 }

sub report
# this is called with one of the following argument lists:
#       $self->report ('readsize', $offset, $buf1, $buf2);
#       $self->report ('nonmatch', $offset, $buf1, $buf2);
#       $self->report ('match');
 {
 my $self= shift;
 my $status= shift;
 print "report: $status\n";
 }

sub progress
 {
 my ($self, $count, $offset)= @_;
 # don't know percent yet.
 print "progress: $count chunks, $offset bytes\n";
 }

1;

__END__

=head1 NAME

CompareFiles - Perl extension for comparing binary files

=head1 SYNOPSIS

Simple "canned" usage:

  use Dlugosz::CompareFiles ('compare_files');
  compare_files (@ARGV);



=head1 DESCRIPTION

This is different from File::Compare, in that it does a line-by-line c
+omparison.  This module,
in contrast, does not assume the data is organized as lines, but reads
+ huge chunks and compares
in large gulps and doesn't care about line ending characters.


=head2 EXPORT

None by default.


=head1 AUTHOR

John M. Dlugosz, john@dlugosz.com, http://www.dlugosz.com

=head1 SEE ALSO

perl(1).

=cut