package Dlugosz::CompareFiles; require 5.005_62; use strict; use warnings; use Carp; use File::Spec; use FileHandle; require Exporter; our @ISA = qw(Exporter); our %EXPORT_TAGS = ( 'all' => [ qw( compare_files ) ] ); our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); our @EXPORT = qw(); #nothing exported by default. our $VERSION = '0.01'; our $default_chunksize= 10*1024*1024; sub compare_files { my $x= __PACKAGE__ ->new(); my $flag= $x->easy_arguments (@_); if ($flag == 2) { # compare directories } elsif ($flag == 1) { # compare files # TO-DO >> if both files have names, compare size first. return $x->compare_handles(); } else { # this should never happen. easy_arguments itself carp's if not 1 or 2. carp "Unknown or unsupported operation"; } } sub new { my $class= shift; carp __PACKAGE__ . "::new takes package name as argument, only." if ref ($class); my $self= bless { chunksize => $default_chunksize # default chunk size for reading }, $class; return $self; } sub _digest_arg { my $argument= shift; my %info; if (ref($argument)) { # assume it's a handle. $info{handle}= $argument; $info{type}= 1; $info{I_close_handle}= 0; # I didn't open it. } else { $argument= File::Spec->canonpath ($argument); #specifically, removes a trailing (back?)slash, necessary for -d to work under Win32. # caveat: if removing the trailing slash turns it into the name of an existing file, than it *should* be an error, # since adding the slash means you meant it to be a directory. This won't catch that -- no standard way to access # the delimiter character to check for it. if (-d $argument) { $info{type}= 4; # directory $info{directory}= $argument; } else { # assume it's the name of a file $info{type}= 2; #file name $info{filename}= $argument; } } return \%info; } sub _open { my $p= shift; unless (exists $p->{handle}) { $p->{handle}= new FileHandle; open $p->{handle}, "< $p->{filename}" or croak qq(Can't open file "$p->{filename}", ($!)($^E)); binmode $p->{handle}; $p->{I_close_handle}= 1; } } sub easy_arguments { my $self= shift; croak "no arguments specified" if 0 == @_; croak "too many arguments specified" if 2 < @_; $self->{leftarg}= _digest_arg (shift @_); $self->{rightarg}= _digest_arg ((shift @_)||'.'); my $mode= $self->{leftarg}{type} | $self->{rightarg}{type}; if ($mode == 5) { croak "Cannot compare a handle against a directory name"; } elsif ($mode == 4) { return 2; } #compare directories elsif ($mode == 1) { return 1; } # two open handles -- easy. if ($mode == 6) { # imply the same file name (as the other argument) in the specified directory. my ($left,$right)= ($self->{leftarg}, $self->{rightarg}); ($left,$right)= ($right.$left) if $left->{type} != 2; # file name on left, dir only on right my ($volume, $path, $name)= File::Spec->splitpath ($left->{filename}); $right->{filename}= File::Spec->catfile ($right->{directory}, $name); } # open _open ($self->{leftarg}); _open ($self->{rightarg}); return 1; } sub compare_handles { my $self= shift; my ($buf1, $buf2); my $offset = 0; my $progress_count= 0; for (;;) { my $sizeread1= read ($self->{leftarg}{handle}, $buf1, $self->{chunksize}); my $sizeread2= read ($self->{rightarg}{handle}, $buf2, $self->{chunksize}); if ($sizeread1 != $sizeread2) { $self->report ('readsize', $offset, $buf1, $buf2); return undef; # failed to match } if ($sizeread1 == 0) { $self->report ('match'); return 1; } # compare here if ($buf1 ne $buf2) { $self->report ('nonmatch', $offset, $buf1, $buf2); return undef; # failed to match } # increment location $offset += $sizeread1; $self->progress (++$progress_count, $offset); } return 1; # status for OK. } sub compare_directories { } sub report # this is called with one of the following argument lists: # $self->report ('readsize', $offset, $buf1, $buf2); # $self->report ('nonmatch', $offset, $buf1, $buf2); # $self->report ('match'); { my $self= shift; my $status= shift; print "report: $status\n"; } sub progress { my ($self, $count, $offset)= @_; # don't know percent yet. print "progress: $count chunks, $offset bytes\n"; } 1; __END__ =head1 NAME CompareFiles - Perl extension for comparing binary files =head1 SYNOPSIS Simple "canned" usage: use Dlugosz::CompareFiles ('compare_files'); compare_files (@ARGV); =head1 DESCRIPTION This is different from File::Compare, in that it does a line-by-line comparison. This module, in contrast, does not assume the data is organized as lines, but reads huge chunks and compares in large gulps and doesn't care about line ending characters. =head2 EXPORT None by default. =head1 AUTHOR John M. Dlugosz, john@dlugosz.com, http://www.dlugosz.com =head1 SEE ALSO perl(1). =cut