| Category: | Utility |
| Author/Contact Info | John M. Dlugosz www.dlugosz.com/tools |
| Description: | I got un-understandable errors from the FC.exe that comes with Windows, so I wrote my own simple file compare program that does understandable reporting on error checking (which is the whole reason for running such a program!). I figured that would be reusable, so I started generalizing to a module. This is different from File::Compare, which does a line-by-line comparison. This module, in contrast, does not assume the data is organized as lines, but reads huge chunks and compares in large gulps and doesn't care about line ending characters. It is designed to be derived from to provide progress indocator and results within a larger program, as needed. It has support logic to provide a canned file-compare program with just a single call. |
Note: website has installer and sample program.
package Dlugosz::CompareFiles;
require 5.005_62;
use strict;
use warnings;
use Carp;
use File::Spec;
use FileHandle;
require Exporter;
our @ISA = qw(Exporter);
our %EXPORT_TAGS = ( 'all' => [ qw( compare_files ) ] );
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
our @EXPORT = qw(); #nothing exported by default.
our $VERSION = '0.01';
our $default_chunksize= 10*1024*1024;
sub compare_files
{
my $x= __PACKAGE__ ->new();
my $flag= $x->easy_arguments (@_);
if ($flag == 2) {
# compare directories
}
elsif ($flag == 1) {
# compare files
# TO-DO >> if both files have names, compare size first.
return $x->compare_handles();
}
else {
# this should never happen. easy_arguments itself carp's if not 1
+ or 2.
carp "Unknown or unsupported operation";
}
}
sub new
{
my $class= shift;
carp __PACKAGE__ . "::new takes package name as argument, only." if
+ref ($class);
my $self= bless
{
chunksize => $default_chunksize # default chunk size for
+ reading
}, $class;
return $self;
}
sub _digest_arg
{
my $argument= shift;
my %info;
if (ref($argument)) {
# assume it's a handle.
$info{handle}= $argument;
$info{type}= 1;
$info{I_close_handle}= 0; # I didn't open it.
}
else {
$argument= File::Spec->canonpath ($argument); #specifically, remo
+ves a trailing (back?)slash, necessary for -d to work under Win32.
# caveat: if removing the trailing slash turns it into the name of
+ an existing file, than it *should* be an error,
# since adding the slash means you meant it to be a directory. Th
+is won't catch that -- no standard way to access
# the delimiter character to check for it.
if (-d $argument) {
$info{type}= 4; # directory
$info{directory}= $argument;
}
else {
# assume it's the name of a file
$info{type}= 2; #file name
$info{filename}= $argument;
}
}
return \%info;
}
sub _open
{
my $p= shift;
unless (exists $p->{handle}) {
$p->{handle}= new FileHandle;
open $p->{handle}, "< $p->{filename}"
or croak qq(Can't open file "$p->{filename}", ($!)($^E));
binmode $p->{handle};
$p->{I_close_handle}= 1;
}
}
sub easy_arguments
{
my $self= shift;
croak "no arguments specified" if 0 == @_;
croak "too many arguments specified" if 2 < @_;
$self->{leftarg}= _digest_arg (shift @_);
$self->{rightarg}= _digest_arg ((shift @_)||'.');
my $mode= $self->{leftarg}{type} | $self->{rightarg}{type};
if ($mode == 5) {
croak "Cannot compare a handle against a directory name";
}
elsif ($mode == 4) { return 2; } #compare directories
elsif ($mode == 1) { return 1; } # two open handles -- easy.
if ($mode == 6) {
# imply the same file name (as the other argument) in the specifie
+d directory.
my ($left,$right)= ($self->{leftarg}, $self->{rightarg});
($left,$right)= ($right.$left) if $left->{type} != 2; # file nam
+e on left, dir only on right
my ($volume, $path, $name)= File::Spec->splitpath ($left->{filenam
+e});
$right->{filename}= File::Spec->catfile ($right->{directory}, $nam
+e);
}
# open
_open ($self->{leftarg});
_open ($self->{rightarg});
return 1;
}
sub compare_handles
{
my $self= shift;
my ($buf1, $buf2);
my $offset = 0;
my $progress_count= 0;
for (;;) {
my $sizeread1= read ($self->{leftarg}{handle}, $buf1, $self->{chun
+ksize});
my $sizeread2= read ($self->{rightarg}{handle}, $buf2, $self->{chu
+nksize});
if ($sizeread1 != $sizeread2) {
$self->report ('readsize', $offset, $buf1, $buf2);
return undef; # failed to match
}
if ($sizeread1 == 0) {
$self->report ('match');
return 1;
}
# compare here
if ($buf1 ne $buf2) {
$self->report ('nonmatch', $offset, $buf1, $buf2);
return undef; # failed to match
}
# increment location
$offset += $sizeread1;
$self->progress (++$progress_count, $offset);
}
return 1; # status for OK.
}
sub compare_directories
{
}
sub report
# this is called with one of the following argument lists:
# $self->report ('readsize', $offset, $buf1, $buf2);
# $self->report ('nonmatch', $offset, $buf1, $buf2);
# $self->report ('match');
{
my $self= shift;
my $status= shift;
print "report: $status\n";
}
sub progress
{
my ($self, $count, $offset)= @_;
# don't know percent yet.
print "progress: $count chunks, $offset bytes\n";
}
1;
__END__
=head1 NAME
CompareFiles - Perl extension for comparing binary files
=head1 SYNOPSIS
Simple "canned" usage:
use Dlugosz::CompareFiles ('compare_files');
compare_files (@ARGV);
=head1 DESCRIPTION
This is different from File::Compare, in that it does a line-by-line c
+omparison. This module,
in contrast, does not assume the data is organized as lines, but reads
+ huge chunks and compares
in large gulps and doesn't care about line ending characters.
=head2 EXPORT
None by default.
=head1 AUTHOR
John M. Dlugosz, john@dlugosz.com, http://www.dlugosz.com
=head1 SEE ALSO
perl(1).
=cut
|
|
|
|---|