Note: website has installer and sample program.
package Dlugosz::CompareFiles; require 5.005_62; use strict; use warnings; use Carp; use File::Spec; use FileHandle; require Exporter; our @ISA = qw(Exporter); our %EXPORT_TAGS = ( 'all' => [ qw( compare_files ) ] ); our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); our @EXPORT = qw(); #nothing exported by default. our $VERSION = '0.01'; our $default_chunksize= 10*1024*1024; sub compare_files { my $x= __PACKAGE__ ->new(); my $flag= $x->easy_arguments (@_); if ($flag == 2) { # compare directories } elsif ($flag == 1) { # compare files # TO-DO >> if both files have names, compare size first. return $x->compare_handles(); } else { # this should never happen. easy_arguments itself carp's if not 1 + or 2. carp "Unknown or unsupported operation"; } } sub new { my $class= shift; carp __PACKAGE__ . "::new takes package name as argument, only." if +ref ($class); my $self= bless { chunksize => $default_chunksize # default chunk size for + reading }, $class; return $self; } sub _digest_arg { my $argument= shift; my %info; if (ref($argument)) { # assume it's a handle. $info{handle}= $argument; $info{type}= 1; $info{I_close_handle}= 0; # I didn't open it. } else { $argument= File::Spec->canonpath ($argument); #specifically, remo +ves a trailing (back?)slash, necessary for -d to work under Win32. # caveat: if removing the trailing slash turns it into the name of + an existing file, than it *should* be an error, # since adding the slash means you meant it to be a directory. Th +is won't catch that -- no standard way to access # the delimiter character to check for it. if (-d $argument) { $info{type}= 4; # directory $info{directory}= $argument; } else { # assume it's the name of a file $info{type}= 2; #file name $info{filename}= $argument; } } return \%info; } sub _open { my $p= shift; unless (exists $p->{handle}) { $p->{handle}= new FileHandle; open $p->{handle}, "< $p->{filename}" or croak qq(Can't open file "$p->{filename}", ($!)($^E)); binmode $p->{handle}; $p->{I_close_handle}= 1; } } sub easy_arguments { my $self= shift; croak "no arguments specified" if 0 == @_; croak "too many arguments specified" if 2 < @_; $self->{leftarg}= _digest_arg (shift @_); $self->{rightarg}= _digest_arg ((shift @_)||'.'); my $mode= $self->{leftarg}{type} | $self->{rightarg}{type}; if ($mode == 5) { croak "Cannot compare a handle against a directory name"; } elsif ($mode == 4) { return 2; } #compare directories elsif ($mode == 1) { return 1; } # two open handles -- easy. if ($mode == 6) { # imply the same file name (as the other argument) in the specifie +d directory. my ($left,$right)= ($self->{leftarg}, $self->{rightarg}); ($left,$right)= ($right.$left) if $left->{type} != 2; # file nam +e on left, dir only on right my ($volume, $path, $name)= File::Spec->splitpath ($left->{filenam +e}); $right->{filename}= File::Spec->catfile ($right->{directory}, $nam +e); } # open _open ($self->{leftarg}); _open ($self->{rightarg}); return 1; } sub compare_handles { my $self= shift; my ($buf1, $buf2); my $offset = 0; my $progress_count= 0; for (;;) { my $sizeread1= read ($self->{leftarg}{handle}, $buf1, $self->{chun +ksize}); my $sizeread2= read ($self->{rightarg}{handle}, $buf2, $self->{chu +nksize}); if ($sizeread1 != $sizeread2) { $self->report ('readsize', $offset, $buf1, $buf2); return undef; # failed to match } if ($sizeread1 == 0) { $self->report ('match'); return 1; } # compare here if ($buf1 ne $buf2) { $self->report ('nonmatch', $offset, $buf1, $buf2); return undef; # failed to match } # increment location $offset += $sizeread1; $self->progress (++$progress_count, $offset); } return 1; # status for OK. } sub compare_directories { } sub report # this is called with one of the following argument lists: # $self->report ('readsize', $offset, $buf1, $buf2); # $self->report ('nonmatch', $offset, $buf1, $buf2); # $self->report ('match'); { my $self= shift; my $status= shift; print "report: $status\n"; } sub progress { my ($self, $count, $offset)= @_; # don't know percent yet. print "progress: $count chunks, $offset bytes\n"; } 1; __END__ =head1 NAME CompareFiles - Perl extension for comparing binary files =head1 SYNOPSIS Simple "canned" usage: use Dlugosz::CompareFiles ('compare_files'); compare_files (@ARGV); =head1 DESCRIPTION This is different from File::Compare, in that it does a line-by-line c +omparison. This module, in contrast, does not assume the data is organized as lines, but reads + huge chunks and compares in large gulps and doesn't care about line ending characters. =head2 EXPORT None by default. =head1 AUTHOR John M. Dlugosz, john@dlugosz.com, http://www.dlugosz.com =head1 SEE ALSO perl(1). =cut

In reply to File Compare by John M. Dlugosz

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.