#!/usr/bin/perl
#
# Report files whose contents are identical.
#
# Input:  a list of file names, one per line, read through the diamond
#         operator -- i.e. from the list file(s) named on the command line,
#         or from STDIN when no arguments are given.
# Output: one line per duplicate pair on STDOUT, in the form
#         "<file A> - <file B> are duplicates".
#
# Files are first grouped by an "MD5-digest size" signature; every pair
# inside a group is then compared byte-for-byte, so an MD5 collision alone
# never produces a report.  Files that cannot be opened or read are skipped
# with a warning on STDERR.

use strict;
use warnings;

use Digest::MD5;
use File::Compare qw(compare);

my @files = <>;
chomp @files;
@files = grep { length } @files;    # ignore blank lines in the name list

# Signature ("digest size") => reference to the list of files that have it.
my %sigs;
for my $file (@files) {
    my $sig = file_signature($file);
    next if !defined $sig;          # unreadable: already warned about
    push @{ $sigs{$sig} }, $file;
}

# Only signatures shared by two or more files can hide duplicates.
# Sorting the keys keeps the report order stable from run to run.
my @shared_sigs = grep { @{ $sigs{$_} } > 1 } keys %sigs;

for my $sig ( sort @shared_sigs ) {
    my @group = @{ $sigs{$sig} };
    for my $i ( 1 .. $#group ) {
        for my $j ( 0 .. $i - 1 ) {

            # compare() returns 0 for identical content, 1 for different
            # content and -1 on error.  The files are compared under the
            # same names that were hashed, without going through a shell.
            my $status = compare( $group[$i], $group[$j] );
            if ( $status < 0 ) {
                warn "cannot compare $group[$i] and $group[$j]: $!\n";
                next;
            }
            print "$group[$i] - $group[$j] are duplicates\n"
                if $status == 0;
        }
    }
}

# Return the "digest size" signature of $file, or nothing (after a warning)
# when the file cannot be opened or read.
sub file_signature {
    my ($file) = @_;

    open( my $fh, '<', $file ) or do {
        warn "$file: $!\n";
        return;
    };
    binmode $fh;                    # hash raw bytes, no newline translation

    my $size = ( -s $fh ) || 0;     # -s yields a false value for empty files

    # addfile() streams the handle instead of slurping the whole file into
    # memory; it croaks on a read error (e.g. when $file is a directory).
    my $digest = eval { Digest::MD5->new->addfile($fh)->b64digest };
    my $error  = $@;
    close $fh;

    if ( !defined $digest ) {
        warn "$file: $error";
        return;
    }
    return "$digest $size";
}