Hi, I wrote a similar script that uses MD5 hashes to detect duplicates. It is not cleaned up or optimized, but it does the intended job. It is now in my tool collection.
#!/usr/bin/perl
#
# Find duplicate files in the specified directories, using MD5 digests
# to identify files with identical content.
#
# (C) 2009 S.M.Mahesh

use strict;
use warnings;
use File::Find;
use Digest::MD5;

my $version = 0.1;

# Map: hex MD5 digest => arrayref of file paths sharing that digest.
my %md5sums;

# One reusable digest object; addfile()/hexdigest() resets it each time.
my $md5 = Digest::MD5->new();

# Print usage information and exit with a non-zero status.
sub Usage {
    print <<USAGEDOC;
$0 v$version - FindDuplicate script

USAGE:
    $0 <DIR1> [DIR2...DIRn]

where,
    DIR1..DIRn  Specifies the directories to search

EXAMPLE:
    $0 /home/user/downloads /home/user/documents
USAGEDOC
    exit 1;
}

# File::Find callback: hash each regular file and record its path under
# its digest. Requires find() to be invoked with no_chdir => 1 so that
# $File::Find::name is always a usable path from the original cwd.
sub wanted {
    my $path = $File::Find::name;

    # Check -l BEFORE -f: -f follows symlinks, so a link to a regular
    # file would otherwise slip through and be hashed twice.
    return if -l $path;            # skip symlinks
    return unless -f $path;        # skip anything that is not a plain file

    # 3-arg open with a lexical filehandle; report and skip on failure.
    open(my $fh, '<', $path) or do {
        print "ERROR: Could not open '$path' for reading\n";
        return;
    };
    binmode($fh);                  # raw bytes: digest must not be text-mangled
    my $sum = $md5->addfile($fh)->hexdigest();
    close($fh);

    # Autovivification creates the arrayref on first use of a digest.
    push @{ $md5sums{$sum} }, $path;
    return;
}

Usage() if @ARGV == 0;

foreach my $dir (@ARGV) {
    print "$dir \n";
    unless (-d $dir) {
        print "ERROR: '$dir' is not a valid directory\n";
        next;
    }
    # no_chdir keeps the working directory fixed, so relative start
    # directories still resolve correctly inside wanted().
    find({ wanted => \&wanted, no_chdir => 1 }, $dir);
}

print "\n", '-' x 25, "\n";
print "Printing duplicate files (if any)\n";
print '-' x 25, "\n\n";

# Report every digest shared by two or more files, sorted for stable output.
foreach my $sum (sort keys %md5sums) {
    my $files = $md5sums{$sum};
    next unless @$files > 1;       # unique files are not duplicates
    print "$sum :\n";
    print "\t $_\n" for @$files;
    print "\n";
}
print '-' x 25, "\n";
Mahesh
In reply to Re^2: Find duplicate files with exact same files noted
by smahesh
in thread Find duplicate files with exact same files noted
by Lady_Aleena
| For: | Use:    |
| &    | &amp;   |
| <    | &lt;    |
| >    | &gt;    |
| [    | &#91;   |
| ]    | &#93;   |