In reply to: is running md5sum on every file costly?!
If you wanted to loop through all files looking for duplicates, you could use File::Find to traverse them and store each file's MD5 digest in a hash.
#!/usr/bin/perl
# Find files with identical content under the directories given on the
# command line.  Files are first bucketed by size (cheap stat), and only
# same-sized files are MD5-hashed.  Each set of duplicates is printed on
# one line, names sorted and space-separated.
use strict;
use warnings;
use File::Find;
use Digest::MD5;

# find_duplicate_groups(@dirs)
# Returns a list of array refs, each holding the sorted names of one
# group of files with identical content.  Empty files are skipped, as
# in the original script.  Unreadable files are warned about and skipped
# rather than silently ignored.
sub find_duplicate_groups {
    my @dirs = @_;
    my %files_of_size;

    # Pass 1: bucket by size so we only hash candidate duplicates.
    if (@dirs) {
        find(sub {
            return unless -f $_;
            my $size = -s _;
            return unless $size;    # skip empty files
            push @{ $files_of_size{$size} }, $File::Find::name;
        }, @dirs);
    }

    my @groups;
    for my $names (values %files_of_size) {
        next unless @$names > 1;

        # Pass 2: hash each same-sized file.  Use an explicit 3-arg open;
        # the original reused the magic <> diamond, which is a 2-arg open
        # and would execute a filename ending in '|' as a shell command.
        my %files_of_digest;
        for my $name (@$names) {
            open my $fh, '<', $name or do {
                warn "cannot open $name: $!\n";
                next;
            };
            binmode $fh;    # hash raw bytes, not CRLF-translated text
            # addfile() streams the file instead of slurping it whole.
            my $digest = Digest::MD5->new->addfile($fh)->hexdigest;
            close $fh;
            push @{ $files_of_digest{$digest} }, $name;
        }

        for my $same (values %files_of_digest) {
            push @groups, [ sort @$same ] if @$same > 1;
        }
    }
    return @groups;
}

print join(' ', @$_), "\n" for find_duplicate_groups(@ARGV);
|
|---|