#!/usr/bin/perl -l
#
# Remove duplicate files found below the given directories.
#
# Files are first grouped by size; an MD5 digest is computed only when at
# least two files share the same size.  When a file has the same size and
# digest as one seen earlier, it is unlinked.  Zero-length files are ignored.
#
# The -l switch on the #! line makes every print() end with a newline; the
# print statements below rely on it.

use strict;
use warnings;
use Cwd qw(abs_path);
use File::Find;
use Digest::MD5;
use Getopt::Std;

my %opt;
getopts 'i:o:', \%opt;

# Keep only the arguments that are directories, warning about the others.
@ARGV = grep { -d $_ or !warn "`$_': not a directory!\n" } @ARGV;
die <<"EOD" unless @ARGV;
Usage: $0 [options] <dir> [<dir> ...]
    -i <file>   read cached info from <file>
    -o <file>   write cached info to <file>
EOD

# %files maps a file size to what is known about files of that size:
#   - a plain string: path of the only file of that size seen so far
#                     (its digest has not been computed yet);
#   - a hash ref:     { md5 hex digest => path }, or => 1 for entries that
#                     were loaded from the cache file.
my %files;

load_cache($opt{i}) if defined $opt{i} and length $opt{i};

find {
    no_chdir   => 1,
    # Visit directory entries in case-insensitive alphabetical order so that
    # which copy is kept is predictable.
    preprocess => sub { sort { lc $a cmp lc $b } @_ },
    wanted     => \&check_file,
}, @ARGV;

save_cache($opt{o}) if defined $opt{o} and length $opt{o};

# Read "<size>  <md5>  <path>" lines from $cache_file into %files.
# A cache file that does not exist is silently skipped, so the same file can
# be given to both -i and -o on the first run.
sub load_cache {
    my ($cache_file) = @_;
    return unless -f $cache_file;

    open my $in, '<', $cache_file
        or die "Can't open `$cache_file': $!\n";
    while (my $line = <$in>) {
        chomp $line;
        unless ($line =~ /\A(\d+)\s{2}([0-9a-z]{32})\s{2}(.*)/) {
            warn "`$cache_file': line $. not in the correct format\n";
            next;
        }
        my ($size, $md5, $path) = ($1, $2, $3);

        # NOTE(review): entries whose path still exists are skipped and only
        # entries for missing files are remembered -- presumably because
        # existing files are expected to be rescanned; confirm the intent.
        next if -f $path;
        $files{$size}{$md5} = 1;
    }
    close $in;
    return;
}

# File::Find callback.  With no_chdir set, $_ holds the full path of the
# current entry.
sub check_file {
    my $path = $_;
    return unless -f $path;
    my $size = -s $path;
    return unless $size;            # skip empty (or un-stat-able) files

    my $known = $files{$size};
    if (!defined $known) {
        # First file of this size: remember it, hash it lazily later.
        $files{$size} = $path;
        return;
    }

    if (!ref $known) {
        # Second file of this size: the first one must be hashed now.
        my $first_path = $known;
        my $first_md5  = takemd5($first_path);
        $known = $files{$size}
            = defined $first_md5 ? { $first_md5 => $first_path } : {};
    }

    my $md5 = takemd5($path);
    return unless defined $md5;     # unreadable: already warned, leave alone

    if (!exists $known->{$md5}) {
        $known->{$md5} = $path;
        return;
    }

    # Never delete a file because it "duplicates" itself; this happens when
    # the directory arguments overlap or a symlink leads to the kept file.
    return if same_file($known->{$md5}, $path);

    if (unlink $path) {
        print "Removing `$path'";
    }
    else {
        warn "Can't remove `$path': $!\n";
    }
    return;
}

# True when $kept and $path resolve to the same absolute path.  Cache
# markers and paths that no longer exist never count as the same file.
sub same_file {
    my ($kept, $path) = @_;
    return 0 unless -e $kept;
    my $kept_abs = abs_path($kept);
    my $path_abs = abs_path($path);
    return 0 unless defined $kept_abs and defined $path_abs;
    return $kept_abs eq $path_abs ? 1 : 0;
}

# Append every known (size, md5, path) triple to $cache_file, using the same
# two-space separated layout that load_cache() parses.  Sizes for which only
# one, not yet hashed, file is known are not written.
sub save_cache {
    my ($cache_file) = @_;

    open my $out, '>>', $cache_file
        or die "Can't open `$cache_file' for updating: $!\n";
    for my $size (sort { $a <=> $b } keys %files) {
        my $by_md5 = $files{$size};
        next unless ref $by_md5;
        for my $md5 (sort keys %$by_md5) {
            print {$out} join '  ', $size, $md5, $by_md5->{$md5};
        }
    }
    # Buffered write errors only surface when the handle is closed.
    close $out
        or die "Can't write `$cache_file': $!\n";
    return;
}

# Return the hexadecimal MD5 digest of the contents of $file, or undef
# (after a warning) when the file cannot be opened or read.
sub takemd5 {
    my ($file) = @_;

    open my $fh, '<:raw', $file or do {
        warn "Couldn't open `$file': $!\n";
        return;
    };
    # addfile() croaks on read errors; do not let one bad file end the run.
    my $digest = eval { Digest::MD5->new->addfile($fh)->hexdigest };
    warn "Couldn't read `$file': $@" unless defined $digest;
    close $fh;
    return $digest;
}

__END__