in reply to List Duplicate Files in a given directory

When I do this I tend to be more interested in which files match, so I take one of these two paths: report by md5, or report by file.

#!/usr/bin/perl
use warnings;
use strict;
##############
my $md5files = {};
my $filesmd5 = {};
while (my $line = <DATA>) {
    chomp $line;
    my ($fn, $md5) = split ' ', $line, 2;   # filename first, digest second
    push @{ $md5files->{$md5} }, $fn;
    $filesmd5->{$fn} = $md5;
} # line

# path 1: report by md5, listing every file that shares it
for my $md5 (keys %$md5files) {
    my $md5list = $md5files->{$md5};
    if (scalar(@$md5list) == 1) {
        print $md5."\n ".$md5list->[0]."\n";
    }
    else {
        print $md5."\n";
        for my $file (sort @$md5list) {
            print ' '.$file."\n";
        }
    }
} # md5

# path 2: report by file, naming its duplicates
for my $file (sort keys %$filesmd5) {
    my $md5list = $md5files->{ $filesmd5->{$file} };
    if (scalar(@$md5list) == 1) {
        print $file." is unique\n";
    }
    else {
        print $file." is the same as\n";
        for my $filed (sort @$md5list) {
            print ' '.$filed."\n" unless ($file eq $filed);
        }
    }
} # file
exit;
__DATA__
file2 d41d8cd98f00b204e9800998ecf8427e
file1 5bb062356cddb5d2c0ef41eb2660cb06
file3 d41d8cd98f00b204e9800998ecf8427e
file4 d41d8cd98f00b204e9800998ecf8427e
file5 5bb062356cddb5d2c0ef41eb2660cb06
file6 d617c2deabd27ff86ca9825b2e7578d4
d617c2deabd27ff86ca9825b2e7578d4
 file6
d41d8cd98f00b204e9800998ecf8427e
 file2
 file3
 file4
5bb062356cddb5d2c0ef41eb2660cb06
 file1
 file5
file1 is the same as
 file5
file2 is the same as
 file3
 file4
file3 is the same as
 file2
 file4
file4 is the same as
 file2
 file3
file5 is the same as
 file1
file6 is unique
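
A note on the input format: the __DATA__ lines above put the filename first and the digest second, which is the reverse of what md5sum itself prints. A minimal sketch of the read loop adjusted to accept real md5sum output on STDIN (run as md5sum * | ./script.pl), with the split order swapped:

while (my $line = <STDIN>) {
    chomp $line;
    my ($md5, $fn) = split ' ', $line, 2;   # md5sum prints "digest  filename"
    push @{ $md5files->{$md5} }, $fn;
    $filesmd5->{$fn} = $md5;
}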

Re^2: List Duplicate Files in a given directory
by pr33 (Scribe) on Jul 31, 2017 at 05:57 UTC

    Thanks Huck and Keybot. From your solution, it is clear to use md5sums as the keys of the hash, with each value a reference to an array of files. I have changed my original code to build a hash of arrays this way.

    #!/usr/bin/perl
    use warnings;
    use strict;
    ##############
    my $dir = $ARGV[0];
    my %md5sum;
    opendir(my $dh, $dir) || die "Unable to Open the Directory: $!\n";
    chdir $dir or die "Cannot Change directory: $!\n";
    while (my $file = readdir $dh) {
        next if $file =~ /^\.{1,2}$/;    # skip . and ..
        if (-f $file) {
            # first whitespace-separated field of md5sum's output is the digest
            my ($md) = split /\s+/, qx(/usr/bin/md5sum $file);
            # push autovivifies the array, so no exists() check is needed
            push @{ $md5sum{$md} }, $file;
        }
    }
    closedir($dh);
    foreach my $ky (keys %md5sum) {
        if (scalar(@{ $md5sum{$ky} }) == 1) {
            print "Unique File: @{$md5sum{$ky}} , Md5sum: $ky\n";
        }
        else {
            print "Duplicate Files: @{$md5sum{$ky}}, Md5sum: $ky\n";
        }
    }
    -bash-3.2$ ./duplicate_files.pl directory
    Duplicate Files: file4 file2 file3, Md5sum: d41d8cd98f00b204e9800998ecf8427e
    Unique File: file6 , Md5sum: d617c2deabd27ff86ca9825b2e7578d4
    Duplicate Files: file1 file5, Md5sum: 5bb062356cddb5d2c0ef41eb2660cb06
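
    One further cleanup to consider: qx(/usr/bin/md5sum $file) spawns a process per file and will break on a filename containing spaces or shell metacharacters. The core Digest::MD5 module can compute the digest in-process instead. A minimal sketch of a replacement helper (the file_md5 name is just for illustration):

    use Digest::MD5;

    # compute the hex digest in-process instead of shelling out to md5sum
    sub file_md5 {
        my ($file) = @_;
        open my $fh, '<', $file or die "Cannot open $file: $!\n";
        binmode $fh;    # hash the raw bytes, not decoded text
        return Digest::MD5->new->addfile($fh)->hexdigest;
    }

    Inside the readdir loop, my $md = file_md5($file); would then replace the qx() line.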