Hello Monks, I am looking for an efficient way to print the duplicate files within a directory.
Here is my code.
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;    # not used by this script; kept from the original
use Digest::MD5;
use File::Spec;

##############
# Report, for every plain file directly inside a directory, whether another
# file in that directory has the same MD5 digest.
#
# Usage: duplicate_files.pl DIRECTORY
#
# One line is printed per file, sorted by file name so the output is stable
# between runs.

# Run only when executed as a script, so the subs below can also be loaded
# and called from other code without triggering the report.
main(@ARGV) unless caller;

# Entry point.
#   $dir - directory to scan (first command-line argument).
# Dies with a usage message when no directory is given, or with the
# opendir error when the directory cannot be read.
sub main {
    my ($dir) = @_;
    die "Usage: $0 DIRECTORY\n" unless defined $dir && length $dir;

    print for report_lines(md5_of_files($dir));
    return;
}

# Compute the MD5 hex digest of each plain file directly inside $dir.
#   $dir - directory to scan (not recursive).
# Returns a flat (file name => hex digest) list suitable for a hash.
# Files that cannot be opened are reported on STDERR and skipped.
sub md5_of_files {
    my ($dir) = @_;

    opendir(my $dh, $dir) or die "Unable to Open the Directory: $!\n";

    my %md5_of;
    while (defined(my $name = readdir $dh)) {
        # Test the full path so no chdir is needed; -f also rejects the
        # '.' and '..' entries because they are directories.
        my $path = File::Spec->catfile($dir, $name);
        next unless -f $path;

        my $fh;
        unless (open $fh, '<', $path) {
            warn "Cannot read $path: $!\n";
            next;
        }
        binmode $fh;

        # Digest in-process: no shell is involved, so file names containing
        # spaces or shell metacharacters are handled safely.
        $md5_of{$name} = Digest::MD5->new->addfile($fh)->hexdigest;
        close $fh;
    }
    closedir $dh;

    return %md5_of;
}

# Build the report lines from a (file name => digest) list.
# A file counts as a duplicate when its digest occurs more than once.
# Returns one newline-terminated line per file, sorted by file name.
sub report_lines {
    my (%md5_of) = @_;

    # Count each digest once up front instead of rescanning a list of
    # duplicated digests for every file.
    my %count_for;
    $count_for{$_}++ for values %md5_of;

    my @lines;
    for my $file (sort keys %md5_of) {
        my $md5 = $md5_of{$file};
        if ($count_for{$md5} > 1) {
            push @lines, "$file is a duplicate file with MD5 of $md5\n";
        }
        else {
            push @lines,
                "$file is not a duplicate file, It's md5sum is $md5\n";
        }
    }
    return @lines;
}
-bash-3.2$ ./duplicate_files.pl /users/scripts/perl/test/
file2 is a duplicate file with MD5 of d41d8cd98f00b204e9800998ecf8427e
file1 is a duplicate file with MD5 of 5bb062356cddb5d2c0ef41eb2660cb06
file3 is a duplicate file with MD5 of d41d8cd98f00b204e9800998ecf8427e
file4 is a duplicate file with MD5 of d41d8cd98f00b204e9800998ecf8427e
file5 is a duplicate file with MD5 of 5bb062356cddb5d2c0ef41eb2660cb06
file6 is not a duplicate file, It's md5sum is d617c2deabd27ff86ca9825b2e7578d4
In reply to List Duplicate Files in a given directory by pr33
| For: | Use: |
|------|------|
| & | `&amp;` |
| < | `&lt;` |
| > | `&gt;` |
| [ | `&#91;` |
| ] | `&#93;` |