#!/usr/bin/perl -w
#
# Replace duplicate files under a directory tree with hard links to a single
# copy, then report how many files were linked and how many bytes were saved.
#
# Usage: pass the directory to scan as the first argument; the current
# directory is scanned when no argument is given.

use strict;
use warnings;

use File::Basename qw(dirname);
use File::Find::Duplicates;
use File::Temp ();

my %stats = ( files_linked => 0, space_saved => 0 );

# Read directory from command line, or default to current.  A defined/length
# test is used (rather than ||) so a directory literally named "0" is honoured.
my $directory
    = ( defined $ARGV[0] && length $ARGV[0] ) ? $ARGV[0] : '.';

# Find duplicates recursively in that directory.
my @dupes = find_duplicate_files($directory);

# For each set of duplicate files, create the hard links and record the
# outcome in the stats hash.
foreach my $set (@dupes) {

    # Work on a copy so the set object's own file list is left untouched.
    my @files = @{ $set->files };

    print $set->size, " bytes each:\n", join( "\n", @files ), "\n";

    # The first file of the set is kept; every other one is linked to it.
    my $original      = shift @files;
    my $number_linked = fuse( $original, \@files );

    $stats{files_linked} += $number_linked;
    $stats{space_saved}  += $number_linked * $set->size;
}

# Report the stats.
print "Files linked: $stats{files_linked}\n";
print "Space saved: $stats{space_saved} bytes\n";

# fuse( $original, \@duplicates )
#
# Replace each path in @duplicates with a hard link to $original.
#
# The replacement is done in two steps (link to a temporary name, then rename
# the temporary name over the duplicate) so the duplicate's path is never
# left missing if a step fails.
#
# Failures to link or rename a particular duplicate are reported with warn()
# and that duplicate is skipped.  Dies only if a leftover temporary file
# cannot be removed after a successful rename.
#
# Returns the number of duplicates actually replaced (0 if none).
sub fuse {
    my ( $original, $duplicates ) = @_;

    # Start at 0 so callers can do arithmetic on the result even when
    # nothing was linked.
    my $files_linked = 0;

    my ( $orig_dev, $orig_ino ) = stat $original;

    DUPLICATE:
    foreach my $duplicate ( @{$duplicates} ) {

        # Skip paths that already share the original's inode: linking them
        # again would free no space and must not be counted.  The inode is
        # required to be non-zero because some platforms report 0 for every
        # file, which would make all files look identical.
        my ( $dup_dev, $dup_ino ) = stat $duplicate;
        if (   $orig_ino
            && $dup_ino
            && $orig_dev eq $dup_dev
            && $orig_ino eq $dup_ino )
        {
            next DUPLICATE;
        }

        # Step 1: link original to a temporary name beside the duplicate, so
        # the rename in step 2 stays within a single directory.
        my $tempfile
            = File::Temp::tempnam( dirname($duplicate), 'X' x 6 );
        if ( !link( $original, $tempfile ) ) {
            warn "Couldn't link $original to $tempfile: $!\n";
            next DUPLICATE;
        }

        # Step 2: move tempfile over the duplicate.
        if ( !rename( $tempfile, $duplicate ) ) {
            my $rename_error = $!;

            # Do not leave the temporary link behind in the tree.
            unlink $tempfile
                or warn "Couldn't delete temporary file $tempfile: $!\n";
            warn "Couldn't replace $duplicate: $rename_error\n";
            next DUPLICATE;
        }

        # rename() may succeed without removing the source name when both
        # names already refer to the same file; clean up in that case.
        if ( -e $tempfile ) {
            unlink $tempfile
                or die "Couldn't delete temporary file $tempfile: $!";
        }

        ++$files_linked;
    }

    return $files_linked;
}