use strict;
use warnings;
use File::Find;
use List::Util qw( reduce );

# Find files with identical content below the directories named on the
# command line (default: the current directory) and replace the duplicates
# with hard links to a single copy.
#
# Candidates are narrowed down in three passes, cheapest first: file size,
# then the checksum printed by sum(1), then a byte-for-byte cmp(1).

# The partition functions treat their input as data that has already been
# partitioned (a list of array refs); they partition it further and return
# the new, finer list of array refs.

# partition_by_calculated_key BLOCK LIST_OF_ARRAYREFS
#
# Splits every input group into sub-groups of the items for which BLOCK
# yields the same key (keys are compared as strings).  BLOCK sees the item
# in $_ and as its first argument.  Sub-groups are returned in first-seen
# order; an undefined key is treated as the empty string.
sub partition_by_calculated_key(&@) {
    my $key_of = shift;
    return map {
        my $members = $_;
        my ( %group_for, @groups );
        for (@$members) {
            my $key = $key_of->($_);
            $key = '' unless defined $key;
            push @groups, ( $group_for{$key} = [] )
                unless $group_for{$key};
            push @{ $group_for{$key} }, $_;
        }
        @groups;
    } @_;
}

# partition_by_comparison_function BLOCK LIST_OF_ARRAYREFS
#
# Splits every input group into sub-groups of mutually "equal" items.
# BLOCK must return true when its two operands are equal; it receives them
# both as arguments and in $a / $b (new item first, then the first member
# of an existing sub-group).  An item that equals no existing sub-group
# starts a new one, so no item is ever dropped.
sub partition_by_comparison_function(&@) {
    my $is_equal = shift;
    return map {
        my $members = $_;
        my @groups;
        ITEM:
        for (@$members) {
            for my $group (@groups) {
                my $seen = $group->[0];
                local ( $a, $b ) = ( $_, $seen );
                if ( $is_equal->( $_, $seen ) ) {
                    push @$group, $_;
                    next ITEM;
                }
            }
            # Groups live in an array rather than a hash keyed by the
            # item, so items that merely stringify alike are not merged.
            push @groups, [$_];
        }
        @groups;
    } @_;
}

# Keeps only the groups that hold more than one item.
sub _drop_singletons {
    return grep { @$_ > 1 } @_;
}

# Returns the checksum and block count printed by sum(1) for $file, joined
# by a space, or '' when sum could not be run or printed too little.
sub _checksum_of {
    my ($file) = @_;

    # List-form pipe open: the file name never passes through a shell.
    open my $pipe, '-|', 'sum', '--', $file or return '';
    my $output = do { local $/; <$pipe> };
    close $pipe;

    return '' unless defined $output;

    # Some sum implementations append the file name; using only the first
    # two fields keeps the key comparable between files.
    my @fields = split ' ', $output;
    return '' if @fields < 2;
    return "@fields[ 0, 1 ]";
}

# True when cmp(1) reports that the two files have identical content.
sub _same_content {
    my ( $first, $second ) = @_;
    return system( 'cmp', '-s', '--', $first, $second ) == 0;
}

# Replaces $target with a hard link to $source.  Returns true on success
# (or when both names already refer to the same file); warns and returns
# false otherwise.
sub _hardlink {
    my ( $source, $target ) = @_;

    my @source_stat = lstat $source;
    my @target_stat = lstat $target;
    unless ( @source_stat && @target_stat ) {
        warn "cannot stat '$source' or '$target': $!\n";
        return;
    }

    # Same device and inode: already linked.  This must be checked first,
    # because renaming one link of a file over another link of the same
    # file is a no-op that would leave the temporary name behind.
    return 1
        if $source_stat[0] == $target_stat[0]
        && $source_stat[1] == $target_stat[1];

    # Link under a temporary name next to the target, then rename over the
    # target, so the target name never disappears even if a step fails.
    my $temp = "$target.hardlink-$$";
    unless ( link $source, $temp ) {
        warn "cannot link '$source' to '$temp': $!\n";
        return;
    }
    unless ( rename $temp, $target ) {
        my $error = $!;
        unlink $temp;
        warn "cannot replace '$target': $error\n";
        return;
    }
    return 1;
}

#####################################

# main(DIR, ...)
#
# Scans the given directories (default '.') and hardlinks every set of
# regular files with identical content.
sub main {
    my @roots = @_ ? @_ : ('.');

    my @files;
    find(
        sub {
            # lstat, so symlinks are not followed; keep regular files only
            # (no directories, symlinks, FIFOs, sockets or devices).
            return unless lstat $_;
            return unless -f _;
            push @files, $File::Find::name;
        },
        @roots
    );

    my @groups = ( \@files );

    # first do the cheap one:
    @groups = _drop_singletons( partition_by_calculated_key { -s $_ } @groups );

    # then the expensive one:
    @groups = _drop_singletons(
        partition_by_calculated_key { _checksum_of($_) } @groups );

    # now group those which are "equal":
    @groups = _drop_singletons(
        partition_by_comparison_function { _same_content( $a, $b ) } @groups );

    # finally, hardlink the files in each group to its first member:
    for my $group (@groups) {
        my ( $keep, @duplicates ) = @$group;
        _hardlink( $keep, $_ ) for @duplicates;
    }
    return;
}

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. by a test) without touching the file system.
main(@ARGV) unless caller;

1;