#!/usr/bin/perl
##########################################################################
#
# How diffy works:
#   - it simply compares files and prints out the names of identical files
#   - if you give it 2 paths it compares the files in those paths
#   - if you give it 1 path, it compares just the files in this path
#   - if you give it no path, the current directory is taken
#   - if you give the option -h, hardlinks will be shown too
#
# Output:
#   one line of the form   ln -f "<file1>" "<file2>"   per identical pair,
#   grouped by ascending file size.
#
# Environment:
#   DIFFY_SEARCH_SIZE   handed to `find -size +<value>`; defaults to 20k.
#
# External tools: find (with -printf support) and cmp.
#
# TODO:
#   make it faster, smaller and more robust
#   print usage with -h and get better options
#
##########################################################################

use strict;
use warnings;

# Minimum size filter for find(1).  An empty or "0" value falls back to the
# default, exactly as the truthiness test on the environment variable did.
my $min_size = $ENV{'DIFFY_SEARCH_SIZE'} || '20k';

# --- command line --------------------------------------------------------
my $hardlinks = 0;    # 1 => also report pairs that are already hardlinked
my @dirs;             # at most two directories; extra arguments are ignored

for my $arg (@ARGV) {
    if ($arg eq '?') {
        die "*** Usage: diffy [directoy_1 [directory_2]] [-h]\n\n";
    }
    if ($arg eq '-h') {
        $hardlinks = 1;
    }
    elsif (@dirs < 2) {
        push @dirs, $arg;
    }
}

my $dir1 = @dirs     ? $dirs[0] : '.';
my $dir2 = @dirs > 1 ? $dirs[1] : $dir1;

# --- collect candidates --------------------------------------------------
my %files1 = getFiles($dir1);    # path => size in bytes
my %files2 = getFiles($dir2);

# Only files of equal size can be identical, so bucket both sides by size.
my %paths1_by_size = group_by_size(\%files1);
my %paths2_by_size = group_by_size(\%files2);

# --- compare and report, one size group at a time ------------------------
for my $size (sort { $a <=> $b } keys %paths1_by_size) {
    my $candidates = $paths2_by_size{$size} or next;

    my @pairs;
    for my $file1 (@{ $paths1_by_size{$size} }) {
        for my $file2 (@{$candidates}) {
            next if $file2 eq $file1;    # literally the same path
            push @pairs, [ $file1, $file2 ] if is_duplicate($file1, $file2);
        }
    }
    print_pairs(@pairs);
}

# Invert a path => size hash into size => [sorted paths].
# Takes a hash reference, returns a flat hash (list).
sub group_by_size {
    my ($size_of) = @_;

    my %paths_by_size;
    for my $path (sort keys %{$size_of}) {
        push @{ $paths_by_size{ $size_of->{$path} } }, $path;
    }
    return %paths_by_size;
}

# Decide whether two same-sized paths should be reported as a pair.
#   - same device and inode (already hardlinked): reported only with -h
#   - otherwise: reported when cmp(1) finds the contents identical
# A path that cannot be stat'ed (e.g. it vanished meanwhile) is never reported.
sub is_duplicate {
    my ($file1, $file2) = @_;

    my @stat1 = stat($file1) or return 0;
    my @stat2 = stat($file2) or return 0;

    # Inode numbers are only unique per device, so compare both fields.
    # String comparison avoids precision loss on very large inode numbers.
    if ($stat1[0] eq $stat2[0] && $stat1[1] eq $stat2[1]) {
        return $hardlinks;
    }

    # List form: no shell is involved, so any characters in the file names
    # are passed to cmp verbatim.
    return system('cmp', '-s', '--', $file1, $file2) == 0 ? 1 : 0;
}

# Print one "ln -f" line per pair.  When both directories are the same,
# every match is found twice, as (a, b) and as (b, a); only the first of
# the two orientations is printed.
sub print_pairs {
    my @pairs = @_;

    my %printed;
    for my $pair (@pairs) {
        my ($file1, $file2) = @{$pair};

        # NUL cannot occur in a path, which makes it a safe key separator.
        next if $printed{ join "\0", $file2, $file1 };
        $printed{ join "\0", $file1, $file2 } = 1;

        print qq{ln -f "$file1" "$file2"\n};
    }
    return;
}

# Return a list of (path => size) for every regular file below $dir whose
# size passes `find -size +$min_size`.
# Dies if $dir is not a directory, if find cannot be started, or if no
# matching file was found.
sub getFiles {
    my ($dir) = @_;

    unless (-d $dir) {
        die "*** ERROR: $dir is not a directory.\n";
    }

    # List-form pipe open: find is executed directly, without a shell.
    # The size is printed first and records are NUL-terminated so that
    # paths containing spaces or newlines are parsed correctly.
    open(my $find, '-|', 'find', $dir,
        '-size', "+$min_size", '-type', 'f', '-printf', '%s %p\0')
        or die "*** ERROR: Cannot access $dir.\n";

    my %list;
    {
        local $/ = "\0";
        while (my $record = <$find>) {
            chomp $record;
            my ($size, $file) = split / /, $record, 2;
            next unless defined $file && $size =~ /\A\d+\z/;
            $list{$file} = $size if -f $file;
        }
    }
    # find's exit status is deliberately ignored: unreadable
    # subdirectories should not abort the whole run.
    close($find);

    unless (keys %list) {
        die "*** No files in $dir\n";
    }
    return %list;
}