0: #!/usr/bin/perl
1:
2: ##########################################################################
3: # #
4: # How diffy works: #
5: # - it simply compares files and prints out the names of identical files #
6: # - if you give it 2 paths it compares the files in those paths #
7: # - if you give it 1 path, it compares just the files in this path #
8: # - if you give it no path, the current directory is taken #
9: # - if you give the option -h, hardlinks will be shown too #
10: # #
11: # TODO: #
12: # make it faster, smaller and more robust #
13: # print usage with -h and get better options #
14: # #
15: ##########################################################################
16:
# Main program.
#
# Reads the command line, collects the candidate files of one or two
# directories through getFiles(), and prints one line of the form
#     ln -f "<file1>" "<file2>"
# for every pair of distinct paths whose contents are byte-identical.
# Pairs that are already hardlinks of each other are printed only when
# the -h option is given.
#
# Environment:
#   DIFFY_SEARCH_SIZE  size threshold handed to find's "-size +N" test
#                      (default "20k").
#
# The block is wrapped in its own scope so that strict/warnings apply to
# this code only and do not leak into the rest of the file.
{
    use strict;
    use warnings;

    # Package globals: getFiles() reads $min_size; the others are kept as
    # globals so that anything else in the file can still see them.
    our $hardlinks = 0;
    our $min_size;
    our ($dir1, $dir2);

    # Any non-empty value (including "0") overrides the default threshold.
    my $env_size = $ENV{'DIFFY_SEARCH_SIZE'};
    $min_size = (defined $env_size && length $env_size) ? $env_size : "20k";

    # The first two non-option arguments are the directories; any further
    # arguments are ignored.
    my @dirs;
    foreach my $arg (@ARGV) {
        if ($arg eq "?") {
            die "*** Usage: diffy [directoy_1 [directory_2]] [-h]\n\n";
        }

        if ($arg eq "-h") {
            $hardlinks = 1;
        }
        elsif (@dirs < 2) {
            push @dirs, $arg;
        }
    }

    # No directory: use the current one.  One directory: compare it with
    # itself.
    $dir1 = @dirs     ? $dirs[0] : ".";
    $dir2 = @dirs > 1 ? $dirs[1] : $dir1;

    my %files1 = getFiles($dir1);    # path => size in bytes
    my %files2 = getFiles($dir2);

    # Only files of equal size can be identical, so bucket both sides by
    # size.  Paths are sorted to make the output order deterministic.
    my (%by_size1, %by_size2);
    foreach my $side ([ \%files1, \%by_size1 ], [ \%files2, \%by_size2 ]) {
        my ($files, $by_size) = @$side;
        foreach my $path (sort keys %$files) {
            my $size = $files->{$path};
            next unless defined $size && $size =~ /\A\d+\z/;   # skip malformed records
            push @{ $by_size->{$size} }, $path;
        }
    }

    my %identity;    # path => "dev:ino", or '' when stat() failed

    foreach my $size (sort { $a <=> $b } keys %by_size1) {
        my $candidates = $by_size2{$size} or next;

        # When both directories overlap, every pair shows up twice, as
        # (a, b) and as (b, a).  Remember which pairs were examined so the
        # mirror image is neither compared nor printed a second time.
        my %examined;

        foreach my $file1 (@{ $by_size1{$size} }) {
            foreach my $file2 (@$candidates) {
                next if $file1 eq $file2;
                next if $examined{"$file2\0$file1"};
                $examined{"$file1\0$file2"} = 1;

                foreach my $path ($file1, $file2) {
                    next if exists $identity{$path};
                    my @st = stat($path);
                    $identity{$path} = @st ? "$st[0]:$st[1]" : '';
                }

                # A file that vanished since the scan cannot be compared.
                next if $identity{$file1} eq '' || $identity{$file2} eq '';

                my $report;
                if ($identity{$file1} eq $identity{$file2}) {
                    # Same device and inode: already hardlinked together.
                    $report = $hardlinks;
                }
                else {
                    # List form of system(): the names never pass through
                    # a shell, so quotes or "$" in them are harmless.
                    $report = (system('cmp', '-s', $file1, $file2) == 0);
                }

                print "ln -f \"$file1\" \"$file2\"\n" if $report;
            }
        }
    }
}
146:
# getFiles($dir [, $size_spec])
#
# Runs find(1) on $dir and returns a hash (as a flat list) that maps the
# path of every regular file matching "-size +$size_spec" to its size in
# bytes.
#
#   $dir        directory to scan; dies if it is not a directory.
#   $size_spec  optional threshold in find's -size syntax (e.g. "20k");
#               defaults to the global $min_size.
#
# Dies when find cannot be started or when no file matches.
#
# find is started without a shell, and each record is "<size> <path>"
# terminated by a NUL byte, so paths containing blanks, quotes or
# newlines are read back intact.  The \0 escape in -printf requires GNU
# find, as does -printf itself.
sub getFiles {
    my ($dir, $size_spec) = @_;
    our $min_size;
    $size_spec = $min_size unless defined $size_spec;

    unless (-d $dir) { die "*** ERROR: $dir is not a directory.\n"; }

    open(my $find, '-|', 'find', $dir, '-size', "+$size_spec",
                         '-type', 'f', '-printf', '%s %p\0')
        or die "*** ERROR: Cannot access $dir.\n";

    my %list;
    {
        local $/ = "\0";    # records are NUL-terminated, not line-based
        while (my $record = <$find>) {
            chomp $record;
            # The size comes first, so the split can never cut a path.
            my ($size, $file) = split(/ /, $record, 2);
            next unless defined $file && -f $file;
            $list{$file} = $size;
        }
    }
    # find's exit status is deliberately ignored: unreadable
    # sub-directories should not abort the whole scan.
    close($find);

    unless (keys(%list)) { die "*** No files in $dir\n"; }
    return %list;
}
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |