1: #!/usr/bin/perl
2:
3: ##########################################################################
4: # #
5: # How diffy works: #
6: # - it simply compares files and prints out the names of identical files #
7: # - if you give it 2 paths it compares the files in those paths #
8: # - if you give it 1 path, it compares just the files in this path #
9: # - if you give it no path, the current directory is taken #
10: # - if you give the option -h, hardlinks will be shown too #
11: # #
12: # TODO: #
13: # make it faster, smaller and more robust #
14: # print usage with -h and get better options #
15: # #
16: ##########################################################################
17:
# ------------------------------------------------------------------------
# Main program.
#
# Collects the candidate files below one or two directories (via getFiles),
# compares every pair of equally sized files, and prints one line of the form
#     ln -f "first" "second"
# for each pair of identical files.  With -h, pairs that already share an
# inode (existing hardlinks) are reported as well.
#
# The enclosing bare block keeps the lexical variables and the
# strict/warnings pragmas from leaking into the rest of the file.
# ------------------------------------------------------------------------
{
    use strict;
    use warnings;
    use File::Compare ();

    # Size threshold in find(1) "-size" syntax.  getFiles() reads this
    # package variable.  An unset, empty or "0" environment value falls back
    # to the default.
    our $min_size = $ENV{'DIFFY_SEARCH_SIZE'} || "20k";

    my $hardlinks = 0;    # set by -h: also report files that are already hardlinked
    my @dirs;             # at most two directories; further arguments are ignored

    foreach my $arg (@ARGV) {
        if ($arg eq "?") {
            die "*** Usage: diffy [directory_1 [directory_2]] [-h]\n\n";
        }

        if ($arg eq "-h") {
            $hardlinks = 1;
        }
        elsif (@dirs < 2) {
            push @dirs, $arg;
        }
    }

    # No directory: use the current one.  One directory: compare it with itself.
    my $dir1 = @dirs     ? $dirs[0] : ".";
    my $dir2 = @dirs > 1 ? $dirs[1] : $dir1;

    my %files1 = getFiles($dir1);
    my %files2 = @dirs > 1 ? getFiles($dir2) : %files1;   # avoid scanning the same tree twice

    # Bucket the second file set by size, so that a file from the first set
    # is only ever compared against candidates of exactly the same size.
    my %candidates;    # size => [ names from the second set ]
    foreach my $name (sort keys %files2) {
        push @{ $candidates{ $files2{$name} } }, $name;
    }

    my %groups;        # size => [ names from the first set that have candidates ]
    foreach my $name (sort keys %files1) {
        my $size = $files1{$name};
        push @{ $groups{$size} }, $name if exists $candidates{$size};
    }

    # Report one size group at a time, smallest files first.
    foreach my $size (sort { $a <=> $b } keys %groups) {
        my @pairs;
        foreach my $name1 (@{ $groups{$size} }) {
            foreach my $name2 (@{ $candidates{$size} }) {
                next if $name2 eq $name1;    # never pair a path with itself
                push @pairs, [ $name1, $name2 ]
                    if _diffyIsDuplicate($name1, $name2, $hardlinks);
            }
        }
        _diffyPrintPairs(@pairs);
    }

    # _diffyIsDuplicate($name1, $name2, $reportHardlinks)
    #
    # Returns 1 when the pair should be reported, 0 otherwise.
    #   - Two paths with the same device AND inode are the same file on disk;
    #     they are reported only when $reportHardlinks is true.
    #   - Otherwise the pair is reported when the contents are byte-identical.
    #   - A path that cannot be stat()ed or read is never reported.
    sub _diffyIsDuplicate {
        my ($name1, $name2, $reportHardlinks) = @_;

        # stat() the path itself: no handle to open, no mode characters in
        # the file name that could be misinterpreted.
        my ($dev1, $ino1) = stat($name1) or return 0;
        my ($dev2, $ino2) = stat($name2) or return 0;

        # Inode numbers are only unique per device, so compare both.
        if ($dev1 == $dev2 && $ino1 == $ino2) {
            return $reportHardlinks ? 1 : 0;
        }

        # compare() returns 0 for equal, 1 for different, -1 on error.
        return File::Compare::compare($name1, $name2) == 0 ? 1 : 0;
    }

    # _diffyPrintPairs(@pairs)
    #
    # Prints one "ln -f" line per pair.  Each element of @pairs is a
    # [ first, second ] array reference.  When both (a, b) and (b, a) are
    # present - which happens whenever the two file sets overlap - only the
    # first of the two is printed.
    sub _diffyPrintPairs {
        my @pairs = @_;
        my %skip;    # "first\0second" keys of mirrored pairs still to be suppressed

        foreach my $pair (@pairs) {
            my ($first, $second) = @{$pair};
            next if $skip{"$first\0$second"};
            $skip{"$second\0$first"} = 1;
            print "ln -f " . _diffyShellQuote($first) . " " . _diffyShellQuote($second) . "\n";
        }
        return;
    }

    # _diffyShellQuote($text)
    #
    # Wraps $text in double quotes, backslash-escaping the characters that
    # stay special inside double quotes in a POSIX shell.  Names without such
    # characters come out exactly as "name".
    sub _diffyShellQuote {
        my ($text) = @_;
        $text =~ s/(["\\\$`])/\\$1/g;
        return "\"$text\"";
    }
}
147:
# getFiles($dir)
#
# Returns a hash (as a flat list of name => size pairs) with every regular
# file below $dir that find(1) selects with "-size +$min_size".
#
#   $dir       directory to scan
#   $min_size  package variable holding the threshold in find(1) -size
#              syntax; "20k" is used when it is unset or empty
#
# Dies when $dir is not a directory, when find(1) cannot be started, or when
# no file qualifies.  Requires a find(1) that supports -printf.
sub getFiles {
    use strict;
    use warnings;

    our $min_size;
    my $dir = defined $_[0] ? $_[0] : '';

    unless (-d $dir) { die "*** ERROR: $dir is not a directory.\n"; }

    my $threshold = (defined $min_size && length $min_size) ? $min_size : "20k";

    # A leading "-" would make find(1) parse the directory as an option.
    my $start = $dir =~ /^-/ ? "./$dir" : $dir;

    # List-form pipe open: no shell is involved, so metacharacters in the
    # directory name are passed through literally.  Each record is
    # "<size> <path>" terminated by a NUL byte, which keeps paths that
    # contain spaces or newlines intact.
    open(my $find, '-|', 'find', $start, '-size', "+$threshold",
        '-type', 'f', '-printf', '%s %p\0')
        or die "*** ERROR: Cannot access $dir.\n";

    my %list;
    {
        local $/ = "\0";    # read NUL-terminated records instead of lines
        while (my $record = <$find>) {
            chomp $record;
            # The size never contains a space, so split at the first one only.
            my ($size, $file) = split(/ /, $record, 2);
            next unless defined $file;
            $list{$file} = $size if -f $file;
        }
    }
    # find(1) reports its own problems on STDERR; a non-zero exit status
    # (e.g. an unreadable subdirectory) is deliberately not fatal.
    close($find);

    unless (keys(%list)) { die "*** No files in $dir\n"; }
    return %list;
}
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: diffy
by bikeNomad (Priest) on Jun 22, 2001 at 04:21 UTC | |
|
Re: diffy
by CharlesClarkson (Curate) on Jun 22, 2001 at 07:19 UTC | |
by bikeNomad (Priest) on Jun 22, 2001 at 08:30 UTC |