# UPDATE: after downloading, this script no longer worked because additional
# CRs had been added to the __DATA__ section.  Adding s/\x0d//g to
# extract_csv_entries() fixed the problem under Linux.
#
# TODO:
# [ ] use files instead of DATA
# [ ] use a CPAN module to parse CSV files
# [ ] simplify / it is much easier if all files can be held in memory:
#     - read second file to $file2_data{$key} = $value
#     - then do something along:
#       @result = map { exists $file2_data{$_} ? $file2_data{$_}
#                                              : "0"
#                     } @ordered_items;

use strict;
use warnings;

my %file1_data;                     # $file1_data{key} = value of last column
my @ordered_items;                  # preserves order of keys from first file
my $expected_no_of_entries = 10;    # expected entries in a CSV line

# Split one ';'-separated line and return its first and last field.
#
# Parameters: $line - one already-chomped input line.
# Returns:    two-element list (first field, last field).
# Dies:       when the line does not hold exactly $expected_no_of_entries
#             fields.
sub extract_csv_entries {
    # In a real-world program, one would use a CSV module from CPAN...
    my $line = shift;

    # NOTE: maybe you need to remove the next line under Windows/Mac?
    $line =~ s/\x0d//g;

    # LIMIT -1 keeps trailing empty fields; without it split() drops them
    # and a row whose last column is empty would be miscounted.
    my @csv_items = split /;/, $line, -1;
    if (@csv_items != $expected_no_of_entries) {
        die "illegal number of entries: $line ... \n";
    }
    return @csv_items[0, -1];    # first & last entry
}

# Read the next emulated "file" from DATA (up to a line starting with "EOF")
# and line its values up against the key order learned from the first file.
#
# Returns: array reference with one slot per entry of @ordered_items, holding
#          the value read for that key, or 0 where the key was absent.
# Dies:    on a key unknown to the first file, on a key that is duplicated or
#          out of order relative to the first file, or on a malformed line.
sub compare_file {
    # Here, we simulate reading from files...
    my $expect = 0;    # index of the next key we expect to see
    my @result;

    while (my $line = <DATA>) {
        next if $line =~ /^\s*$/;    # skip empty lines
        last if $line =~ /^EOF/;     # emulate eof
        chomp $line;

        my ($key, $value) = extract_csv_entries($line);
        if (!exists $file1_data{$key}) {
            die "item found that is not in first file: $key / $value\n";
        }

        # Update: uncomment these lines to ensure that
        # entries are the same as in 1st file...
        # if ($file1_data{$key} ne $value) {    # ensure values didn't change
        #     die "entries for $key differ: $file1_data{$key} <=> $value";
        # }

        # Advance the expected key until it matches, writing 0 for every key
        # that was skipped.  The bounds check stops a duplicated or
        # out-of-order key from running past the end of @ordered_items.
        while (1) {
            if ($expect > $#ordered_items) {
                die "item out of order or duplicated: $key\n";
            }
            last if $key eq $ordered_items[$expect++];
            push @result, 0;
        }
        push @result, $value;    # finally in sync.
    }

    # EOF before last expected key? Pad with zeros...
    push @result, 0 for ($expect .. $#ordered_items);

    if (@result != @ordered_items) {    # paranoia
        die("internal error: " . join(";", @result));
    }
    return \@result;
}

# Print the values collected for one file on a single line.
#
# Parameters: $file_no - number shown in the line's label.
#             $aref    - array reference holding the values to print.
sub print_result {
    my ($file_no, $aref) = @_;
    print "File $file_no: ", join("; ", @{$aref}), "\n";
}

# Step 1 - learn the key/value pairs and key-order:
# read the first "file" (emulated here).
while (my $line = <DATA>) {
    next if $line =~ /^\s*$/;    # skip empty lines
    last if $line =~ /^EOF/;     # emulate eof
    chomp $line;

    my ($key, $value) = extract_csv_entries($line);
    push @ordered_items, $key;      # learn order from first file
    $file1_data{$key} = $value;     # finally learn key/value
}

# Print the list of items for the 1st file in original order.
print "Entries: ", join(";", @ordered_items), "\n";
print_result(1, [ map { $file1_data{$_} } @ordered_items ]);

# Now compare some sample files...
for my $file_no (2 .. 5) {
    print_result($file_no, compare_file());
}

__DATA__
DISTINGUERE TRA;1;14;507;0,000000242475382686773;0,00000339465535761482;0,000122935019022194;0,00000000041732202096217;9,18246152003019;9,18246152003019
MANCANTE DI;1;56;507;0,000000242475382686773;0,0000135786214304593;0,000122935019022194;0,00000000166928808384868;7,18246152003019;7,18246152003019
APPLICARE SU;1;64;507;0,000000242475382686773;0,0000155184244919535;0,000122935019022194;0,00000000190775781011278;6,9898164420878;6,9898164420878
MONTATO IN;1;78;507;0,000000242475382686773;0,0000189130798495683;0,000122935019022194;0,00000000232507983107495;6,70441422322555;6,70441422322555
IMPIEGATO IN;2;180;507;0,000000484950765373545;0,0000436455688836191;0,000122935019022194;0,00000000536556884094218;6,49796334575812;12,9959266915162
RAGGRUPPARE IN;1;109;507;0,000000242475382686773;0,0000264298167128582;0,000122935019022194;0,00000000324915002034832;6,22163211731087;6,22163211731087
EOF of first file
DISTINGUERE TRA;1;14;507;0,000000242475382686773;0,00000339465535761482;0,000122935019022194;0,00000000041732202096217;9,18246152003019;9,18246152003019
APPLICARE SU;1;64;507;0,000000242475382686773;0,0000155184244919535;0,000122935019022194;0,00000000190775781011278;6,9898164420878;6,9898164420878
MONTATO IN;1;78;507;0,000000242475382686773;0,0000189130798495683;0,000122935019022194;0,00000000232507983107495;6,70441422322555;6,70441422322555
IMPIEGATO IN;2;180;507;0,000000484950765373545;0,0000436455688836191;0,000122935019022194;0,00000000536556884094218;6,49796334575812;12,9959266915162
EOF of second file
DISTINGUERE TRA;1;14;507;0,000000242475382686773;0,00000339465535761482;0,000122935019022194;0,00000000041732202096217;9,18246152003019;me differs!
RAGGRUPPARE IN;1;109;507;0,000000242475382686773;0,0000264298167128582;0,000122935019022194;0,00000000324915002034832;6,22163211731087;6,22163211731087
EOF of dummy third file
MONTATO IN;1;78;507;0,000000242475382686773;0,0000189130798495683;0,000122935019022194;0,00000000232507983107495;6,70441422322555;6,70441422322555
EOF of dummy fourth file
MONTATO INorOUTorWhatever;1;78;507;0,000000242475382686773;0,0000189130798495683;0,000122935019022194;0,00000000232507983107495;6,70441422322555;6,70441422322555
EOF of illegal fifth file with illegal entry