in reply to Comparing two files line by line and exporting the differences from the first file

G'day jzelkowsz,

Here's a solution using Text::CSV (if you have Text::CSV_XS installed it will run faster) and in-memory files (see open). The input data I used is a verbatim copy of what you posted here.

#!/usr/bin/env perl

# Compare an HR export (hr.txt) with an AD export (ad.txt), both CSV files
# sharing the same header row and keyed on their first column.  For every
# AD record, each column whose value differs from the HR record with the
# same key is written to com.txt as:
#
#     key,column_name,hr_value
#
# Physical lines ending in a comma are treated as continued on the next
# line; see canonicalise_file_in_memory().

use strict;
use warnings;

use Text::CSV;

my ($hr_file, $ad_file, $com_file) = qw{hr.txt ad.txt com.txt};
my (@col_index, %hr_record_for);

# quote_space => 0: do not quote output fields merely because they contain
# a space.  error_diag() is forced into scalar context so that only the
# diagnostic string is appended to the message.
my $csv = Text::CSV::->new({quote_space => 0})
    or die "Can't instantiate a Text::CSV object: ",
        scalar Text::CSV::->error_diag();

# Pass 1: load the HR file.  The header row supplies the column names;
# every following row is indexed by its first field.
{
    open my $mem_fh, '<', canonicalise_file_in_memory($hr_file)
        or die "Can't read in-memory file: $!";

    # An empty file yields no header; fail with a clear message rather than
    # dereferencing undef.
    my $header = $csv->getline($mem_fh)
        or die "No header row in '$hr_file': ", scalar $csv->error_diag();
    @col_index = @{$header};

    while (my $row = $csv->getline($mem_fh)) {
        $hr_record_for{$row->[0]} = $row;
    }

    # getline() returns undef on a parse error as well as at end of file;
    # only the latter is a normal way to leave the loop.
    $csv->eof
        or die "Can't parse '$hr_file': ", scalar $csv->error_diag();

    close $mem_fh;
}

# Pass 2: walk the AD file and report, per record, the HR value of every
# column (other than the key column) that differs.
{
    open my $mem_fh, '<', canonicalise_file_in_memory($ad_file)
        or die "Can't read in-memory file: $!";
    open my $out_fh, '>', $com_file
        or die "Can't write '$com_file': $!";

    # Discard the AD header row; column names come from the HR header.
    defined $csv->getline($mem_fh)
        or die "No header row in '$ad_file': ", scalar $csv->error_diag();

    while (my $row = $csv->getline($mem_fh)) {
        my $id = $row->[0];

        # Look the HR record up without autovivifying an entry for ids
        # that exist only in the AD file; such ids compare against an
        # empty record, so their HR values are reported as empty fields.
        my $hr_row = exists $hr_record_for{$id} ? $hr_record_for{$id} : [];

        for my $i (1 .. $#col_index) {
            # Missing fields (short rows, absent HR record) compare and
            # print as empty strings instead of raising undef warnings.
            my $hr_value = defined $hr_row->[$i] ? $hr_row->[$i] : '';
            my $ad_value = defined $row->[$i]    ? $row->[$i]    : '';

            next if $hr_value eq $ad_value;

            $csv->say($out_fh, [ $id, $col_index[$i], $hr_value ])
                or die "Can't write record to '$com_file': ",
                    scalar $csv->error_diag();
        }
    }

    $csv->eof
        or die "Can't parse '$ad_file': ", scalar $csv->error_diag();

    close $mem_fh;

    # Buffered write errors only surface when the handle is closed.
    close $out_fh
        or die "Can't close '$com_file': $!";
}

# canonicalise_file_in_memory($file)
#
# Reads $file and returns a reference to a string holding its contents,
# with the newline removed from every line that ends in a comma so that
# such a line is joined to the one that follows it.  The reference is
# suitable for opening as an in-memory file.
#
# Dies if $file cannot be opened for reading.
sub canonicalise_file_in_memory {
    my ($file) = @_;

    open my $fh, '<', $file or die "Can't read '$file': $!";

    # Start from an empty string so an empty input file still produces a
    # defined (empty) in-memory file.
    my $canon = '';

    while (my $line = <$fh>) {
        chomp $line if $line =~ /,$/;
        $canon .= $line;
    }

    close $fh;

    return \$canon;
}

Output:

$ cat com.txt
barsu991,mail,Uttiam.Barski@pulse.org
barsu991,title,Director of Cooks
walkl003,givenname,Lreblemet
walkl003,employeenumber,20178941
walkl003,mail,Lreblemet.Walker@pulse.org
walkl003,title,Head Cook
karss001,givenname,Sovyetk
karss001,mail,Sovyetk.Karsten@pulse.org
karss001,title,Dishwasher
karss001,physicaldeliveryoffice,Kitchen of the World
karss001,streetaddress,205 Willy B. Temple
karss001,st,WI
karss001,postalcode,50987
zingk072,givenname,Kovon
zingk072,employeenumber,20113578
zingk072,symphonyemployeetype,IKP
zingk072,mail,Kovon.Zingerman@pulse.org
zingk072,manager,"cn=manager1,ou=users,ou=Kitchen,dc=Kitchen,dc=net"
hutcy231,givenname,Yello
hutcy231,mail,Yello Hutchinson
haserz221,sn,Haserkrilk
haserz221,employeenumber,20125471
haserz221,mail,Zebediah.Haserkrilk@kit.org
haserz221,telephonenumber,

— Ken

  • Comment on Re: Comparing two files line by line and exporting the differences from the first file
  • Select or Download Code