in reply to compare records in two csv files
First off, your "records" can match without the physical lines matching. For example, the following lines are a record match:
field 1,field 2 "field 1","field 2"
Given that, what you have to do is adequately parse the CSV files into some kind of records format that's easy to compare. You could do this in memory with data structures, but that will cause problems if your CSV files grow to sufficient size. Better to use a lightwieght database.
I recommend Text::CSV_XS for parsing the CSV, and DBI with the DBD::SQLite2 driver for a lightweight database system.
Some concept (e.g. untested, but conceptually sound) code:
use strict; use warnings; use DBI; use Text::CSV_XS; use IO::File; my ($file1,$file2) = @ARGV[0..1]; # two first args are files to compar +e # create a session filename my $dbfile = \"Session"; { my $sid = "$dbfile"; #stringify ref $sid = $1 if m/^SCALAR\((.*?)\)/; #remove all but address $dbfile = "$sid.db"; } #prepare to read files my $csv = Text::CSV_XS->new(); my $io1 = IO::File->new($file1,'<') or die ("Can't open $file1 for rea +d: $!"); my $io2 = IO::File->new($file2,'<') or die ("Can't open $file2 for rea +d: $!"); my $out = IO::File->new("$dbfile.csv",'>') or die ("Can't write outfil +e: $!"); #init DB connection my $dbh = DBI->connect("dbi:SQLite2:dbname=$dbfile",'','',{RaiseError +=>1}); # Now, read file1 into the DB. print STDERR "loading $file1 into database...\n"; #-> first line of CSV is header row my $head1 = $csv->getline($io1); # create table $dbh->do( 'CREATE TABLE file ('.join(' TEXT,',@$head1).' TEXT)' ); # load db { my $sth = $dbh->prepare( 'INSERT INTO file ('.join(',',@$head1). ') VALUES ('.join(',',map {'?'} @$head1).')' ); until ($io1->eof) { my $row = $csv->getline($io1); $sth->execute(@$row); } } # ok, now compare file2 with what's in the DB. print STDERR "Beginning comparison.\n"; # assuming same header row $csv->getline($io2); { my $sth = $dbh->prepare( 'SELECT '.join(',',@$head1).' FROM file WHERE '. join(' AND ',map { "$_ = ?" } @$head1). ' LIMIT 1' ); until ($io2->eof) { my $row = $csv->getline($io2); $sth->execute(@$row); my $result = $sth->fetchrow_arrayref; if (defined $result && @$result) { print STDERR '+'; $csv->print($out,$row); #save matches to CSV } else { print STDERR '-'; } } print STDERR "\ndone.\n"; } print STDERR "Results saved to $dbfile.csv"; # clean up. $dbh->disconnect(); unlink($dbfile);
|
|---|