in reply to CSV Pattern Matching

Here you go:

#!/usr/local/bin/perl
use strict;
use warnings;

# Writes to output.txt every line of file1.txt whose fourth field is a
# first-column key in file2.txt and whose text matches (as a regex) the
# second-column value stored for that key.
#
# NOTE: This program loads all of file1 and file2 into memory,
# so if they're huge, this might not work.

my $infile1 = "file1.txt";
my $infile2 = "file2.txt";

#--------------------------------------------------------------------
my %f2 = ();    # KEY=col1, VAL=col2 (in file2)

# Use low-precedence "or": with "||" the die binds to the filename
# (always true), so a failed open would go unnoticed.
open my $INFILE2, '<', $infile2
    or die "Cannot open \"$infile2\": $!\n";

LINE:
while (my $line = <$INFILE2>) {
    chomp $line;
    next LINE if ($line eq "");    # Skip empties

    my @input = split(/,/, $line);

    # Skip rows with no usable second column. An undefined or empty value
    # would later become an empty pattern, and Perl treats m// as "reuse
    # the last successful regex", producing spurious matches.
    next LINE unless (defined $input[1] && length $input[1]);

    $f2{ $input[0] } = $input[1];  # "d" => 90
} #LINE
close $INFILE2;

#--------------------------------------------------------------------
my %f1 = ();    # $f1{fourth field}{sixth field} = matched six-field text

open my $INFILE1, '<', $infile1
    or die "Cannot open \"$infile1\": $!\n";

LINE:
while (my $line = <$INFILE1>) {
    chomp $line;
    next LINE if ($line eq "");    # Skip empties

    # $1 = the six matched ", "-separated fields,
    # $2 = fourth field (index 3), $3 = sixth field (index 5).
    # Lines that do not match are ignored.
    if ($line =~ m/(\S+, \S+, \S+, (\S+), \S+, (\S+))/) {
        $f1{$2}->{$3} = $1;
    }
} #LINE
close $INFILE1;

#--------------------------------------------------------------------
my $outfilename = "output.txt";
open my $OUTFILE, '>', $outfilename
    or die "Cannot open \"$outfilename\" for writing: $!\n";

FILE2COL:
foreach my $col1_f2 (keys %f2) {
    next FILE2COL unless (exists $f1{$col1_f2});

    my $to_match = $f2{$col1_f2};    # loop-invariant for this key
    while (my ($col5_f1, $line_f1) = each %{ $f1{$col1_f2} }) {
        print $OUTFILE $line_f1."\n" if ($line_f1 =~ m/$to_match/);
    }
}

# Buffered write errors only surface at close, so check it.
close $OUTFILE
    or die "Cannot close \"$outfilename\": $!\n";

Replies are listed 'Best First'.
Re^2: CSV Pattern Matching
by aaron_baugher (Curate) on Apr 13, 2012 at 12:39 UTC

    There's no need to pull the first file into memory. This is a fairly standard "load the filtering file into a hash and check the other file against it line-by-line" problem, with the one extra twist that, if the first field from the filtering file is a match, another field needs to be checked against the second field.

    #!/usr/bin/env perl
    # Core pragmas only; nothing in this script needs a non-core module.
    use strict;
    use warnings;

    # Prints (to STDOUT) each line of file1.txt whose fourth field is a key
    # loaded from file2.txt and whose sixth field matches the digits stored
    # for that key. Only file2.txt is held in memory; file1.txt is streamed.

    my %k;    # key letter from file2 => its digit string

    open my $fd2, '<', 'file2.txt' or die "Cannot open file2.txt: $!";
    while (<$fd2>) {
        chomp;
        if ( /([a-z]),(\d+)/ ) {    # one lowercase character, a comma, and digits
            $k{$1} = $2;
        }
    }
    close $fd2;

    open my $fd1, '<', 'file1.txt' or die "Cannot open file1.txt: $!";
    while (<$fd1>) {
        my @w = split /, /;

        # Lines with fewer than six fields cannot match; skipping them also
        # avoids "uninitialized value" warnings on $w[3] / $w[5].
        next if @w < 6;

        # Test definedness, not truth: a stored value of "0" is a valid
        # pattern but is false in boolean context.
        my $want = $k{ $w[3] };
        next unless defined $want;

        print if $w[5] =~ /$want/;
    }
    close $fd1;

    Aaron B.
    My Woefully Neglected Blog, where I occasionally mention Perl.