in reply to combining 2 files with 4 columns need help

Here is my best guess at what you mean by closest matched dates. I've assumed that all the fields except the date make up the primary key that you want to join on. I've also assumed that all the dates are on or after 01/01/2000:
#!/usr/bin/perl
#
# Join two comma-separated files on (key, le, company) and pair the dates
# from the first file with the closest dates from the second file.
#
# Input  : out1.txt and out2.txt, one record per line: key,le,mm/dd/yy,company
# Output : final.txt, one line per paired date:
#          key le date-from-file-1 date-from-file-2 company
#
# A record that has no partner in the other file is still printed; the
# missing date is shown as eight spaces.
use strict;
use warnings;
use Date::Calc qw( Delta_Days );

# Placeholder date used to pad the shorter of the two date lists.  It is far
# from every real date, so match() always ranks it last, and fmt_mdy() prints
# it as blanks.
use constant NO_DATE => '1900-01-01';

# primary key ("key\tle\tcompany") => [ undef, \@dates_file_1, \@dates_file_2 ]
# Slot 1 or slot 2 stays undefined when the key occurs in only one file.
my %hash = ();

input_data( 1, 'out1.txt' );
input_data( 2, 'out2.txt' );
output_data('final.txt');

# Read one input file and store its dates in %hash.
#   $ix       - slot to fill: 1 for the first file, 2 for the second
#   $filename - path of the comma-separated input file
# Dies if the file cannot be opened.
sub input_data {
    my ( $ix, $filename ) = @_;

    open my $in, '<', $filename or die "$filename : $!\n";
    while ( my $line = <$in> ) {
        chomp $line;
        $line =~ s/\r\z//;              # tolerate CRLF line endings
        next unless $line =~ /\S/;      # ignore blank lines

        # Limit -1 keeps an empty trailing company field instead of undef.
        my ( $key, $le, $date, $company ) = split /,/, $line, -1;
        if ( !defined $date ) {
            warn "$filename line $.: expected key,le,date,company - skipped\n";
            next;
        }
        $company = '' unless defined $company;

        my $pk = join "\t", $key, $le, $company;
        push @{ $hash{$pk}[$ix] }, fmt_ymd($date);
    }
    close $in;
    return;
}

# Write the joined records to $filename, sorted by primary key.
#   $filename - path of the output file (overwritten)
# Dies if the file cannot be opened or fully written.
sub output_data {
    my $filename = shift;

    open my $out, '>', $filename or die "$filename : $!\n";

    for my $pk ( sort keys %hash ) {
        my ( $key, $le, $company ) = split /\t/, $pk, -1;

        # A key may be present in only one file, leaving the other slot
        # undefined; fall back to an empty list instead of dereferencing
        # undef (fatal under strict refs).
        my @dates  = @{ $hash{$pk}[1] || [] };
        my @rdates = @{ $hash{$pk}[2] || [] };

        # Even up the number of dates so every date gets a partner.
        push @dates,  NO_DATE while @dates < @rdates;
        push @rdates, NO_DATE while @rdates < @dates;

        # Newest first; the ISO format makes a string sort chronological,
        # and the placeholders therefore come last.
        for my $date ( reverse sort @dates ) {

            # With several candidates left, order them so that the closest
            # one to $date is the first element.
            @rdates = match( $date, @rdates ) if @rdates > 1;
            my $rdate = shift @rdates;

            # The newline is joined like a field, so each line ends with a
            # space before it; kept so the output format is unchanged.
            print {$out} join ' ', $key, $le, fmt_mdy($date),
                fmt_mdy($rdate), $company, "\n";
        }
    }

    close $out or die "$filename : $!\n";
    return;
}

# Order candidate dates by their distance in days from a reference date.
#   $date   - reference date, yyyy-mm-dd
#   @rdates - candidate dates, yyyy-mm-dd
# Returns the candidates sorted by ascending absolute day difference.
sub match {
    my ( $date, @rdates ) = @_;

    my @d1 = split /\D/, $date;         # (year, month, day)

    my @days;
    for my $rdate (@rdates) {
        my @d2 = split /\D/, $rdate;
        push @days, [ $rdate, abs( Delta_Days( @d1, @d2 ) ) ];
    }

    return map { $_->[0] } sort { $a->[1] <=> $b->[1] } @days;
}

# Convert mm/dd/yy to yyyy-mm-dd.
# Two-digit years (0-99) are taken to be in the 2000s; four-digit years are
# kept as they are.
sub fmt_ymd {
    my $mdy = shift;
    $mdy =~ s/ //g;
    my ( $m, $d, $y ) = split /\D/, $mdy;
    $y += 2000 if $y < 100;
    return sprintf "%04d-%02d-%02d", $y, $m, $d;
}

# Convert yyyy-mm-dd to mm/dd/yy.
# The NO_DATE placeholder is rendered as eight spaces so columns stay aligned.
sub fmt_mdy {
    my $ymd = shift;
    $ymd =~ s/ //g;
    return ' ' x 8 if $ymd eq NO_DATE;
    my ( $y, $m, $d ) = split /\D/, $ymd;
    return sprintf "%02d/%02d/%02d", $m, $d, $y % 100;
}
poj

Replies are listed 'Best First'.
Re^2: combining 2 files with 4 columns need help
by rruser (Acolyte) on May 29, 2013 at 19:18 UTC

    I am getting the following message when running the script: "Can't use an undefined value as an ARRAY reference at script.pl line 34."

    lines 33 & 34 my @dates = @{$hash{$pk}[1]}; my @rdates = @{$hash{$pk}[2]};

    thanks so much for your help

      It looks like you have a primary key that appears in one file only. Add a print to confirm:
      print "$pk\n"; my @dates = @{$hash{$pk}[1]}; my @rdates = @{$hash{$pk}[2]};
      poj

        You are right, there is a primary key that appears in only one file, and this can potentially occur in either file. One date is the order date and the other is the release date. Can this be taken into account?

        thanks so much