Thanks so much for the help again. I replaced the sub input_data and sub output_data in the original script with this, and I get the following:
ACFX 28523 L 05/18/13 ABCCO
ACFX 28523 L 05/01/13 ABCCO-C
I am pasting the entire script; maybe I am doing something wrong. This is a little over my head, but hopefully I will begin to understand it better.
#!/usr/bin/perl
#
use strict;
use warnings;
use Date::Calc qw( Delta_Days );
# Shared state: input_data() fills both hashes, output_data() reads them.
#   %hashC - primary keys that had a trailing -C stripped (value is '-C')
#   %hash  - primary key => array whose slots 1 and 2 hold lists of dates
my ( %hashC, %hash );

# Load each input file into its own slot (1 or 2) of %hash.
# Alternative inputs used previously: pcarry.txt (slot 1), rcarry.txt (slot 2).
for my $source ( [ 1, 'out1.txt' ], [ 2, 'out2.txt' ] ) {
    input_data( @{$source} );
}

# Produce the merged report.
output_data('final.txt');
#sub input_data {
# my ($ix,$filename) = @_;
# open FILE1, "<", $filename or die "$filename : $!\n";
# while ( <FILE1> ) {
# chomp $_;
# my ( $key, $le, $date, $company ) = split ',', $_;
# my $pk = join "\t",$key,$le,$company;
# push @{$hash{$pk}[$ix]},fmt_ymd($date);
# }
# close FILE1;
#}
# Read one comma-separated input file into the shared %hash.
#
# Arguments:
#   $ix       - slot of each %hash entry that receives this file's dates
#               (the script calls this with 1 and 2)
#   $filename - path of the file to read
#
# Each non-blank line is split on ',' into key, le, date and company.
# key, le and company are joined with tabs to form the primary key; a
# trailing "-C" is stripped from that key and remembered in %hashC so the
# two variants of a company share one entry. The date is converted with
# fmt_ymd() and appended to $hash{$pk}[$ix].
#
# Dies if the file cannot be opened.
sub input_data {
    my ($ix,$filename) = @_;

    # Lexical handle and a named line variable: nothing global (FILE1, $_)
    # is clobbered for the caller.
    open my $fh, "<", $filename or die "$filename : $!\n";

    while ( my $line = <$fh> ) {
        chomp $line;

        # A blank line has no fields; processing it would create an empty
        # key with a bogus date, so skip it.
        next unless $line =~ /\S/;

        my ( $key, $le, $date, $company ) = split ',', $line;
        my $pk = join "\t",$key,$le,$company;

        # remove -C from key and store
        if ($pk =~ s/-C$//){
            $hashC{$pk} = '-C';
        }

        push @{$hash{$pk}[$ix]},fmt_ymd($date);
    }

    close $fh;
}
#sub output_data {
# my $filename = shift;
# open OUTFILE, ">", $filename or die "$filename : $!\n";
# primary key
# for my $pk (sort keys %hash){
# my ($key,$le,$company) = split "\t",$pk;
# Write the merged report to $filename.
#
# Argument:
#   $filename - path of the output file (truncated and rewritten)
#
# For every primary key in %hash (sorted), the dates from slot 1 are paired
# with the dates from slot 2. The shorter list is padded with the
# placeholder date 1900-01-01, which fmt_mdy() renders as eight blanks.
# Slot-1 dates are walked newest first; each is paired with the remaining
# slot-2 date that match() ranks closest to it.
#
# Dies if the file cannot be opened or closed.
sub output_data {
    my $filename = shift;

    # The records must go to this handle; a bare print would send them to
    # STDOUT and leave the output file empty.
    open my $out, ">", $filename or die "$filename : $!\n";

    # primary key
    for my $pk (sort keys %hash){
        my ($key,$le,$company) = split "\t",$pk;

        # add -C back if required
        $company .= $hashC{$pk} || '';

        # get multiple dates (either slot may be missing for a key)
        my @dates  = (defined $hash{$pk}[1]) ? @{$hash{$pk}[1]} : ();
        my @rdates = (defined $hash{$pk}[2]) ? @{$hash{$pk}[2]} : ();

        # even up number of dates:
        while (@dates < @rdates) {
            push @dates,'1900-01-01';
        }
        while (@rdates < @dates) {
            push @rdates,'1900-01-01';
        }

        # print out multiple dates for each key
        for my $date (reverse sort @dates){
            # use match sub if more than 1
            if (@rdates > 1){
                @rdates = match($date,@rdates);
            }
            # rdates sorted so best match is first element
            my $rdate = shift @rdates;

            # "\n" is the last joined element, so each line ends with a
            # space before the newline; kept so the line format is unchanged.
            print {$out} join ' ',$key,$le,fmt_mdy($date),fmt_mdy($rdate),$company,"\n";
        }
    }

    # Buffered write errors only surface at close time.
    close $out or die "$filename : $!\n";
}
# Rank candidate dates by how close they are to a reference date.
#
# Arguments:
#   $date   - reference date; its numeric parts are passed to Delta_Days
#   @rdates - candidate dates in the same format
#
# Returns the candidates ordered by ascending absolute day difference from
# $date, so the best match is the first element.
sub match {
    my ($date,@rdates) = @_;

    # numeric components of the reference date
    my @ref_parts = split /\D/,$date;

    # pair each candidate with its distance, order by distance, unwrap
    return
        map  { $_->[0] }
        sort { $a->[1] <=> $b->[1] }
        map  {
            my @cand_parts = split /\D/,$_;
            [ $_, abs( Delta_Days(@ref_parts,@cand_parts) ) ];
        } @rdates;
}
# change mm/dd/yy to yyyy-mm-dd
#
# Argument:
#   $mdy - date as month/day/year; any non-digit separates the parts and
#          embedded spaces are ignored
#
# Returns the date as a zero-padded "yyyy-mm-dd" string. Two-digit years
# (0..99) are placed in the 2000s; larger years are used as given.
sub fmt_ymd {
    my $mdy = shift;
    $mdy =~ s/ //g;
    my ($m,$d,$y) = split /\D/,$mdy;

    # < 100 rather than < 99: year 99 is a two-digit year too, and must
    # become 2099 so that fmt_mdy() can turn it back into 99.
    if ($y < 100){ $y += 2000 }

    return sprintf "%04d-%02d-%02d",$y,$m,$d;
}
# change yyyy-mm-dd to mm/dd/yy
#
# Argument:
#   $ymd - date as year-month-day; any non-digit separates the parts and
#          embedded spaces are ignored
#
# Returns "mm/dd/yy", or eight blanks for the placeholder date 1900-01-01.
sub fmt_mdy {
    my $ymd = shift;
    $ymd =~ s/ //g;

    # the placeholder date is rendered as a blank column
    return ' 'x8 if $ymd eq '1900-01-01';

    my ($y,$m,$d) = split /\D/,$ymd;

    # Keep only the last two digits of the year. Subtracting 2000 gave the
    # same result for 2000-2099 but a negative number for earlier years.
    $y %= 100;

    return sprintf "%02d/%02d/%02d",$m,$d,$y;
}
|