in reply to combining 2 files with 4 columns need help
#!/usr/bin/perl
# Script posted on the forum under the name "poj".
#
# Combine two comma-separated files (out1.txt, out2.txt) into final.txt.
# Every input line is expected to hold four fields:
#     key,le,date,company          (date written as mm/dd/yy)
# Records are grouped by (key, le, company). For each group the dates from
# file 1 are paired with the closest dates from file 2 and one output line
# is written per pair:
#     key le date1 date2 company
# When one file has fewer dates for a group, the missing side is printed
# as eight blanks.
use strict;
use warnings;
use Date::Calc qw( Delta_Days );

# Sentinel used to pad the shorter date list; fmt_mdy() prints it as blanks.
use constant PLACEHOLDER_DATE => '1900-01-01';

# %hash maps "key\tle\tcompany" => [ undef, [dates of file 1], [dates of file 2] ]
my %hash = ();

input_data( 1, 'out1.txt' );
input_data( 2, 'out2.txt' );
output_data('final.txt');

# input_data( $ix, $filename )
# Read $filename and store each record's date (normalised to yyyy-mm-dd)
# in slot $ix of %hash under the record's primary key.
# Dies if the file cannot be opened.
sub input_data {
    my ( $ix, $filename ) = @_;

    open my $in, '<', $filename or die "$filename : $!\n";
    while ( my $line = <$in> ) {
        chomp $line;

        # A blank line (typically at end of file) carries no record.
        next if $line !~ /\S/;

        my ( $key, $le, $date, $company ) = split ',', $line;
        my $pk = join "\t", $key, $le, $company;
        push @{ $hash{$pk}[$ix] }, fmt_ymd($date);
    }
    close $in;
    return;
}

# output_data( $filename )
# Write one line per matched date pair for every primary key in %hash.
# Dies if the file cannot be opened or the final close fails.
sub output_data {
    my $filename = shift;

    open my $out, '>', $filename or die "$filename : $!\n";

    # primary key
    for my $pk ( sort keys %hash ) {

        # Limit -1 keeps a trailing empty company field instead of
        # dropping it (which would leave $company undefined).
        my ( $key, $le, $company ) = split /\t/, $pk, -1;

        # get multiple dates; a key seen in only one file has no list for
        # the other slot, so fall back to an empty list rather than
        # dereferencing undef.
        my @dates  = @{ $hash{$pk}[1] || [] };
        my @rdates = @{ $hash{$pk}[2] || [] };

        # even up number of dates
        push @dates,  PLACEHOLDER_DATE() while @dates < @rdates;
        push @rdates, PLACEHOLDER_DATE() while @rdates < @dates;

        # print out multiple dates for each key, newest first; the
        # placeholder sorts last so real dates get first pick.
        for my $date ( reverse sort @dates ) {

            # use match sub if more than 1
            if ( @rdates > 1 ) {
                @rdates = match( $date, @rdates );
            }

            # rdates sorted so best match is first element
            my $rdate = shift @rdates;
            print {$out} join ' ', $key, $le, fmt_mdy($date),
                fmt_mdy($rdate), $company, "\n";
        }
    }

    # Buffered write errors only surface at close time.
    close $out or die "$filename : $!\n";
    return;
}

# match( $date, @rdates )
# Return @rdates reordered by absolute distance in days from $date,
# closest first. All dates are yyyy-mm-dd strings.
sub match {
    my ( $date, @rdates ) = @_;

    # split date into y,m,d
    my @d1 = split /\D/, $date;

    # calc diff and store with date
    my @days = ();
    for my $rdate (@rdates) {
        my @d2 = split /\D/, $rdate;
        push @days, [ $rdate, abs( Delta_Days( @d1, @d2 ) ) ];
    }

    # sort by day difference, then extract the dates
    return map { $_->[0] } sort { $a->[1] <=> $b->[1] } @days;
}

# fmt_ymd( $mdy )
# change mm/dd/yy to yyyy-mm-dd
sub fmt_ymd {
    my $mdy = shift;
    $mdy =~ s/\s+//g;
    my ( $m, $d, $y ) = split /\D/, $mdy;

    # Every two-digit year (00..99) belongs to the 2000s here; the
    # original "< 99" test left year 99 unexpanded.
    if ( $y < 100 ) { $y += 2000 }
    return sprintf "%04d-%02d-%02d", $y, $m, $d;
}

# fmt_mdy( $ymd )
# change yyyy-mm-dd to mm/dd/yy; the placeholder date becomes 8 blanks
sub fmt_mdy {
    my $ymd = shift;
    $ymd =~ s/\s+//g;
    return ' ' x 8 if $ymd eq PLACEHOLDER_DATE;
    my ( $y, $m, $d ) = split /\D/, $ymd;

    # Modulo keeps the last two digits for any century instead of going
    # negative for years before 2000.
    return sprintf "%02d/%02d/%02d", $m, $d, $y % 100;
}
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: combining 2 files with 4 columns need help
by rruser (Acolyte) on May 29, 2013 at 19:18 UTC | |
by poj (Abbot) on May 29, 2013 at 19:24 UTC | |
by rruser (Acolyte) on May 29, 2013 at 19:37 UTC | |
by poj (Abbot) on May 29, 2013 at 19:39 UTC | |
by rruser (Acolyte) on May 29, 2013 at 20:17 UTC |