Rather than loading a hundred files into memory or even trying to process 100 files in lockstep, you can use the first file to create a hash using the key fields TIME, CASE, FUSION & TYPE as the primary key, and two secondary keys:
and then process each of the files one at a time, matching each record against the hash by primary key, accumulating the totals and counting the records contributing to them.
When you've totalled them all, use a final loop to calculate the averages and output them to the report.
This should keep your memory requirements and run times predictable and reasonable, as only one file will be in use at any one time.
How you handle records in the subsequent files that were not in the first will depend on your needs, but you can either accumulate them or discard them as your requirements dictate.
#! perl -slw
use strict;

use Inline::Files;   # provides the FILE_0 / FILE_1 virtual filehandles below
use Data::Dumper;    # debugging aid only; not used in normal runs

# Column indices of the CSV records. TIME..TYPE form the identity of a
# record; TANKS..FALSEPOS are the numeric measures to be averaged.
use constant TIME     => 0;
use constant CASE     => 1;
use constant ITER     => 2;
use constant FUSION   => 3;
use constant TYPE     => 4;
use constant TANKS    => 5;
use constant AFVS     => 6;
use constant ADAS     => 7;
use constant IFVS     => 8;
use constant UAVS     => 9;
use constant UNKNOWN  => 10;
use constant TOTAL    => 11;
use constant LATENCY  => 12;
use constant DECOYS   => 13;
use constant FALSENEG => 14;
use constant FALSEPOS => 15;

# %data maps "time,case,fusion,type" => { Totals => [running sums], Count => n }
my %data;

# First file seeds the hash: one Totals array and a Count of 1 per key.
<FILE_0>;                         # discard the header line
while (<FILE_0>) {
    chomp;                        # keep trailing newline out of the last field
    local $" = ',';               # so "@fields[...]" interpolates comma-separated
    my @fields = split ',';
    $data{"@fields[TIME,CASE,FUSION,TYPE]"}{Totals} = [ @fields[ TANKS .. FALSEPOS ] ];
    $data{"@fields[TIME,CASE,FUSION,TYPE]"}{Count}  = 1;
}

# Subsequent files accumulate into existing keys only.
<FILE_1>;                         # discard the header line
while (<FILE_1>) {
    chomp;
    local $" = ',';
    my @fields = split ',';
    if ( exists $data{"@fields[TIME,CASE,FUSION,TYPE]"} ) {
        # Short rows leave trailing fields undef; ||0 treats them as zero.
        $data{"@fields[TIME,CASE,FUSION,TYPE]"}{Totals}[ $_ - TANKS ] += $fields[$_] || 0
            for TANKS .. FALSEPOS;
        $data{"@fields[TIME,CASE,FUSION,TYPE]"}{Count}++;
    }
    # else: create a new record here if that is the requirement.
}

# Final pass: divide each running total by its contribution count.
print "Time,Case,Fusion,Type, Tanks,AFVs,ADAs,IFVs,UAVS,Unknown,Total, Latency, Decoys ,FalseNeg, FalsePos\n";

for my $key ( keys %data ) {
    printf '%35s:' . ( '%5.2f ' x 11 ) . $/,
        $key,
        map { $data{$key}{Totals}[ $_ - TANKS ] / $data{$key}{Count} } TANKS .. FALSEPOS;
}

__FILE_0__
1,Case,Iter,Fusion,Type,Tanks,AFVs,ADAs,IFVs,UAVS,Unknown,Total, Latency, Decoys ,FalseNeg, FalsePos
32,A2,1,UE_Battle_Bde,TRUTH,6,3,7,8,5,8,7,4,0
32,A2,1,UE_Battle_Bde,PERCEIVED,3,4,2,3,1,2,8,4,4,9,0
32,A2,1,UE_Battle_Bde,FREQUENCIES,7,3,7,6,0
32,A2,1,UA1,TRUTH,0,8,6,2,3,2,1,2,0
32,A2,1,UA1,PERCEIVED,2,6,5,9,2,1,1,4,7,8,0
32,A2,1,UA1,FREQUENCIES,6,4,1,1,0
35,A2,1,UE_Battle_Bde,TRUTH,8,7,3,9,6,1,9,3,0
35,A2,1,UE_Battle_Bde,PERCEIVED,2,9,6,8,7,2,5,2,2,8,0
35,A2,1,UE_Battle_Bde,FREQUENCIES,0,3,4,0,0
__FILE_1__
1,Case,Iter,Fusion,Type,Tanks,AFVs,ADAs,IFVs,UAVS,Unknown,Total, Latency, Decoys ,FalseNeg, FalsePos
32,A2,1,UE_Battle_Bde,TRUTH,2,5,1,8,4,8,0,1,0
32,A2,1,UE_Battle_Bde,PERCEIVED,0,9,6,8,1,7,3,6,9,7,0
32,A2,1,UE_Battle_Bde,FREQUENCIES,1,0,8,4,0
32,A2,1,UA1,TRUTH,8,2,3,2,1,4,8,3,0
32,A2,1,UA1,PERCEIVED,3,9,1,6,7,3,4,2,6,0,0
32,A2,1,UA1,FREQUENCIES,6,3,2,6,0
35,A2,1,UE_Battle_Bde,TRUTH,1,1,2,6,5,0,7,3,0
35,A2,1,UE_Battle_Bde,PERCEIVED,5,6,2,0,3,2,7,6,5,6,0
35,A2,1,UE_Battle_Bde,FREQUENCIES,6,7,1,2,0
__OUTPUT__
C:\test>245214
Time,Case,Fusion,Type, Tanks,AFVs,ADAs,IFVs,UAVS,Unknown,Total, Latency, Decoys ,FalseNeg, FalsePos
     32,A2,UE_Battle_Bde,PERCEIVED: 1.50  6.50  4.00  5.50  1.00  4.50  5.50  5.00  6.50  8.00  0.00
         32,A2,UE_Battle_Bde,TRUTH: 4.00  4.00  4.00  8.00  4.50  8.00  3.50  2.50  0.00  0.00  0.00
             32,A2,UA1,FREQUENCIES: 6.00  3.50  1.50  3.50  0.00  0.00  0.00  0.00  0.00  0.00  0.00
     35,A2,UE_Battle_Bde,PERCEIVED: 3.50  7.50  4.00  4.00  5.00  2.00  6.00  4.00  3.50  7.00  0.00
   35,A2,UE_Battle_Bde,FREQUENCIES: 3.00  5.00  2.50  1.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
               32,A2,UA1,PERCEIVED: 2.50  7.50  3.00  7.50  4.50  2.00  2.50  3.00  6.50  4.00  0.00
                   32,A2,UA1,TRUTH: 4.00  5.00  4.50  2.00  2.00  3.00  4.50  2.50  0.00  0.00  0.00
         35,A2,UE_Battle_Bde,TRUTH: 4.50  4.00  2.50  7.50  5.50  0.50  8.00  3.00  0.00  0.00  0.00
   32,A2,UE_Battle_Bde,FREQUENCIES: 4.00  1.50  7.50  5.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00

C:\test>
In reply to Re: Tabulating Data Across Multiple Large Files
by BrowserUk
in thread Tabulating Data Across Multiple Large Files
by reds
| For: | Use:    |
| &    | &amp;   |
| <    | &lt;    |
| >    | &gt;    |
| [    | &#91;   |
| ]    | &#93;   |