# Reads the gzip-compressed BIGFILE, collects the distinct MIN values (the
# second comma-separated field of every line that starts with "{"), and
# counts how many distinct MINs share each 6-character prefix (called
# "npanxx" throughout this script).
#
# Two flat files are written:
#   npanxx_minsort.out - "prefix count", ordered by prefix
#   npanxx_cntsort.out - "prefix  count", ordered by ascending count
#
# Sample line from BIGFILE:
#   {9999991234ff00aa},9999991234,1,"Y",0,0,{55760FFC56837F3E}
#   -> MIN is 9999991234, prefix is 999999

use strict;
use warnings;

# Run the report only when executed as a script, so the helper subs below can
# be loaded (e.g. by a test) without touching BIGFILE.
main() unless caller;

# Drives the whole report: read, de-duplicate, count, write.
sub main {
    my $in      = "BIGFILE.out.gz";
    my $out_min = "npanxx_minsort.out";
    my $out_cnt = "npanxx_cntsort.out";

    # List-form pipe open: gunzip is exec'ed directly, no shell parses $in.
    open my $in_fh, '-|', '/bin/gunzip', '-c', $in or die "IN: $!\n";
    open my $min_fh, '>', $out_min or die "OUT_MIN: $!\n";
    open my $cnt_fh, '>', $out_cnt or die "OUT_CNT: $!\n";

    _announce("Processing $in...");
    my $min_seen = _read_mins($in_fh);

    # Closing a pipe also reaps gunzip; a false return with $! == 0 means the
    # child exited non-zero (missing or corrupt archive). Warn rather than die
    # so whatever was read is still reported.
    close $in_fh
        or warn( $!
            ? "IN: error closing pipe: $!\n"
            : "IN: gunzip exited with status " . ( $? >> 8 ) . "\n" );

    _announce("Massaging Data...");
    my @npanxxarray = map { substr $_, 0, 6 } keys %{$min_seen};

    _announce("Getting Counts...");
    my $count_of = _tally( \@npanxxarray );

    _announce("Generating Flat Files...");
    for my $npanxx ( sort keys %{$count_of} ) {
        print {$min_fh} "$npanxx $count_of->{$npanxx}\n";
    }

    # Ascending by count; ties broken by prefix so the output is reproducible
    # instead of depending on hash ordering.
    my @by_count = sort {
        $count_of->{$a} <=> $count_of->{$b} or $a cmp $b
    } keys %{$count_of};
    for my $npanxx (@by_count) {
        printf {$cnt_fh} "%-7s %s\n", $npanxx, $count_of->{$npanxx};
    }

    # Buffered write errors only surface at close time.
    close $min_fh or die "OUT_MIN: $!\n";
    close $cnt_fh or die "OUT_CNT: $!\n";

    print "Time: " . time . "\n";
    print "Complete...\n";
    return;
}

# Prints the epoch timestamp followed by a progress message.
sub _announce {
    my ($message) = @_;
    print "Time: " . time . "\n";
    print "$message\n";
    return;
}

# Reads every line from $fh and returns a hash ref whose keys are the distinct
# MIN values. Only lines beginning with "{" are considered; lines without a
# second field are skipped.
sub _read_mins {
    my ($fh) = @_;
    my %seen;
    while ( my $line = <$fh> ) {
        next unless $line =~ m/^\{/;
        chomp $line;

        # Limit of 3: only the second field is needed, leave the rest unsplit.
        my ( undef, $min ) = split /,/, $line, 3;
        next unless defined $min;
        $seen{$min} = undef;
    }
    return \%seen;
}

# Returns a hash ref mapping each distinct element of @$items to the number of
# times it occurs, computed in a single pass.
sub _tally {
    my ($items) = @_;
    my %count_of;
    $count_of{$_}++ for @{$items};
    return \%count_of;
}

# Counts the elements of @$arrayref that are exactly equal to $key, stores the
# count in $hashref->{$key}, and returns ($count, $hashref).
#
# Kept with its original signature for compatibility; the main flow tallies
# all prefixes in one pass via _tally() instead of calling this once per key.
# The earlier version forked a child per key and stored the result in the
# child's private memory, so the caller's hash was never updated.
sub CountAndHash {
    my ( $key, $arrayref, $hashref ) = @_;
    my $count = grep { $_ eq $key } @{$arrayref};
    $hashref->{$key} = $count;
    return ( $count, $hashref );
}