#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
my $inFile = "input file";
my $outFile = "output file";
# By putting an "or die" clause after an open, the code will exit gracefully
# with an emitted error message if a file cannot be opened for the intended
# operation
open my $fin, "<", $inFile or die "Open fail on $inFile: $!";
open my $fout, ">", $outFile or die "Open fail on $outFile: $!";
my %results;
print $fout "|CSED Form|OrderNumber|Date|Total Documents|Total Pages|\n";
# Process the input file 1 line at a time, storing the results in a hash of
# hashes in %results
while (my $line = <$fin>) { # Implicit tests if $line is defined
my @fields = split /\|/, $line; # Split $line on |
my $key = lc($fields[1]); # Lowercase here so results get associated
# $results{$key} is a hash reference, containing information on all records
# keyed on $key (m737, q569, m729...). After processing, the hash reference
# in $results{$key} will have data keyed on 'lines', 'sum1' and 'sum2'
push @{$results{$key}{lines}}, $line;
# $results{$key}{lines} is an array reference. As each line is encountered,
# it is added to the end of the array. This code uses autovivification -
# this means that since $results{$key}{lines} is undefined when perl first
# encounters it AND it is treated as an array reference when perl first
# encounters it, it will be set to an empty array reference. It is
# roughly equivalent to being preceded by the line
# $results{$key}{lines} = [] if not defined $results{$key}{lines};
$results{$key}{sum1} += $fields[-3]; # Third to last column
# Accumulate the number in $fields[-3] in $results{$key}{sum1}
$results{$key}{sum2} += $fields[-2]; # Second to last column
# Accumulate the number in $fields[-2] in $results{$key}{sum2}
}
# Use a second loop to sort and output the accumulated results. It transits all
# keys in alphabetical order since there is no sorting code block
for my $key (sort keys %results) {
# prints the unchanged lines to the file (print in list context) at $fout
# In particular, note all lines still contain newline characters at the ends
print $fout @{$results{$key}{lines}};
my $count = @{$results{$key}{lines}}; # List in scalar context yields length
# Print accumulated results, as stored in the variables $count, $key,
# $results{$key}{sum1}, $results{$key}{sum2}
print $fout "There were $count ${key}s and total of 2nd to last row was $results{$key}{sum1} and total of last row was $results{$key}{sum2}\n";
}
# Handles close automatically when they go out of scope
# Output to terminal a dump of %results so operator can see internal structure
print Dumper \%results;
####
$VAR1 = {
'q569' => {
'sum1' => 1989,
'sum2' => 18248,
'lines' => [
'|Q569|5555|Jun 05,2010|584|4562|
',
'|Q569|5555|Jun 05,2010|345|4562|
',
'|Q569|5555|Jun 05,2010|215|4562|
',
'|Q569|5555|Jun 05,2010|845|4562|
'
]
},
'm737' => {
'sum1' => 876,
'sum2' => 9124,
'lines' => [
'|M737|5555|Jun 05,2010|753|4562|
',
'|M737|5555|Jun 05,2010|123|4562|
'
]
},
'm729' => {
'sum1' => 2780,
'sum2' => 22810,
'lines' => [
'|M729|5652|Jun 11,2010|198|4562|
',
'|M729|5876|Jun 15,2010|298|4562|
',
'|M729|5726|Jun 18,2010|428|4562|
',
'|M729|5147|Jun 20,2010|918|4562|
',
'|M729|5632|Jun 01,2010|938|4562|
'
]
}
};
####
|M737|5555|Jun 05,2010|753|4562|
|M737|5555|Jun 05,2010|123|4562|
|Q569|5555|Jun 05,2010|584|4562|
|Q569|5555|Jun 05,2010|345|4562|
|Q569|5555|Jun 05,2010|215|4562|
|Q569|5555|Jun 05,2010|845|4562|
|M729|5652|Jun 11,2010|198|4562|
|M729|5876|Jun 15,2010|298|4562|
|M729|5726|Jun 18,2010|428|4562|
|M729|5147|Jun 20,2010|918|4562|
|M729|5632|Jun 01,2010|938|4562|
####
|CSED Form|OrderNumber|Date|Total Documents|Total Pages|
|M729|5652|Jun 11,2010|198|4562|
|M729|5876|Jun 15,2010|298|4562|
|M729|5726|Jun 18,2010|428|4562|
|M729|5147|Jun 20,2010|918|4562|
|M729|5632|Jun 01,2010|938|4562|
There were 5 m729s and total of 2nd to last row was 2780 and total of last row was 22810
|M737|5555|Jun 05,2010|753|4562|
|M737|5555|Jun 05,2010|123|4562|
There were 2 m737s and total of 2nd to last row was 876 and total of last row was 9124
|Q569|5555|Jun 05,2010|584|4562|
|Q569|5555|Jun 05,2010|345|4562|
|Q569|5555|Jun 05,2010|215|4562|
|Q569|5555|Jun 05,2010|845|4562|
There were 4 q569s and total of 2nd to last row was 1989 and total of last row was 18248