#!/usr/bin/perl
#
# Group the records of a pipe-delimited input file by their second field
# (case-insensitively), then write to the output file, for every group in
# alphabetical key order: the group's records followed by one summary line
# giving the record count and the totals of two numeric columns.
#
# NOTE(review): the column positions used below assume each record is laid
# out like the header written to the output file, i.e. with a leading and a
# trailing pipe ("|form|order|date|docs|pages|") - confirm against real data.
use strict;
use warnings;
use Data::Dumper;

my $inFile  = "input file";
my $outFile = "output file";

# "or die" (low precedence, so it applies to the whole open call) stops the
# script with a readable message when a file cannot be opened.
open my $fin,  "<", $inFile  or die "Open fail on $inFile: $!";
open my $fout, ">", $outFile or die "Open fail on $outFile: $!";

# %results is a hash of hashes: $results{$key} holds
#   lines => array ref of the records seen for $key (newline-terminated)
#   sum1  => running total of the third-to-last field
#   sum2  => running total of the second-to-last field
my %results;

print {$fout} "|CSED Form|OrderNumber|Date|Total Documents|Total Pages|\n";

# Pass 1: read the input one line at a time and accumulate per-key data.
# The while condition implicitly tests that the line read is defined.
while (my $line = <$fin>) {
    chomp(my $record = $line);

    # A blank line carries no key and no numbers; skipping it avoids an
    # empty-key group and "uninitialized value" warnings.
    next if $record =~ /\A\s*\z/;

    # LIMIT -1 keeps trailing empty fields. Together with the chomp above
    # this makes the negative indices below address the same columns whether
    # or not the file's final record ends with a newline.
    my @fields = split /\|/, $record, -1;

    # Lowercase the key so that e.g. "M737" and "m737" share one group.
    my $key = lc $fields[1];

    # Autovivification creates $results{$key} and its 'lines' array the
    # first time a key is seen. The newline is re-added so every stored
    # record is terminated, including an unterminated last line.
    push @{ $results{$key}{lines} }, "$record\n";

    $results{$key}{sum1} += $fields[-3];    # third-to-last field
    $results{$key}{sum2} += $fields[-2];    # second-to-last field
}

# Explicit close so that a read error is reported rather than ignored.
close $fin or die "Close fail on $inFile: $!";

# Pass 2: emit the groups. A plain sort (no comparison block) orders the
# keys as strings.
for my $key (sort keys %results) {
    my $group = $results{$key};

    # Print in list context writes every stored record; each one already
    # ends with a newline.
    print {$fout} @{ $group->{lines} };

    # An array in scalar context yields its element count.
    my $count = @{ $group->{lines} };

    print {$fout} "There were $count ${key}s and total of 2nd to last row was $group->{sum1} and total of last row was $group->{sum2}\n";
}

# Buffered write errors (for example a full disk) only surface when the
# handle is closed, so check the result instead of relying on scope exit.
close $fout or die "Close fail on $outFile: $!";

# Dump %results to the terminal so the operator can see its structure.
print Dumper \%results;