#!/usr/bin/perl use strict; use warnings; use Data::Dumper; my $inFile = "input file"; my $outFile = "output file"; # By putting an "or die" clause after an open, the code will exit gracefully # with an emitted error message if a file cannot be opened for the intended # operation open my $fin, "<", $inFile or die "Open fail on $inFile: $!"; open my $fout, ">", $outFile or die "Open fail on $outFile: $!"; my %results; print $fout "|CSED Form|OrderNumber|Date|Total Documents|Total Pages|\n"; # Process the input file 1 line at a time, storing the results in a hash of # hashes in %results while (my $line = <$fin>) { # Implicit tests if $line is defined my @fields = split /\|/, $line; # Split $line on | my $key = lc($fields[1]); # Lowercase here so results get associated # $results{$key} is a hash reference, containing information on all records # keyed on $key (m737, q569, m729...). After processing, the hash reference # in $results{$key} will have data keyed on 'lines', 'sum1' and 'sum2' push @{$results{$key}{lines}}, $line; # $results{$key}{lines} is an array reference. As each line is encountered, # it is added to the end of the array. This code uses autovivification - # this means that since $results{$key}{lines} is undefined when perl first # encounters it AND it is treated as an array reference when perl first # encounters it, it will be set to an empty array reference. It is # roughly equivalent to being preceded by the line # $results{$key}{lines} = [] if not defined $results{$key}{lines}; $results{$key}{sum1} += $fields[-3]; # Third to last column # Accumulate the number in $fields[-3] in $results{$key}{sum1} $results{$key}{sum2} += $fields[-2]; # Second to last column # Accumulate the number in $fields[-2] in $results{$key}{sum2} } # Use a second loop to sort and output the accumulated results. It transits all # keys in alphabetical order since there is no sorting code block for my $key (sort keys %results) { # prints the unchanged lines to the file (print in list context) at $fout # In particular, note all lines still contain newline characters at the ends print $fout @{$results{$key}{lines}}; my $count = @{$results{$key}{lines}}; # List in scalar context yields length # Print accumulated results, as stored in the variables $count, $key, # $results{$key}{sum1}, $results{$key}{sum2} print $fout "There were $count ${key}s and total of 2nd to last row was $results{$key}{sum1} and total of last row was $results{$key}{sum2}\n"; } # Handles close automatically when they go out of scope # Output to terminal a dump of %results so operator can see internal structure print Dumper \%results; #### $VAR1 = { 'q569' => { 'sum1' => 1989, 'sum2' => 18248, 'lines' => [ '|Q569|5555|Jun 05,2010|584|4562| ', '|Q569|5555|Jun 05,2010|345|4562| ', '|Q569|5555|Jun 05,2010|215|4562| ', '|Q569|5555|Jun 05,2010|845|4562| ' ] }, 'm737' => { 'sum1' => 876, 'sum2' => 9124, 'lines' => [ '|M737|5555|Jun 05,2010|753|4562| ', '|M737|5555|Jun 05,2010|123|4562| ' ] }, 'm729' => { 'sum1' => 2780, 'sum2' => 22810, 'lines' => [ '|M729|5652|Jun 11,2010|198|4562| ', '|M729|5876|Jun 15,2010|298|4562| ', '|M729|5726|Jun 18,2010|428|4562| ', '|M729|5147|Jun 20,2010|918|4562| ', '|M729|5632|Jun 01,2010|938|4562| ' ] } }; #### |M737|5555|Jun 05,2010|753|4562| |M737|5555|Jun 05,2010|123|4562| |Q569|5555|Jun 05,2010|584|4562| |Q569|5555|Jun 05,2010|345|4562| |Q569|5555|Jun 05,2010|215|4562| |Q569|5555|Jun 05,2010|845|4562| |M729|5652|Jun 11,2010|198|4562| |M729|5876|Jun 15,2010|298|4562| |M729|5726|Jun 18,2010|428|4562| |M729|5147|Jun 20,2010|918|4562| |M729|5632|Jun 01,2010|938|4562| #### |CSED Form|OrderNumber|Date|Total Documents|Total Pages| |M729|5652|Jun 11,2010|198|4562| |M729|5876|Jun 15,2010|298|4562| |M729|5726|Jun 18,2010|428|4562| |M729|5147|Jun 20,2010|918|4562| |M729|5632|Jun 01,2010|938|4562| There were 5 m729s and total of 2nd to last row was 2780 and total of last row was 22810 |M737|5555|Jun 05,2010|753|4562| |M737|5555|Jun 05,2010|123|4562| There were 2 m737s and total of 2nd to last row was 876 and total of last row was 9124 |Q569|5555|Jun 05,2010|584|4562| |Q569|5555|Jun 05,2010|345|4562| |Q569|5555|Jun 05,2010|215|4562| |Q569|5555|Jun 05,2010|845|4562| There were 4 q569s and total of 2nd to last row was 1989 and total of last row was 18248