in reply to Filter out an input file with a given waiver file, and output to a specific file 2.0

Hello DespacitoPerl,

This is one possible solution.

#!/usr/bin/perl
use strict;
use warnings;
use feature 'say';    # core replacement for the non-core "use say;"
use Data::Dumper;

# Usage: perl test.pl in1.txt filter1.txt filter2.txt in2.txt ...
#
# Reads any number of input (report) files and filter (waiver) files given
# on the command line, in any order.  A line containing a comma is treated
# as a waiver row; every other non-blank line is treated as an input row.
# Each input row is appended to output.txt, either unchanged or, when a
# waiver row names the same pin, rewritten with the waiver's thresholds and
# marked "(WAIVED)".

# Column names, in file order, for the two kinds of file.
my @keys_input_file = ('scenario', 'pins', 'threshold-1', 'threshold-2',
                       'subtraction', 'notation');
my @keys_filter_file = ('pins', 'threshold-1', 'threshold-2',
                        'justification', 'date');

my %HoH_Input;     # $HoH_Input{file name}{line number}{column name}
my %HoH_Filter;    # $HoH_Filter{file name}{line number}{column name}

while (<>) {
    chomp;
    next if /^\s*$/;    # skip empty lines

    my @values;
    if (/,/) {
        @values = split /,/, $_;
        @{ $HoH_Filter{$ARGV}{$.} }{@keys_filter_file} = @values;
    }
    else {
        # split ' ' ignores leading whitespace, so an indented line does
        # not produce an empty first field that would shift every column.
        @values = split ' ', $_;
        @{ $HoH_Input{$ARGV}{$.} }{@keys_input_file} = @values;
    }
}
continue {
    # reset line numbering on each input file
    close ARGV if eof;    # Not eof()!
}

# print Dumper \%HoH_Input, \%HoH_Filter;

my $result = 'output.txt';
open my $output, '>>', $result
    or die "Could not open ".$result.": $!";

# Files holding the most rows are processed first.
my @input_files = sort {
    scalar( keys %{ $HoH_Input{$b} } ) <=> scalar( keys %{ $HoH_Input{$a} } )
} keys %HoH_Input;

my @filter_files = sort {
    scalar( keys %{ $HoH_Filter{$b} } ) <=> scalar( keys %{ $HoH_Filter{$a} } )
} keys %HoH_Filter;

foreach my $file (@input_files) {

    # Left exactly as posted on purpose: this comparator compares the
    # per-line hash references numerically (i.e. by address) instead of the
    # line numbers, so rows are not emitted in file order.  This is the
    # ordering puzzle that the text following this script leaves to the
    # reader; sorting with { $a <=> $b } gives file order.
    my @lines = sort { $HoH_Input{$file}{$a} <=> $HoH_Input{$file}{$b} }
        keys %{ $HoH_Input{$file} };

    LINE:
    foreach my $line (@lines) {
        my $entry = $HoH_Input{$file}{$line};

        foreach my $filter (@filter_files) {
            foreach my $filter_line ( sort { $a <=> $b }
                                      keys %{ $HoH_Filter{$filter} } )
            {
                my $waiver = $HoH_Filter{$filter}{$filter_line};
                next if $entry->{'pins'} ne $waiver->{'pins'};

                my $subtraction = sprintf( '%2.2f',
                    $waiver->{'threshold-1'} - $waiver->{'threshold-2'} );

                say $output join( ' ',
                    $entry->{'scenario'},
                    $entry->{'pins'},
                    $waiver->{'threshold-1'},
                    $waiver->{'threshold-2'},
                    $subtraction,
                    '(WAIVED)',
                );

                # The first matching waiver settles this row.  Leaving all
                # the waiver loops here keeps the row from being printed
                # again for every other waiver row.
                next LINE;
            }
        }

        # No waiver names this pin (or no waiver file was supplied):
        # print the row unchanged, exactly once.
        say $output join( ' ', @{$entry}{@keys_input_file} );
    }
}

close $output
    or warn "Could not close ".$result.": $!";

__END__

$ cat output.txt
abcd124 klmn124 100.00 2500.00 -2400.00 (WAIVED)
abcd123 klmn123 100.00 1000.00 -900.00 (VIOLATED)

What I have implemented here is a combination of eof FILEHANDLE and Access and Printing of a HASH OF HASHES, with join used at the end to build each output line.

The script is simple: you provide as input any number of files like the input data that you showed us, and any number of filter files like the filter file data that you provided. Syntax: perl test.pl in1.txt filter1.txt filter2.txt in2.txt etc. The order and the number of the files are irrelevant. I wrote the script this way in order to make the solution to your problem more generic.

As a next step I sort the files and then I compare the hashes with each other. So far so good, but you may be wondering why the lines are not in order although the hashes are in order. Well, this is a bit tricky, so I will leave it to you to solve ;).

If you have more trouble let us know and I will assist more.

Update: Well, I read the other monks' replies only after answering the question, and I did not want to delete my answer. So at this point I will just provide you with the final solution.

#!/usr/bin/perl
use strict;
use warnings;
use feature 'say';    # core replacement for the non-core "use say;"
use Data::Dumper;

# Usage: perl test.pl in1.txt filter1.txt filter2.txt in2.txt ...
#
# Reads any number of input (report) files and filter (waiver) files given
# on the command line, in any order.  A line containing a comma is treated
# as a waiver row; every other non-blank line is treated as an input row.
# One result line is kept per pin: the input row unchanged, or, when a
# waiver row names the same pin, the row rewritten with the waiver's
# thresholds and marked "(WAIVED)".  The result lines are appended to
# output.txt sorted by pin name.

# Column names, in file order, for the two kinds of file.
my @keys_input_file = ('scenario', 'pins', 'threshold-1', 'threshold-2',
                       'subtraction', 'notation');
my @keys_filter_file = ('pins', 'threshold-1', 'threshold-2',
                        'justification', 'date');

my %HoH_Input;     # $HoH_Input{file name}{line number}{column name}
my %HoH_Filter;    # $HoH_Filter{file name}{line number}{column name}

while (<>) {
    chomp;
    next if /^\s*$/;    # skip empty lines

    my @values;
    if (/,/) {
        @values = split /,/, $_;
        @{ $HoH_Filter{$ARGV}{$.} }{@keys_filter_file} = @values;
    }
    else {
        # split ' ' ignores leading whitespace, so an indented line does
        # not produce an empty first field that would shift every column.
        @values = split ' ', $_;
        @{ $HoH_Input{$ARGV}{$.} }{@keys_input_file} = @values;
    }
}
continue {
    # reset line numbering on each input file
    close ARGV if eof;    # Not eof()!
}

# print Dumper \%HoH_Input, \%HoH_Filter;

# Files holding the most rows are processed first.
my @input_files = sort {
    scalar( keys %{ $HoH_Input{$b} } ) <=> scalar( keys %{ $HoH_Input{$a} } )
} keys %HoH_Input;

my @filter_files = sort {
    scalar( keys %{ $HoH_Filter{$b} } ) <=> scalar( keys %{ $HoH_Filter{$a} } )
} keys %HoH_Filter;

my %final;    # pin name => finished output line

foreach my $file (@input_files) {

    # Sort on the line numbers themselves (the hash keys); comparing the
    # hash values would compare references, not positions in the file.
    LINE:
    foreach my $line ( sort { $a <=> $b } keys %{ $HoH_Input{$file} } ) {
        my $entry = $HoH_Input{$file}{$line};

        foreach my $filter (@filter_files) {
            foreach my $filter_line ( sort { $a <=> $b }
                                      keys %{ $HoH_Filter{$filter} } )
            {
                my $waiver = $HoH_Filter{$filter}{$filter_line};
                next if $entry->{'pins'} ne $waiver->{'pins'};

                my $subtraction = sprintf( '%2.2f',
                    $waiver->{'threshold-1'} - $waiver->{'threshold-2'} );

                $final{ $entry->{'pins'} } = join( ' ',
                    $entry->{'scenario'},
                    $entry->{'pins'},
                    $waiver->{'threshold-1'},
                    $waiver->{'threshold-2'},
                    $subtraction,
                    '(WAIVED)',
                );

                # The first matching waiver settles this row.  Carrying on
                # with the remaining waiver rows would let a later,
                # non-matching row overwrite the WAIVED entry.
                next LINE;
            }
        }

        # No waiver names this pin: keep the row unchanged.
        $final{ $entry->{'pins'} } = join( ' ', @{$entry}{@keys_input_file} );
    }
}

# print Dumper \%final;

my $result = 'output.txt';
open my $output, '>>', $result
    or die "Could not open ".$result.": $!";

# Hash keys are unique, so a plain string sort fully orders them.
foreach my $key ( sort keys %final ) {
    say $output $final{$key};
}

close $output
    or warn "Could not close ".$result.": $!";

Update2: It occurred to me that the filter file is small in comparison to the input file, so I changed the algorithm to iterate over each line of the filter file once and compare it with the input file, instead of the opposite. It should save some resources with big files. Find the code below:

#!/usr/bin/perl
use strict;
use warnings;
use feature 'say';    # core replacement for the non-core "use say;"
use Data::Dumper;

# Usage: perl test.pl in1.txt filter1.txt filter2.txt in2.txt ...
#
# Reads any number of input (report) files and filter (waiver) files given
# on the command line, in any order.  A line containing a comma is treated
# as a waiver row; every other non-blank line is treated as an input row.
# One result line is kept per pin: the input row unchanged, or, when a
# waiver row names the same pin, the row rewritten with the waiver's
# thresholds and marked "(WAIVED)".  The result lines are appended to
# output.txt sorted by pin name.
#
# Each waiver row is visited exactly once and matched against the input
# rows through a hash lookup on the pin name.

# Column names, in file order, for the two kinds of file.
my @keys_input_file = ('scenario', 'pins', 'threshold-1', 'threshold-2',
                       'subtraction', 'notation');
my @keys_filter_file = ('pins', 'threshold-1', 'threshold-2',
                        'justification', 'date');

my %HoH_Input;     # $HoH_Input{file name}{line number}{column name}
my %HoH_Filter;    # $HoH_Filter{file name}{line number}{column name}

while (<>) {
    chomp;
    next if /^\s*$/;    # skip empty lines

    my @values;
    if (/,/) {
        @values = split /,/, $_;
        @{ $HoH_Filter{$ARGV}{$.} }{@keys_filter_file} = @values;
    }
    else {
        # split ' ' ignores leading whitespace, so an indented line does
        # not produce an empty first field that would shift every column.
        @values = split ' ', $_;
        @{ $HoH_Input{$ARGV}{$.} }{@keys_input_file} = @values;
    }
}
continue {
    # reset line numbering on each input file
    close ARGV if eof;    # Not eof()!
}

# print Dumper \%HoH_Input, \%HoH_Filter;

# Files holding the most rows are processed first.
my @input_files = sort {
    scalar( keys %{ $HoH_Input{$b} } ) <=> scalar( keys %{ $HoH_Input{$a} } )
} keys %HoH_Input;

my @filter_files = sort {
    scalar( keys %{ $HoH_Filter{$b} } ) <=> scalar( keys %{ $HoH_Filter{$a} } )
} keys %HoH_Filter;

my %final;          # pin name => finished output line
my %scenario_of;    # pin name => scenario column of the matching input row

# Pass 1: every input row starts out unchanged.  Doing this before any
# waiver is looked at means a waiver row for some other pin can never
# overwrite an entry that has already been marked WAIVED.
foreach my $file (@input_files) {

    # Sort on the line numbers themselves (the hash keys); comparing the
    # hash values would compare references, not positions in the file.
    foreach my $line ( sort { $a <=> $b } keys %{ $HoH_Input{$file} } ) {
        my $entry = $HoH_Input{$file}{$line};

        $final{ $entry->{'pins'} } = join( ' ', @{$entry}{@keys_input_file} );
        $scenario_of{ $entry->{'pins'} } = $entry->{'scenario'};
    }
}

# Pass 2: visit each waiver row once and rewrite the entry of the pin it
# names.  If several waiver rows name the same pin, the last one processed
# wins.
foreach my $filter (@filter_files) {
    foreach my $filter_line ( sort { $a <=> $b }
                              keys %{ $HoH_Filter{$filter} } )
    {
        my $waiver = $HoH_Filter{$filter}{$filter_line};
        my $pins   = $waiver->{'pins'};

        next if !defined $pins;          # row without a pin column
        next if !exists $final{$pins};   # waiver for a pin not in the input

        my $subtraction = sprintf( '%2.2f',
            $waiver->{'threshold-1'} - $waiver->{'threshold-2'} );

        $final{$pins} = join( ' ',
            $scenario_of{$pins},
            $pins,
            $waiver->{'threshold-1'},
            $waiver->{'threshold-2'},
            $subtraction,
            '(WAIVED)',
        );
    }
}

# print Dumper \%final;

my $result = 'output.txt';
open my $output, '>>', $result
    or die "Could not open ".$result.": $!";

# Hash keys are unique, so a plain string sort fully orders them.
foreach my $key ( sort keys %final ) {
    say $output $final{$key};
}

close $output
    or warn "Could not close ".$result.": $!";

__END__

$ cat output.txt
abcd123 klmn123 100.00 1000.00 -900.00 (VIOLATED)
abcd124 klmn124 100.00 2500.00 -2400.00 (WAIVED)

Hope this helps, BR.

Seeking for Perl wisdom...on the process of learning...not there...yet!
  • Comment on Re: Filter out an input file with a given waiver file, and output to a specific file 2.0 (Update2)
  • Select or Download Code