#!/usr/bin/perl
# Merge block-structured files, a bunch at a time, into one sorted file.
#
# Usage: script BUNCH_SIZE FILE...
#
# Input files consist of blank-line separated blocks of four lines each:
#
#   line 0   copied through unchanged
#   line 1   the key; blocks sharing a key are merged
#   line 2   copied through unchanged
#   line 3   PREFIX, whitespace, whitespace-separated integer counts, and a
#            trailing "count:N" total
#
# The files are processed BUNCH_SIZE at a time.  The first bunch creates the
# temporary result file; each further bunch is merged into it.  The result
# is sorted by key (string order), each block lists the counts collected
# from every file that contained its key, and "count:" is their sum.
use warnings;
use strict;
use feature qw{ say };

use List::Util qw{ sum };

# Name of the result file; the process id keeps concurrent runs apart.
use constant temp_file => "tmp.$$";

# Split one block of text into its parts.
#
# Returns a hash reference with the keys:
#   key     second line of the block
#   counts  array ref of the integers following the prefix on the 4th line
#   count   the number after "count:" on the 4th line (undef if absent)
#   prefix  first whitespace-delimited token of the 4th line
#   line0   first line of the block
#   line2   third line of the block
sub parse_block {
    my ($block) = @_;

    # Trailing empty fields are dropped by split, so the blank separator
    # lines at the end of a paragraph-mode record do not matter.
    my @lines      = split /\n/, $block;
    my $stats_line = $lines[3] // q();

    my ($prefix, $counts) = $stats_line =~ /^(\S+)\s((?:\d+\s*)+)/;
    my ($count)           = $stats_line =~ /count:(\d+)/;

    return {
        key    => $lines[1],
        counts => [ ($counts // q()) =~ /\d+/g ],
        count  => $count,
        prefix => $prefix,
        line0  => $lines[0],
        line2  => $lines[2],
    };
}

# Read all blocks of the given files into memory.
#
# Returns a hash reference mapping each key to an array reference
# [ line0, key, line2, prefix, count, count, ... ].  The four header fields
# come from the first block seen for a key; counts of later blocks with the
# same key are appended.
sub read_bunch {
    my @files = @_;

    local $/ = q();    # Paragraph mode: one blank-line separated block per read.

    my %h;
    for my $file (@files) {
        print STDERR "$file\r";    # Progress indicator, overwritten in place.
        open my $IN, '<', $file
            or die "Cannot open '$file' for reading: $!";
        while (my $block = <$IN>) {
            my $parsed = parse_block($block);
            my $record = $h{ $parsed->{key} }
                //= [ @{$parsed}{qw{ line0 key line2 prefix }} ];
            push @$record, @{ $parsed->{counts} };
        }
        close $IN or die "Cannot close '$file': $!";
    }
    return \%h;
}

# Create the result file from the first bunch of files, sorted by key.
sub process_first_bunch {
    my @files = @_;

    my $h    = read_bunch(@files);
    my $path = temp_file;

    open my $TMP, '>', $path or die "Cannot create '$path': $!";
    print_block($TMP, $h->{$_}) for sort keys %$h;
    close $TMP or die "Cannot close '$path': $!";
    return;
}

# Merge another bunch of files into the existing result file.
#
# The bunch is read into memory, then merged with the (already sorted)
# result file in a single pass.  Output goes to a scratch file which
# replaces the result file once it has been written completely.
sub process_next_bunch {
    my @files = @_;

    my $h     = read_bunch(@files);
    my %stats = ( ins => 0, merge => 0, 0 => 0 );
    my @keys  = sort keys %$h;

    my $path = temp_file;
    # A fixed suffix keeps the scratch file next to the result file.  Naming
    # it after the first input file breaks as soon as that file name
    # contains a directory part.
    my $new_temp = "$path.new";

    local $/ = q();    # The result file is read block by block, too.

    open my $TMP, '<', $path     or die "Cannot open '$path' for reading: $!";
    open my $OUT, '>', $new_temp or die "Cannot create '$new_temp': $!";

    while (my $block = <$TMP>) {
        my $parsed = parse_block($block);
        my $key    = $parsed->{key};

        # Insert: new keys sorting before the current one go first.
        while (@keys && $keys[0] lt $key) {
            ++$stats{ins};
            print_block($OUT, $h->{ shift @keys });
        }

        if (@keys && $key eq $keys[0]) {
            # Merge: keep the stored header, append the new counts.
            ++$stats{merge};
            shift @keys;
            my $record     = $h->{$key};
            my @new_counts = @{$record}[ 4 .. $#{$record} ];
            print_block($OUT, [ @{$parsed}{qw{ line0 key line2 prefix }},
                                @{ $parsed->{counts} },
                                @new_counts ]);
        }
        else {
            # No change: copy the block through verbatim.
            ++$stats{0};
            print {$OUT} $block or die "Cannot write to '$new_temp': $!";
        }
    }

    # Add the rest: keys sorting after everything in the result file.
    $stats{ins} += @keys;
    print_block($OUT, $h->{$_}) for @keys;

    # Release the old file before replacing it; renaming over a file that
    # is still open is not possible on every platform.
    close $TMP or die "Cannot close '$path': $!";
    close $OUT or die "Cannot close '$new_temp': $!";
    rename $new_temp, $path
        or die "Cannot rename '$new_temp' to '$path': $!";

    say STDERR "inserted: $stats{ins}, merged: $stats{merge}, kept: $stats{0}.";
    return;
}

# Write one block to the filehandle $OUT.
#
# $details is an array reference [ line0, key, line2, prefix, counts... ].
# The counts are written tab-separated after the prefix, followed by
# "count:" and their sum, and the block is terminated by an empty line.
# The array behind $details is not modified.
sub print_block {
    my ($OUT, $details) = @_;

    # Work on a copy so the caller's record stays intact.
    my @counts = @$details;
    my @header = splice @counts, 0, 4;

    # The leading 0 gives a total of 0 (not undef) for a block without counts.
    my $sum = sum(0, @counts);

    print {$OUT} join("\n", @header), "\t",
                 join("\t", @counts, "count:$sum"), "\n\n"
        or die "Cannot write block: $!";
    return;
}

# Command line driver: the first argument is the bunch size, the remaining
# arguments are the files to merge.
sub main {
    my @args = @_;

    my $usage = "Usage: $0 BUNCH_SIZE FILE...\n";
    unless (@args) {
        warn $usage;
        return;
    }

    my $bunch_size = shift @args;
    die "Bunch size must be a positive integer, not '$bunch_size'.\n$usage"
        unless $bunch_size =~ /\A[1-9][0-9]*\z/;

    process_first_bunch(splice @args, 0, $bunch_size);
    while (my @files = splice @args, 0, $bunch_size) {
        process_next_bunch(@files);
    }
    say STDERR "Result: ", temp_file;
    return;
}

# Run only when executed as a program, so the file can also be loaded
# (e.g. by tests) without side effects.
main(@ARGV) unless caller;

1;