#!/usr/bin/perl -sw
#
# Sort a file of fixed-length binary records that may be too large to sort
# in memory in one go.
#
# Usage: $0 [-N=n] infile [outfile]
#
#   -N=n     Number of characters, starting at record offset 3, used as the
#            bucket key (default 1). Parsed by perl's -s switch.
#   infile   File of $reclen-byte records.
#   outfile  Destination for the sorted records. When omitted the user is
#            asked to confirm and the result is written to STDOUT.
#
# Method:
#   1. Split: each record is appended to the temp file "temp.<key>".
#   2. Sort:  each temp file is read whole, sorted, and rewritten in place.
#   3. Merge: the temp files are concatenated into the output and deleted.
#
# Records are compared on their cp1047 (EBCDIC) encoding, so the resulting
# order follows that code page rather than the raw byte values.

use strict;
use sort "stable";
use Encode qw(encode decode);

our $N;    # Populated from the command line by the -s switch (-N=n).

$| = 1;    # Unbuffered STDOUT.

# Sort comparator, first field: the 17 characters at offset 3, ascending.
# Reads the package variables $a and $b set by sort().
sub key1 {
    return substr( $a, 3, 17 ) cmp substr( $b, 3, 17 );
}

# Sort comparator, second field: the 2 characters at offset 20.
# Sorts in descending order (note the swapped $b / $a).
sub key2 {
    return substr( $b, 20, 2 ) cmp substr( $a, 20, 2 );
}

my $reclen = 8072;    #! Adjust to suit your records/line ends.
$N = $N || 1;

warn "Usage: $0 [-N=n] file\n" and exit(-1) unless @ARGV;
warn "Reading input file $ARGV[0] ", -s $ARGV[0], "\n";

if ( not defined $ARGV[1] ) {
    warn "Output file not specified a Continue[N|y]?";
    my $answer = <STDIN>;

    # EOF on STDIN counts as "no".
    exit(-1) unless defined $answer and $answer =~ /^Y/i;
}

# From here on every readline returns one fixed-length record. This is set
# only after the prompt above so that the answer is still read as a line.
$/ = \$reclen;

open my $input, '<', $ARGV[0] or die "Couldn't open $ARGV[0]: $!";
binmode($input);

#! Split the input into one temp file per key.
# Both arrays are indexed by the key itself, so the key is used as a number.
# NOTE(review): a non-numeric key numifies to 0 (with a warning under -w) and
# all such records share one bucket -- confirm keys are always digits.
my ( @temp_names, @temp_fhs );
while ( my $rec = <$input> ) {
    my $key = substr( $rec, 3, $N );
    if ( not defined $temp_names[$key] ) {
        $temp_names[$key] = "temp.$key";
        warn("\rCreating file: $temp_names[$key] ");
        open $temp_fhs[$key], '>', $temp_names[$key]
            or die "Couldn't create $temp_names[$key]: $!";
        binmode( $temp_fhs[$key] );
    }
    print { $temp_fhs[$key] } $rec
        or die "Couldn't write to $temp_names[$key]: $!";
}
close $input;

# Close (and thereby flush) every temp file BEFORE looking at sizes: a size
# test on a file whose data is still sitting in the write buffer reports it
# as empty, which would silently drop that bucket from the sort and merge.
for my $i ( 0 .. $#temp_fhs ) {
    next unless defined $temp_fhs[$i];
    close $temp_fhs[$i] or die "Couldn't close $temp_names[$i]: $!";
}

#! Get rid of unused slots or those that reference a zero length file
my @temps = grep { defined $_ and !-z $_ } @temp_names;

warn "Split made to: ", scalar @temps, " files\n";

#! Sort the split files on the first & second field
for my $name (@temps) {
    warn "$name: reading;...";
    open my $fh, '<', $name or die "Couldn't open $name: $!";
    binmode($fh);
    my @recs = <$fh>;
    close $fh;

    warn " sorting: ", scalar @recs, " recs;...";

    # Encode to cp1047 so that cmp orders the fields by that code page,
    # sort (key2 only breaks ties left by key1), then decode back.
    @recs = map { decode( 'cp1047', $_ ) }
        sort { key1() || key2() }
        map { encode( 'cp1047', $_ ) } @recs;

    warn " writing;...";
    open $fh, '>', $name or die "Couldn't rewrite $name: $!";
    binmode($fh);
    print {$fh} @recs or die "Couldn't write to $name: $!";
    close $fh or die "Couldn't close $name: $!";
    warn "done;\n";
}

warn "Merging files: ";

# Write to the named output file, or to STDOUT when none was given. The
# handle is put in binary mode in both cases because the records are binary.
my $sorted;
if ( defined $ARGV[1] and length $ARGV[1] ) {
    open $sorted, '>', $ARGV[1] or die "Couldn't open $ARGV[1]: $!";
}
else {
    $sorted = \*STDOUT;
}
binmode($sorted);

# NOTE(review): the buckets are concatenated in descending key order while
# the records inside each bucket are ascending on the first field -- confirm
# that this is the intended overall order.
for my $name ( reverse @temps ) {
    warn " $name;";
    open my $fh, '<', $name or die "Couldn't open $name: $!";
    binmode($fh);

    # Copy record by record instead of slurping the whole bucket.
    while ( my $rec = <$fh> ) {
        print {$sorted} $rec or die "Couldn't write sorted output: $!";
    }
    close $fh;
}

warn "\nClosing sorted file: sorted\n";
close $sorted or die "Couldn't close sorted output: $!";

warn "Deleting temp files\n";
unlink $_ or warn "Couldn't unlink $_\n" for @temps;

warn "Done.\n";
exit(0);