use strict; use File::Temp qw(tempfile); use vars qw($eof $max_recs); # Having a localized $eof is necessary for STDIN to work as expected $max_recs = 500; sub get_block { my $in = shift; return if $eof; my $cmp = shift; my @lines; my $i; $eof = 1; while (<$in>) { push @lines, $_; if($max_recs < ++$i) { $eof = 0; last; } } my $temp = tempfile(); print $temp sort {$cmp->($a, $b)} @lines; seek($temp, 0, 0); return $temp; } sub merge_blocks { my $fh1 = shift; my $fh2 = shift; my $cmp = shift; my $other = <$fh2>; unless (defined $other) { # The second handle has nothing seek ($fh1, 0, 0); return $fh1; } my $out = tempfile(); while (<$fh1>) { if ($cmp->($_, $other) <= 0) { print $out $_; } else { print $out $other; $other = $_; ($fh1, $fh2) = ($fh2, $fh1); } } print $out $other; print $out $_ while <$fh2>; seek ($out, 0, 0); return $out; } sub sorted_block { my $in = shift; my $cmp = shift; my $depth = shift; if (1 < $depth) { my $fh1 = sorted_block($in, $cmp, $depth - 1); return unless defined($fh1); my $fh2 = sorted_block($in, $cmp, $depth - 1); if( defined($fh2) ) { return merge_blocks($fh1, $fh2, $cmp); } else { return $fh1; } } else { return get_block($in, $cmp); } } # If a reference it assumes it is a handle, else it returns an opened # filehandle sub get_handle { my $name = shift; if (ref($name)) { return $name; } else { my $fh = do {local *FOO}; open ($fh, $name) or die "Cannot read from $name: $!"; return $fh; } } # Takes 3 arguments, all optional. First a filehandle to read from, # defaults to STDIN. Secondly a filehandle to write to, defaults to # STDOUT. And thirdly a subroutine that takes 2 arguments and # compares them. Defaults to a lexical comparison. sub merge_sort { my $fh = get_handle(shift || \*STDIN); my $out = get_handle(shift || \*STDOUT); my $cmp = shift || sub {shift cmp shift}; my $sorted = tempfile(); my $depth = 1; local $eof = 0; while (my $block = sorted_block($fh, $cmp, $depth++)) { $sorted = merge_blocks($sorted, $block, $cmp); } print $out $_ while <$sorted>; } # Example test, sort a file from @ARGV or else STDIN. merge_sort(shift);