#!/usr/bin/perl
# Mergesort
#
# Demonstration of an external merge sort:
#   1. Records are collected into piles of at most $max records.
#   2. Each pile is sorted in memory and written to its own temporary
#      file (a "run") by store().
#   3. The runs are merged by repeatedly emitting the smallest record
#      currently queued at the head of any run.
#
# The file can also be loaded with require/do without running the demo;
# the driver only runs when the file is executed as a script.

use strict;
use warnings;
use 5.006_001;                # Sort sub prototypes; File::Temp in core

use File::Temp qw(tempfile);  # Safe, self-cleaning temporary files
use IO::Handle;               # For the ->getline

my $recs  = 13;   # Total number of records to sort.....
                  # Leave out of the real thing
my $max   = 5;    # Maximum number of records per merge file
my @files = ();   # One { fh => ..., queued => ... } entry per run

# Numeric comparison of two records; returns -1, 0 or 1.
#
# The ($$) prototype is deliberate: it makes sort() pass the two elements
# in @_ instead of the package variables $a and $b, so the very same sub
# works as a sort sub and as a regular comparison function.
sub sortsub ($$) {
    my ($c, $d) = @_;
    return $c <=> $d;
}

# Sort one pile of records and park it in a new temporary run file.
#
#   $records - reference to an array of newline-terminated records
#
# Pushes { fh => handle, queued => first record of the run } onto @files.
# "queued" is undef when the pile was empty.  Dies if the run file cannot
# be created, written or rewound.
sub store {
    my ($records) = @_;

    # Scalar context: only the handle is returned and File::Temp takes
    # care of removing the file, so no predictable name is ever exposed
    # in /tmp and nothing is left behind.
    my $fh = tempfile();

    print {$fh} sort sortsub @$records    # Sort small pile
        or die "Can't write merge file: $!";

    # A run that cannot be rewound would silently vanish from the merged
    # output, so this is fatal rather than a warning.
    seek $fh, 0, 0
        or die "Can't seek in merge file: $!";

    push @files, {
        fh     => $fh,
        queued => scalar <$fh>,
    };
    return;
}

# Return the run whose queued record is smallest, or nothing once every
# run is exhausted.  On ties the earliest run wins.
sub _lowest_run {
    my $lowest;
    for my $run (@files) {
        next unless defined $run->{queued};
        $lowest = $run
            if !defined $lowest
            || sortsub($run->{queued}, $lowest->{queued}) < 0;
    }
    return $lowest;
}

# Driver: build the runs from made-up data, then merge them to the
# currently selected output handle.
sub main {
    @files = ();

    # This is where you'd read the input file to exhaustion
    # I'm just making up data.  The important part is the block itself.
    my @sortarr;
    for (1 .. $recs) {
        push @sortarr, rand() . "\n";
        if (@sortarr == $max) {
            store(\@sortarr);
            @sortarr = ();
        }
    }
    store(\@sortarr) if @sortarr;    # Store the leftovers

    while (my $lowest = _lowest_run()) {
        # Do your processing here
        print $lowest->{queued};

        # Refill the queue slot from the same run; undef marks it done.
        $lowest->{queued} = $lowest->{fh}->getline();
        if (!defined $lowest->{queued}) {
            # Run exhausted: release the handle (and its temp file) now
            # instead of holding it until the program exits.
            close $lowest->{fh}
                or warn "Can't close merge file: $!";
            delete $lowest->{fh};
        }
    }
    return;
}

main() unless caller;

1;