Use your own sortsub, of course, and your own input records (I used random numbers). Otherwise, it's fit to use.
#!/usr/bin/perl -w
# Mergesort
#
# External merge sort: the input is split into runs of at most $max
# records, each run is sorted in memory and written to its own temporary
# file, and the runs are then merged by repeatedly emitting the smallest
# record waiting at the head of any run.
#
# Use your own sortsub and your own input records; the random numbers
# generated below are only demonstration data.
use strict;
use IO::Handle;                 # For the ->getline
use File::Temp qw(tempfile);    # Unpredictable, self-cleaning run files
require 5.6.0;                  # Sort sub prototypes

my $recs  = 13;    # Total number of records to sort.....
                   # Leave out of the real thing
my $max   = 5;     # Maximum number of records per merge file
my @files = ();    # One { fh => handle, queued => next record } per run

# sortsub($c, $d)
# Compare two records numerically; returns -1, 0 or 1 like <=>.
#
# The prototype is needed because we want lexical
# values in the sort because we're using it as a
# regular comparison and as a sort sub.
sub sortsub ($$) {
    my ($c, $d) = @_;
    return $c <=> $d;
}

# store(\@records)
# Sort one in-memory pile of records, write it to a fresh temporary file,
# rewind the file, and register it in @files with its first record already
# read into the 'queued' slot.  An empty pile is ignored.
# Dies if the run file cannot be created, written or rewound.
sub store {
    my ($records) = @_;
    return unless @$records;

    # In scalar context tempfile() hands back only a read/write handle;
    # File::Temp chooses the name and takes care of removing the file, so
    # nothing predictable is left behind in /tmp.
    my $fh = tempfile();

    my @sorted = sort sortsub @$records;    # Sort small pile
    print {$fh} @sorted or die "Can't write run file: $!";

    # A failed rewind would silently lose the whole run, so it is fatal.
    seek($fh, 0, 0) or die "Can't seek: $!";

    push @files, {
        fh     => $fh,
        queued => scalar <$fh>,
    };
    return;
}

# next_record()
# Return the smallest record currently queued across all runs and refill
# that run's queue from its file.  Returns undef once every run is
# exhausted.  A linear scan is used instead of sorting all run heads for
# every record; on ties the earliest run wins.
sub next_record {
    my $lowest;
    for my $run (@files) {
        next unless defined $run->{queued};
        if (!defined $lowest
            || sortsub($run->{queued}, $lowest->{queued}) < 0)
        {
            $lowest = $run;
        }
    }
    return unless defined $lowest;

    my $record = $lowest->{queued};
    $lowest->{queued} = $lowest->{fh}->getline();

    # Release the descriptor as soon as a run is used up.
    close $lowest->{fh} unless defined $lowest->{queued};

    return $record;
}

# This is where you'd read the input file to exhaustion
# I'm just making up data. The important part is the block itself.
my @sortarr;
while ($recs-- > 0) {
    push @sortarr, rand() . "\n";
    if (@sortarr == $max) {
        store(\@sortarr);
        @sortarr = ();
    }
}
store(\@sortarr) if @sortarr;    # Store the leftovers

# Merge phase: records come out in sortsub order.
while (defined(my $record = next_record())) {
    # Do your processing here
    print $record;
}
In reply to Re: Re: Sorting a large file
by clintp
in thread Sorting a large file
by c-era
| For: | Use: | ||
| & | &amp; | ||
| < | &lt; | ||
| > | &gt; | ||
| [ | &#91; | ||
| ] | &#93; |