Update: This code has a bug, which can cause
it to return one line of the result wrong.
I've uploaded the correct code as a reply to this node.
Diff the two codes to see where the bug was.
There are several modules for heaps on CPAN.
However, AFAIK, none of them supports popping an element and
adding a new one in one step.
I give you an example code doing that, so this may be faster than
those modules (or it might not be, I don't know).
This code excepts du output or the like as input,
and prints the largeset files.
This of course probably won't make any sense unless you know what
binary heaps are.
#!/usr/local/bin/perl
use warnings; use strict; use less;
our $infinity= 1e200;
# $h= heap_init ($n): returns heap of $n elements
# I use -1e200 as a very small number, probably smaller than any data
+input.
sub heap_init {
[([-$infinity])x$_[0]];
}
# heap_repl ($h, $e): remove smallest heap element and insert new elem
+ent $e.
# each element should be an array-ref, of which the first element is a
# numerical key.
sub heap_repl {
my ($h, $e, $n, $p);
($h, $e)= @_;
$n= 0;
$p= $$h[0];
while (1) {
if (2*$n+2 < @$h && $$h[2*$n+2][0] < $$e[0]) {
if ($$h[2*$n+1][0] < $$h[2*$n+2][0]) {
$$h[$n]= $$h[2*$n+1]; $n= 2*$n+1;
}
else {
$$h[$n]= $$h[2*$n+2]; $n= 2*$n+2;
}
}
elsif (2*$n+1 < @$h && $$h[2*$n+1][0] < $$e[0]) {
$$h[$n]= $$h[2*$n+1]; $n= 2*$n+1;
}
else
{ last; }
}
$$h[$n]= $e;
return $p;
}
# heap_pop ($h) removes the lowest element from a heap; returns undef
+if the
# heap is empty
sub heap_pop {
my ($h, $e);
($h)= @_;
@$h<=1 and do {
@$h==1 and return pop @$h;
return;
};
$e= pop @$h;
return heap_repl $h, $e;
}
# main
{
my $heap= heap_init 32;
my $elt;
while (<>) {
$_=~ /^(\d+)\s+(.*)$/ or
die "error: line does not match";
heap_repl $heap, [$1, $2];
}
while (defined ($elt= heap_pop $heap)) {
print $$elt[0], "\t", $$elt[1], $/;
}
}
__END__
|