my %occurrences; while(<>) { # Extract the IP addresses my ($from_ip, $to_ip) = /...some regex.../ or next; # Losslessly compress them into an 8-byte key my $key = pack("C8", split(/\./, "$from_ip.$to_ip")); $occurrences{$key}++; } # Now convert the counts to small, easily sorted things my @data; while (my ($key, $count) = each %occurrences) { # Use network byte-order (big-endian) because it # will sort alphabetically in numeric order push @data, pack("N", $count) . $key; # To save space, shrink the hash. Note that it is # normally unsafe to modify the set of entries in # a hash while iterating over them, but deleting # the current entry returned by 'each' is explicitly # allowed (perldoc -f each). delete $occurrences{$key}; } # Now we have a simple array of (packed) counts and IP # pairs. The easiest thing to implement would then be: return (reverse sort @data)[0..$N-1]; # A trickier implementation that would avoid the superlinear # expense of sorting would be to grab a random element and # partition the array into a group of elements that are # greater than that element, and a group of those that are # smaller. As soon as the size of the group that is greater # exceeds $N, throw out the other group and just finish # creating the group of larger elements. If you're unlucky # and your "larger" group never gets up to $N elements, # start over again with a smaller pivot element. # # Repeat until you are left with a more manageable size, # and then just do the sort. while (@data > 10000) { # Assuming $N < 10000 my (@big, @small); # The first element may be "random" enough, since it's # in perl's hash order. But you really probably ought # to pick a better one, just in case. my $pivot = shift(@data); while (@data) { my $elt = shift(@data); ($elt gt $pivot) ? push(@big,$elt) : push(@small,$elt); if (@big >= $N) { while (@data) { $elt = shift(@data); push(@big, $elt) if $elt gt $pivot; } last; } } if (@big >= $N) { @data = @big; } else { # Put the pivot at the end so we don't reuse it! push @data, @big, @small, $pivot; } }