#!/usr/bin/perl
#
# Print a table of the 100 most-downloaded distributions in a download log.
#
# Input:  log lines from the files named on the command line, or from STDIN
#         when none are given (the behaviour of the <> operator).
# Output: a table produced by as_table() (imported via Data::Hash::Totals),
#         printed to STDOUT.
#
# A line is used only if it contains, in order: a token (stored as the client
# "ip"), a token (stored as the "date"), and a path containing a "/" whose
# final component ends in .tar.gz or .tgz.  All other lines are skipped.
#
# Downloads made by the heaviest ~1% of clients are left out of the tally so
# that a handful of very active clients do not dominate the ranking.

use warnings;
use strict;

use Data::Hash::Totals;

# Number of distributions to report.
my $TOP_N = 100;

# Fraction of clients (ranked by download count) considered "heavy" and
# excluded from the tally.
my $HEAVY_FRACTION = 0.01;

# --- Parse the log ---------------------------------------------------------

my @downloads;

LINE:
while ( my $line = <> ) {
    next LINE unless $line =~ m!(\S+)\s+(\S+)\s+\S*/(\S+)\s!;
    my ( $ip, $date, $dist ) = ( $1, $2, $3 );

    # Only archive files count as downloads; drop the archive extension.
    next LINE unless $dist =~ s/\.(?:tar\.gz|tgz)$//;

    # Drop a trailing version such as "-1.23" or "_0.04_01" so that every
    # release of a distribution is tallied under one name.  The pattern
    # needs at least two version characters, so a bare "-1" is left alone.
    $dist =~ s/[-_][0-9][0-9a-z._]+$//;

    # Drop a literal ".pm" suffix.  The dot must be escaped: an unescaped
    # dot would also mangle names that merely end in "pm" (e.g. "Image-xpm").
    $dist =~ s/\.pm$//;

    push @downloads, { ip => $ip, date => $date, dist => $dist };
}

# --- Find the download-count cutoff for heavy clients ----------------------

my %hits_for;
$hits_for{ $_->{ip} }++ for @downloads;

# Clients ordered from most to fewest downloads.
my @ips_by_hits = sort { $hits_for{$b} <=> $hits_for{$a} } keys %hits_for;

# Indices 0 .. $cut-1 are the heavy clients; the client at index $cut is the
# busiest one that is NOT heavy.  The cutoff is that client's download
# COUNT (not its address), and is 0 when the log had no usable lines.
my $cut    = int( $HEAVY_FRACTION * scalar @ips_by_hits );
my $cutoff = @ips_by_hits ? $hits_for{ $ips_by_hits[$cut] } : 0;

# --- Tally distributions for everyone else ---------------------------------

my %count_for;
for my $download (@downloads) {

    # "<=" keeps the client at index $cut (and clients tied with it), so the
    # tally is never empty just because all clients downloaded equally often.
    next if $hits_for{ $download->{ip} } > $cutoff;
    $count_for{ $download->{dist} }++;
}

# --- Report the top distributions ------------------------------------------

# Most-downloaded first; ties are broken by name so the selection of the
# top $TOP_N is deterministic.
my @dists_by_count =
    sort { $count_for{$b} <=> $count_for{$a} or $a cmp $b } keys %count_for;

# Guarded because splice() warns when the offset is past the end.
splice @dists_by_count, $TOP_N if @dists_by_count > $TOP_N;

my %top;
@top{@dists_by_count} = @count_for{@dists_by_count};

print as_table( \%top );