#!/usr/bin/perl

# Summarise a download log read from the files named on the command line
# (or from STDIN when none are given): tally tarball downloads per
# distribution, leaving out the heaviest downloaders (roughly the top 1% of
# sources by request count), and print the 100 most-downloaded distributions
# as a table.

use warnings;
use strict;

use Data::Hash::Totals;

my $TOP_N          = 100;     # how many distributions to report
my $HEAVY_FRACTION = 0.01;    # share of sources treated as heavy downloaders

# One record per accepted log line: { ip => ..., date => ..., dist => ... }
my @log;
while ( my $line = <> ) {

    # Two whitespace-separated fields followed by a path; only the final
    # component of the path (the file name) is kept.
    next unless $line =~ m!(\S+)\s+(\S+)\s+\S*/(\S+)\s!;
    my ( $ip, $date, $dist ) = ( $1, $2, $3 );

    # Only tarballs are counted; strip the archive extension.
    next unless $dist =~ s/\.(?:tar\.gz|tgz)$//;

    # Strip a trailing "-version" / "_version" suffix.
    $dist =~ s/[-_][0-9][0-9a-z._]+$//;

    # Strip a literal ".pm" suffix.  The dot must be escaped: unescaped it
    # matches any character and would mangle names that merely end in "pm".
    $dist =~ s/\.pm$//;

    push @log, { ip => $ip, date => $date, dist => $dist };
}

# Count requests per source and find the request count that marks the
# 99th percentile of downloaders.
my %hits_for;
$hits_for{ $_->{ip} }++ for @log;

# Sort the *counts* (not the addresses) so the cutoff is a number that can be
# compared against each source's request count below.
my @counts_desc = sort { $b <=> $a } values %hits_for;
my $cut         = int( $HEAVY_FRACTION * scalar @counts_desc );
my $cutoff      = $counts_desc[$cut];

# Tally distributions for everyone else.  The comparison is strict, so
# sources whose count equals the cutoff are excluded as well.
my %dist;
for my $entry (@log) {
    $dist{ $entry->{dist} }++ if $hits_for{ $entry->{ip} } < $cutoff;
}

# Rank by download count; ties are broken by name so the selection of the
# top entries is deterministic.
my @ranked = sort { $dist{$b} <=> $dist{$a} || $a cmp $b } keys %dist;
splice @ranked, $TOP_N if @ranked > $TOP_N;

my %top100;
@top100{@ranked} = @dist{@ranked};

print as_table( \%top100 );

__END__

=head1 SAMPLE OUTPUT

Listing that accompanied this script (presumably the result of a run over a
CPAN download log):

    17596 Net_SSLeay
    13732 DBD-mysql
    11138 DBI
     8226 perl-ldap
     7542 Mail-SpamAssassin
     5528 GD
     5440 libwww-perl
     4557 HTML-Parser
     3865 Digest-SHA1
     3449 Digest
     3397 CGI
     3260 MIME-Base64
     2868 XML-Parser
     2786 Digest-MD5
     2635 DBD-Pg
     2630 MIME-tools
     2625 File-Scan
     2530 Compress-Zlib
     2236 URI
     2173 Net-DNS
     2136 Time-HiRes
     2130 Archive-Tar
     2001 Test-Simple
     1904 Tk
     1767 DateManip
     1743 Digest-HMAC
     1650 HTML-Tagset
     1629 MailTools
     1617 libnet
     1540 Gtk-Perl
     1476 DB_File
     1470 Archive-Zip
     1418 DBD-Oracle
     1400 Msql-Mysql-modules
     1286 Apache-ASP
     1286 HTML-Template
     1138 Template-Toolkit
     1134 IO-stringy
     1124 Apache-MP3
     1109 mod_perl
     1087 MD5
     1008 Storable
      998 Module-Build
      995 Crypt-CBC
      972 Net-Telnet
      952 CPAN
      918 XML-Writer
      916 Date-Calc
      908 IMAP-Admin
      900 TimeDate
      836 Convert-ASN1
      829 AppConfig
      817 IO-String
      800 GDGraph
      787 Net-SNMP
      783 MIME-Lite
      783 XML-Generator
      782 BerkeleyDB
      773 Curses
      763 AcePerl
      760 PathTools
      757 TermReadKey
      747 Crypt-SSLeay
      726 Convert-TNEF
      714 Zanas
      703 ExtUtils-MakeMaker
      691 IO-Socket-SSL
      662 HTML-Mason
      655 Test-Harness
      653 XML-Simple
      624 bioperl
      616 DBIx-SQLEngine
      608 IO-Zlib
      603 PodParser
      601 GDTextUtil
      599 PerlMagick
      597 Parallel-Pvm
      596 SOAP-Lite
      571 Authen-SASL
      557 AxKit-App-TABOO
      557 Spreadsheet-WriteExcel
      553 Bit-Vector
      553 Data-Dumper
      544 Parse-RecDescent
      542 App-Info
      533 perl
      529 DBD-ODBC
      528 Net-Server
      525 Authen-PAM
      520 Crypt-DES
      519 Config-Maker
      514 Bio-Das
      512 File-Tail
      505 Excel-Template
      502 Boulder
      502 XML-LibXML
      500 Mail-ClamAV
      498 IOC
      496 Event
      485 Apache-Session

=cut