#!/usr/bin/perl

# getleaders [list-name] -- get the ten first people on ASPN archives,
#                           with better accuracy
#
#   getleaders            (defaults to perl-xml)
#   getleaders xml-dev    (gets xml-dev)

use strict;
use warnings;

use List::Util   qw(max);
use LWP::Simple  qw();
use HTML::Parser qw();

use constant BASE_URL => 'http://aspn.activestate.com/ASPN/Mail/Leaders/';

# How many leaders to print. The rank is printed with "%02d", which matches
# the zero-padded two-digit numbering used for a top ten.
use constant TOP_N => 10;

# Parser state shared by the two handlers:
#   $IN_PERSON  undef     -> not inside a leader entry
#               'person'  -> just saw the author link, the name comes next
#               <name>    -> name captured, waiting for the "N posts" text
#   %people     canonical author name => accumulated post count
our $IN_PERSON;
our %people;

# Ad hoc aliases: spellings seen on the page => the name they are counted
# under. Must be initialised before the parser runs, hence declared up here.
my %CANONICAL_NAME = (
    'Sterin, Ilya' => 'Ilya Sterin',
    'barries'      => 'Barrie Slaymaker',
);

# HTML::Parser start-tag handler ('tagname, attr').
# Flags the beginning of a leader entry when it sees the author link.
sub start_handler {
    my ($tag, $attr) = @_;

    return unless $tag eq 'a';

    # Most anchors carry no title attribute; skip them instead of matching
    # a regex against undef.
    my $title = $attr->{title};
    return unless defined $title;

    if ($title =~ m/Click to see postings by this author/) {
        $IN_PERSON = 'person';
    }
    return;
}

# HTML::Parser text handler ('dtext').
# Picks up the author name and then the post count that follows it.
sub text_handler {
    my ($txt) = @_;

    return unless defined $IN_PERSON;

    $txt =~ s/^\s+//;
    $txt =~ s/\s+$//;

    if ($IN_PERSON eq 'person') {
        # A whitespace-only chunk is not the name; keep waiting for it
        # rather than recording an empty author.
        return unless length $txt;

        normalize(\$txt);
        $IN_PERSON = $txt;
    }
    elsif ($txt =~ m/(\d+) posts/) {
        # Use the matched digits instead of relying on the whole string
        # numifying to its leading number.
        $people{$IN_PERSON} += $1;
        $IN_PERSON = undef;
    }
    return;
}

# Rewrites the name behind the given scalar reference to its canonical
# spelling when it is a known alias. This is very ad hoc.
sub normalize {
    my ($txt) = @_;

    $$txt = $CANONICAL_NAME{$$txt} if exists $CANONICAL_NAME{$$txt};
    return;
}

# --- fetch and parse the leaders page ------------------------------------

my $list = shift || 'perl-xml';
my $url  = BASE_URL . $list . '/';
my $html = LWP::Simple::get($url) or die "Could not get $url";

my $p = HTML::Parser->new(
    api_version => 3,
    start_h     => [ \&start_handler, 'tagname, attr' ],
    text_h      => [ \&text_handler,  'dtext' ],
);
$p->unbroken_text(1);
$p->parse($html);
$p->eof;

# --- sort and print the result -------------------------------------------

# Highest post count first; ties are broken by name so the output does not
# depend on hash ordering.
my @results = map  { [ $_, $people{$_} ] }
              sort { $people{$b} <=> $people{$a} or $a cmp $b }
              keys %people;

die "No posters found at $url\n" unless @results;

# Only print rows that actually exist: the list may have fewer than TOP_N
# posters.
my $last_index = $#results < TOP_N - 1 ? $#results : TOP_N - 1;
my @top        = @results[ 0 .. $last_index ];

# Column widths. Names are left-aligned to the longest displayed name;
# counts are right-aligned to the width of the largest count, which is the
# first row because the list is sorted in descending order.
my $name_width  = max map { length $_->[0] } @top;
my $count_width = length $top[0]->[1];

for my $i (0 .. $#top) {
    my ($name, $count) = @{ $top[$i] };
    printf "%02d. %-*s %*s\n",
        $i + 1, $name_width, $name, $count_width, $count;
}