sourcecode
jcwren
<code>
#!/usr/local/bin/perl -w
#
# Invoke with './statswhore.pl [-u username] [-p password]'
#
# Alternatively, username and/or password can be embedded into the script, if you don't want
# command line arguments.
#
# Displays a users total writeups, total reputation, along with min, max, and average. Only
# works for your own account, since reps are 'proprietary'
#
# Requires:
# HTML::TableExtract
# LWP::Simple
#
# Copyright 2000(c) J.C.Wren jcwren@jcwren.com
# No rights reserved, use as you see fit. I'd like to know about it, though, just for kicks.
#
# This module has more code than is actually necessary for the functionality provided.
# I was originally writing a module for another function, and this came out of it. The
# @articles array that is returned from get_article_list() contains an array reference
# that has the articles name, prefixed by the string 'node_id=xxx:', where 'xxx' is the
# number the article title refers to, the reputation of the article, and the date the
# article was written. I'm sure you can imagine some uses for this...
#
use strict;
use LWP::Simple;
use Getopt::Std;
my $def_username = ""; # Set this to your user name if you don't want to use the -u option
my $def_password = ""; # Set this to your pass word if you don't want to use the -p option
my $pmsite = "http://www.perlmonks.org/index.pl";
{
my %args = ();
getopts ('u:p:', \%args);
my $username = $args{u} || $def_username;
my $password = $args{p} || $def_password;
die "No password and/or username. Program terminated.\n" if (!$username || !$password);
show_reps ($username, $password);
}
sub show_reps
{
@_ == 2 or die "Incorrect number of parameters";
my ($username, $password) = @_;
my $total = 0;
my $rarticles = get_article_list ($username, $password);
die "You have no articles, perhaps?\n" unless ($#$rarticles >= 0);
$total += @$_[1] for (@$rarticles);
@$rarticles = sort {@$a[1] <=> @$b[1]} @$rarticles;
print "\n";
print sprintf (" User: %s\n", $username);
print sprintf (" Total articles: %d\n", $#$rarticles + 1);
print sprintf (" Total reputation: %d\n", $total);
print sprintf (" Min reputation: %d\n", @$rarticles [0]->[1]);
print sprintf (" Max reputation: %d\n", @$rarticles [-1]->[1]);
print sprintf ("Average reputation: %3.2f\n", $total / ($#$rarticles + 1));
print "\n";
}
sub get_article_list
{
@_ == 2 or die "Incorrect number of parameters";
my ($username, $password) = @_;
my @articles = ();
$LWP::Simple::FULL_LWP = 1;
for (my $i = 0; 1; $i += 50)
{
my $url = "$pmsite?user=$username&passwd=$password&op=login&node=perl+monks+user+search&usersearch=$username&orderby=createtime%20desc&count=$i";
if (my $page = get ($url))
{
last if (get_article_page ($page, ['Writeup', 'Rep', 'Create Time'], \@articles, $i % 50) < 50);
}
else
{
die "Get on $url failed.";
}
}
return (\@articles);
}
sub get_article_page
{
@_ == 4 or die "Incorrect number of parameters";
my ($html, $tablecols, $rarticles, $lines) = @_;
my $rowcnt = 0;
$html =~ s/bgcolor=>/bgcolor="">/mg;
my $te = new jcwExtract (headers => $tablecols)->parse ($html);
die sprintf ("Wrong number of tables (%d) returned! (Probably bad username/password)\n", scalar $te->table_states) if (scalar $te->table_states != 1) ;
foreach my $ts ($te->table_states)
{
foreach ($ts->rows)
{
last if (@$_[2] !~ /\d+-\d+\d+/);
push @$rarticles, [ @$_ ];
$rowcnt++;
}
}
return ($rowcnt);
}
BEGIN
{
#
# This is not good code. It's really evil and the both the author of this package, and the author
# of HTML::TableExtract should be severely beaten about the head and shoulders.
#
package jcwExtract;
use strict;
use HTML::TableExtract;
@jcwExtract::ISA = qw(HTML::TableExtract HTML::TableExtract::TableState);
my $node_id = undef;
{
local $^W = 0;
#
# Override the _add_text mode that if $node_id is defined, we'll insert the node_id
# value at the front of the string.
#
eval 'sub HTML::TableExtract::TableState::_add_text
{
my ($self, $txt, $skew_column) = @_;
defined $txt or return;
my $row = $self->{content}[$#{$self->{content}}];
$txt = sprintf ("node_id=%d:%s", $node_id, $txt) if defined ($node_id);
$node_id = undef;
$row->[$skew_column] .= $txt;
$txt;
}';
}
#
# Overridden start method, so we can look for <A HREF=...> tags
#
sub start
{
my $self = shift;
my ($tag, $attr, $attrseq, $origtext) = @_;
$self->SUPER::start (@_);
#
# If it's a <A HREF=...> tag, and has a node_id, then $1 will contain the node_id.
# If we're in a table cell, set $node_id to $1, otherwise undef it. We don't simply
# set $node_id to undef if it's not a <A> tag, because we want to save the last value
# if there is a subsequent <B> or <i> or somesuch tag between the <A> and </A> tags.
#
if ($tag eq 'a' && defined ($attr->{'href'}) && $attr->{'href'} =~ /\bnode_id=(\d+)/i)
{
$node_id = ($self->_current_table_state->{in_cell}) ? $1 : undef;
}
}
1;
}
</code>
Extracts a users total writeups, and total repuration, along with min, max, and average. Account and password can be embedded into the program, or supplied on the command line
PerlMonks Related Scripts
Chris 'jcwren' Wren<br>
jcwren@jcwren.com