Description: |
A little while back in the CB, someone (forgive me, I have forgotten who), mentioned it would be interesting to see the age distribution of those monks who have reported their ages for the PerlMonks Stats Pages. So I decided to take a stab at generating such stats.
The script currently pulls up the upcoming birthdays page and scrapes the age information out, then outputs the requested file format (HTML or PNG). A future version might pull XML data from the server instead when it is available.
Examples: HTML Output, Summary Graph, Detail Graph.
Update: T'was dada that proposed the age stats in the CB. Thanks for the reminder ;)
Changed PDF to PNG in the comments. |
#!/usr/bin/perl -wT
# $Id: monkages.pl,v 1.3 2002/08/05 05:31:48 corwin Exp $
# 04 Aug 02 - rattusillegitimus of perlmonks.org
#
# Freely redistributable under the same terms as perl itself.
#
# monkages.pl -o HTML > ages.html
#
# monkages.pl -o PNG > summary.png
#
# monkages.pl -o PNG -d > detail.png
#
use strict;
use Getopt::Std; # Get output type
use LWP::Simple; # Gather the Data
use HTML::TableExtract; # Extract the Data
use HTML::Template; # Build output HTML
use GD::Graph::bars (); # Build output graph
my $url =
'http://www.tinymicros.com/pm/index.php?goto=UpcomingBirthdays';
my ( $content, $monks, %agesum, %agedet, $row, %opts );
getopts( 'do:', \%opts ); # Get my command-line options
# Get the page
unless ( defined( $content = get $url) ) {
die "Could not get $url\n";
}
# Let's do some parsing
my $te =
new HTML::TableExtract( headers =>
[ 'Node ID', 'Name', 'Birthday', 'Days Until', 'Current Age' ] )
+;
$te->parse($content);
foreach $row ( $te->rows ) {
$monks++;
@$row[4] eq '--' ? $agedet{-1}++ : $agedet{ @$row[4] }++;
@$row[4] eq '--' ? $agesum{-1}++ : $agesum{ int( @$row[4] / 10 ) }
+++;
}
if ( $opts{'o'} && ( $opts{'o'} eq 'PNG' ) ) {
my $gd;
if ( defined $opts{'d'} ) {
$gd = do_graph( 'D', %agedet );
}
else {
$gd = do_graph( 'S', %agesum );
}
print $gd->png;
}
else {
print do_html( \%agesum, \%agedet );
}
sub do_html {
# Apply the data to the template
my ( $agesum, $agedet ) = @_;
# Build the details array for the template
my ( @details, @summary );
for ( sort bynum keys %agedet ) {
push (
@details,
{
AGE => ( ( $_ == -1 ) ? 'N/A' : $_ ),
MONKS => $agedet{$_}
}
);
}
for ( sort bynum keys %agesum ) {
push (
@summary,
{
AGE => (
( $_ == -1 )
? 'N/A'
: sprintf( "%d - %d", $_ * 10, ( $_ * 10 ) + 9 )
),
MONKS => $agesum{$_}
}
);
}
my $template = HTML::Template->new( filehandle => *DATA );
$template->param( DETAILS => [@details] );
$template->param( SUMMARY => [@summary] );
$template->param( MONKCOUNT => $monks );
return $template->output;
}
sub bynum { $a <=> $b; } # numeric sorting for great justice
sub age_range {
my $key = shift;
return (
( $key == -1 )
? 'N/A'
: sprintf( "%d - %d", $key * 10, ( $key * 10 ) + 9 )
);
}
sub do_graph {
# Build me a graph using GD::Graph and return the GD image
my $type = shift;
my %agedata = @_;
my @data;
for ( sort bynum keys %agedata ) {
push @{ $data[0] }, ( $type eq 'S' )
? age_range($_)
: ( ( $_ < 0 ) ? 'N/A' : $_ );
push @{ $data[1] }, $agedata{$_};
}
my $graph = new GD::Graph::bars( 600, 400 ) || die "$!";
$graph->set(
title => 'PerlMonks Age Distribution',
x_label => ( ( $type eq 'S' ) ? 'Age Range' : 'Age' )
+,
x_label_position => 1 / 2,
y_label => 'Number of Monks',
shadow_depth => 3,
show_values => 1,
bar_spacing => 5,
);
return $graph->plot( \@data );
}
__DATA__
<html>
<head>
<title>Perlmonks Age Breakdown</title>
<style type="text/css">
<!--
html { font-family: Verdana; }
body { background-color: #f0f8ff; color: #000000; }
table { border-collapse: collapse;
border-style: ridge;
background-color: #ccccff; }
th,td { border-style: ridge; }
th { background-color: #cccccc; }
td { text-align: center; }
caption { white-space: nowrap; }
-->
</style>
</head>
<body>
<h1>Perlmonks Age Breakdown</h1>
<h2><TMPL_VAR NAME=MONKCOUNT> monks counted</h2>
<hr/>
<table>
<caption>
<strong>Age Summary</strong>
</caption>
<thead>
<tr>
<th scope="col">Age</th>
<th scope="col"># Monks</th>
</tr>
</thead>
<tbody>
<TMPL_LOOP NAME=SUMMARY>
<tr>
<td>
<TMPL_VAR NAME=AGE>
</td>
<td>
<TMPL_VAR NAME=MONKS>
</td>
</tr>
</TMPL_LOOP>
</tbody>
</table>
<hr/>
<table>
<caption>
<strong>Age Details</strong>
</caption>
<thead>
<tr>
<th scope="col">Age</th>
<th scope="col"># Monks</th>
</tr>
</thead>
<tbody>
<TMPL_LOOP NAME=DETAILS>
<tr>
<td>
<TMPL_VAR NAME=AGE>
</td>
<td>
<TMPL_VAR NAME=MONKS>
</td>
</tr>
</TMPL_LOOP>
</tbody>
</table>
<hr/>
</body>
</html>
|