Beefy Boxes and Bandwidth Generously Provided by pair Networks
P is for Practical
 
PerlMonks  

Top Seti@home Teams

by Mr. Muskrat (Canon)
on Jan 06, 2003 at 22:07 UTC ( [id://224763]=sourcecode: print w/replies, xml ) Need Help??
Category: Web Stuff
Author/Contact Info /msg Mr. Muskrat
Description:

I am still working on my top Seti@home teams script.

This uses: (in no particular order)
grantm's XML::Simple,
samtregar's HTML::Template,
mojotoad's HTML::TableExtract,
gbarr's Net::FTP,
LWP::UserAgent,
HTML::Entities
and DBI.

Many thanks to the authors of these wonderful modules!

Output is available at Top 40 Seti@home Teams.

The URL that Berkeley uses for the XML team stats may change once they "officially" announce it. They still need to fix some things... See my Malformed XML thread at the Seti@home Bulletin Board for more infomation.update 4

Updated the ParseTeamXML subroutine so that it no longer uses a hash as a reference. Thanks tye and thunders for helping me resolve this issue.

Update 2 added color coding for rise/fall in rank.

Update 3 Updated code to work around duplicate teams in the top teams page.

Update 4 Thread is AWOL.

#!/usr/bin/perl -w
use strict;
use XML::Simple;
use LWP::UserAgent;
use HTML::Entities;
use HTML::TableExtract;
use HTML::Template;
use DBI;
use Net::FTP;

# --- Config section --- #

# !!!
# You will need to fill out the DB and FTP info sections.
# !!!

# once Berkeley fixes their malformed XML, set this to 0
my $malformed_xml = 1;

# Seti@home URL info
my %seti = (
  baseurl => 'http://setiathome.ssl.berkeley.edu/',
  teamlookup => 'fcgi-bin/fcgi?cmd=team_lookup_xml&name=',
  topteams => 'stats/team/team_type_0.html',
);

# DB info
my %db = (
  type => 'mysql',
  name => 'seti',
  host => '',
  user => '',
  pass => '',
);

# FTP info
my %ftp = (
  host => '',
  user => '',
  pass => '',
  dir  => '',
  file => 'teams.html',
);

# Number of teams to track
my $num_teams = 40;

# --- End of config section --- #

my $dbh = DBI->connect("DBI:$db{type}:$db{name}:$db{host}", $db{user},
+ $db{pass},
  { PrintError => 0}) || die $DBI::errstr;

# To enable debugging, set an environment variable named debug to a tr
+ue value
# or add a command line parameter that has a true value
my $debug = $ARGV[0] || $ENV{debug} || 0;

print "Getting Top Teams...\n" if $debug;

my @teams = GetTeams();

print "Parsing Teams...\n" if $debug;

my ($static, $dynamic) = ParseTeams(@teams);

$dbh->disconnect();

# Begin creation of HTML file that will be uploaded

print "Creating HTML file...\n" if $debug;

my $time = gmtime();

my (@stemp, @dtemp);
for my $num (0..$#teams) {
  my $teamname = $$dynamic{$num};
  my $bteam = $$static{$teamname};
  my $team = $$dynamic{$teamname};
  my %class = (
    rank    => '',
    members => '',
    results => '',
  );
  if ($$team{rank} > $$bteam{rank}) {
    $class{rank} = 'red';
  } elsif ($$team{rank} < $$bteam{rank}) {
    $class{rank} = 'green';
  }
  if ($$team{members} > $$bteam{members}) {
    $class{members} = 'green';
  } elsif ($$team{members} < $$bteam{members}) {
    $class{members} = 'red';
  }
  if ($$team{results} > $$bteam{results}) {
    $class{results} = 'green';
  } elsif ($$team{results} < $$bteam{results}) {
    $class{results} = 'red';
  }
  push @dtemp, { rank => $$team{rank}, name => $teamname, members => $
+$team{members}, results => $$team{results}, c => $class{rank}, cm => 
+$class{members}, cr => $class{results} };  
  $teamname = $$static{$num};
  $bteam = $$static{$teamname};
  push @stemp, { rank => $$bteam{rank}, name => $teamname, members => 
+$$bteam{members}, results => $$bteam{results} };
}

# open the html template for the Rank Standings Page
my $top = HTML::Template->new(filename => 'top.tmpl');
# fill in the parameters
$top->param(NUM_TEAMS => $num_teams);
$top->param(TIME => $time);
$top->param(STEMP => [@stemp]);
$top->param(DTEMP => [@dtemp]);

print "Writing the HTML file...\n" if $debug;

# create the Top Teams Page
open(HTML, '>', $ftp{file});
print HTML $top->output;
close(HTML);

print "Uploading the HTML file...\n" if $debug;

UploadTeams($ftp{host}, $ftp{user}, $ftp{pass}, $ftp{dir}, $ftp{file})
+;

exit;

# Teams processing subroutines

sub GetTeams {
  my $url = $seti{baseurl}.$seti{topteams};
  print "Grabbing top teams: $url\n" if $debug;
  my $html = GetURL($url);
  my $te  = new HTML::TableExtract( headers => [qw(Name Members Result
+s Total Average)]);
  $te->parse($html);
  my @teams;
  my $lastteam = '';
  my $rank = 0;
  foreach my $ts ($te->table_states) {
    foreach my $row ($ts->rows) {
      my ($team) = $$row[0] =~ /\d+\)\s+(.*)\s*$/;
      next if ($team eq $lastteam);
      $rank++;
      print "$rank $team\n" if ($debug && $rank <= $num_teams);
      push(@teams, $team) if ($rank <= $num_teams);
      $lastteam = $team;
    }
  }
  return @teams;
}

sub ParseTeams {
  my @teams = @_;
 
  # @static will hold the static data
  # @dynamic will hold the dynamic data
  # using arrays until sorting complete
  my (@static, @dynamic);
  for my $num (0..$#teams) {
    sleep(15);
    my $name = $teams[$num]; # team name
    my $url = $seti{baseurl}.$seti{teamlookup}.$name;
    print "Team URL: '$url'\n" if $debug;
    my $file = GetURL($url);
    $file = FixXML($file) if $malformed_xml;
    WriteXML("$name.xml", $file) if $debug;
    my ($bmembers, $bresults, $members, $results) = ParseTeamXML($file
+); 
    push(@static, [$name, $bmembers, $bresults]);
    push(@dynamic, [$name, $members, $results]);
  }

  # sort on the results in @dynamic
  @dynamic = sort { $b->[2] <=> $a->[2] } @dynamic;

  # sorting complete, move the data into hashes for easier lookup
  my (%static, %dynamic);
  for my $num (0..$#teams) {
    my ($bname, $bmembers, $bresults) = @{$static[$num]};
    my ($name, $members, $results) = @{$dynamic[$num]};
    my $rank = $num + 1;
    $static{$bname} = {rank => $rank, members => $bmembers, results =>
+ $bresults};
    $dynamic{$name} = {rank => $rank, members => $members, results => 
+$results};
    $static{$num} = $bname;
    $dynamic{$num} = $name;
    InsertTeamValues($dbh, "bteams", $rank, $bname, $bmembers, $bresul
+ts);
    InsertTeamValues($dbh, "teams", $rank, $name, $members, $results);
  }

  return (\%static, \%dynamic);
}

# LWP subroutines

sub GetURL {
  my $url = shift;
  my $ua = new LWP::UserAgent(
    keep_alive => 1,
    timeout => 240,
  );
  $ua->agent('KWSNStats/0.4');
  my $file;
  my $pagelen;
  until ($file and $pagelen > 0) {
    sleep(15);
    print "Grabbing $url\n" if $debug;
    my $req = HTTP::Request->new(GET => $url);
    my $res = $ua->request($req);
    print $res->code, " ", $res->message,"\n" if $debug;
     if ($res->is_success) {
      $file = $res->content;
      $pagelen = length($file);
    } else {
      warn "Unable to get $url";
      $pagelen = -1
    }
    print "page length: $pagelen\n" if $debug;
  }
  return $file;
}

# XML subroutines

sub FixXML {
  # fix Berkeley's malformed XML and remove some unnecessary info
  my $xml = shift;
  $xml =~ s!<profile>.*?</profile>!!sg;
  $xml =~ s!<url>.*?</url>!!sg;
  $xml =~ s!<totalcpu>.*?</totalcpu>!!sg;
  $xml =~ s!<avecpu>.*?</avecpu>!!sg;
  $xml =~ s!<datelastresult>.*?</datelastresult>!!sg;
  $xml =~ s!<country>.*?</country>!!sg;
  $xml =~ s/\n+/\n/g;
  $xml =~ s/[|]/ /g; # turn chr(127) and chr(27) into spaces
  my @xml = split('\n', $xml);
  $xml = '';
  for my $line (@xml) {
    my ($name) = $line =~ m!<name>(.*)</name>!;
    if (defined $name) {
      print $name,$/ if $debug;
      encode_entities($name, '<&>³');
      $line = "<name>$name<\/name>";
    }
    $xml .= $line."\n";
  }
  $xml =~ s!&sup3;!3!g;
  return $xml;
}

sub ParseTeamXML {
  my $xml = shift;
  my $xs = new XML::Simple( keyattr => 'topmembers');
  my $ref = $xs->XMLin($xml);
  my $bresults = $$ref{numresults};
  my $bmembers = $$ref{nummembers};
  my $results = 0;
  my @members = @{ $ref->{topmembers}{member} };
  my $members = scalar @members;
  for my $member (@members) {
    $results += $$member{numresults};
  }
  if ($debug) {
    print "Berkeley Members: $bmembers\n";
    print "Berkeley Results: $bresults\n";
    print "XML Members: $members\n";
    print "XML Results: $results\n";
  }
  return ($bmembers, $bresults, $members, $results);
}

sub WriteXML {
  my $file = shift;
  my $xml = shift;
  print "Writing XML out to $file.\n";
  open(XML, '>', $file);
  print XML $xml;
  close(XML);
}

# DB subroutines

sub InsertTeamValues {
  my ($dbh, $table, $rank, $team, $members, $results) = @_;
  my $sth = $dbh->prepare("INSERT INTO $table (rank, team, members, re
+sults, datetime) VALUES (?,?,?,?,NOW())") or die $dbh->errstr;
   $sth->execute($rank, $team, $members, $results) or die $sth->errstr
+;
}

# FTP subroutines

sub UploadTeams {
  my ($host, $user, $pass, $dir, $file) = @_;
  my $ftp = Net::FTP->new($host, user => $user, pass => $pass, Debug =
+> $debug);
  $ftp->login($user, $pass);
  $ftp->cwd($dir);                       
  $ftp->put($file);
  $ftp->quit;
}


Template (top.tmpl)

<html>
<head>
<title>Sir Muskrat's Top <TMPL_VAR NAME=NUM_TEAMS> Seti@home Teams</ti
+tle>
<style type="text/css">
      body {       color: #FFFFFF;
        background-color: #000000; }
      td {         color: #FFFFFF; }
      td.green {   color: #00FF00; }
      td.red {     color: #FF0000; }
      a:link {     color: #CC99FF; }
      a:visited {  color: #CC99FF; }
</style>
</head>
<body bgcolor="#000000" text="#FFFFFF" link="#CC99FF" vlink="#CC99FF" 
+alink="#CC99FF">
<h1 align="center">Sir Muskrat's Top <TMPL_VAR NAME=NUM_TEAMS> Seti@ho
+me Teams</h1>
<p>Last updated: <TMPL_VAR NAME=TIME></p>
<p><table>
<tr><td>
<table border="1" align="center" valign="middle" cellspacing="2" cellp
+adding="2">
<caption>Static Top <TMPL_VAR NAME=NUM_TEAMS> Teams</caption>
<tr><th>Rank</th><th>Team Name</th><th>Members</th><th>Results</th></t
+r>
<TMPL_LOOP NAME=STEMP>
  <tr>
    <td><TMPL_VAR NAME=RANK></td>
    <td><TMPL_VAR NAME=NAME></td>
    <td><TMPL_VAR NAME=MEMBERS></td>
    <td><TMPL_VAR NAME=RESULTS></td>
  </tr>
</TMPL_LOOP>
</table></td>
<td><table border="1" align="center" valign="middle" cellspacing="2" c
+ellpadding="2">
<caption>Dynamic Top <TMPL_VAR NAME=NUM_TEAMS> Teams</caption>
<tr><th>Rank</th><th>Team Name</th><th>Members</th><th>Results</th></t
+r>
<TMPL_LOOP NAME=DTEMP>
  <tr>
    <td class='<TMPL_VAR NAME=C>'><TMPL_VAR NAME=RANK></td>
    <td><TMPL_VAR NAME=NAME></td>
    <td class='<TMPL_VAR NAME=CM>'><TMPL_VAR NAME=MEMBERS></td>
    <td class='<TMPL_VAR NAME=CR>'><TMPL_VAR NAME=RESULTS></td>
  </tr>
</TMPL_LOOP>
</table>
</td></tr>
</table></p>
<p>Static is calculated by Berkeley.  Dynamic is calculated by adding 
+up the results for all members.<br />
<font color="#ff0000">Red</font> means the dynamic number is lower tha
+n the static while <font color="#00ff00">green</font> indicates a hig
+her number.</p>
</body>
</html>
Replies are listed 'Best First'.
Re: Top Seti@home Teams
by vek (Prior) on Jan 07, 2003 at 04:05 UTC
    Nice job Mr Muskrat.

    Good to know that they'll be offering XML for teams as well as users. I've been slowly adding the group/team functionality to the next release of SETI::WebStats and will probably drop the HTML scraping code now in favour of the XML.

    -- vek --

      Thanks.

      The XML user stats are available now.
      The URL is: http://setiathome.ssl.berkeley.edu/fcgi-bin/fcgi?cmd=user_xml&email=XXXXX
      And you can also check out the DTD

      I'm looking forward to seeing the next version of Seti::WebStats.

Re: Top Seti@home Teams
by Mr. Muskrat (Canon) on Jun 16, 2003 at 19:32 UTC
    Berkeley is having problems where some teams appear on the list more than once. (See Teams 1 - 200 and you'll see several) I will work on patching this script to deal with Berkeley's mistakes just as soon as time permits.
Re: Top Seti@home Teams
by Mr. Muskrat (Canon) on Jul 01, 2003 at 18:21 UTC

    I have updated the code once again. This update implements a work around for the duplicate teams on the top teams page.

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: sourcecode [id://224763]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others sharing their wisdom with the Monastery: (6)
As of 2024-04-19 08:21 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found