#! /usr/bin/perl
use strict;
use warnings;

use LWP::Simple qw(get);
use File::Slurp;

# Crawl the sortie list pages of every persona named in the pid list file.
#
# pid = Persona ID, one of a player's 3 identities.
# sid = Sortie ID, identifier for a mission taken by the persona.
#
# We want to crawl all sortie list pages and collect new sid's we
# haven't seen before and then move on to the next persona.
#
# Input : $pidfile, read one line at a time; each non-empty line is used
#         verbatim as the pid query parameter.
# Output: echoes each line read to STDOUT, then prints "Done!".
# Dies  : if the pid list cannot be opened or closed, or if a page fetch
#         returns nothing.

my $pbase   = q{http://csr.wwiionline.com/scripts/services/persona/sorties.jsp};
my $pidfile = q{c:/scr/pidlist.txt};

# Open list of pid's
open my $pidlist, q{<}, $pidfile
    or die qq{Could not open $pidfile: $!};

# Loop over the list of pids one at a time.
PERSONA:
while ( my $pid = <$pidlist> ) {
    print $pid;
    chomp $pid;

    # A blank line would otherwise produce requests with an empty pid.
    next PERSONA unless length $pid;

    # what does a line from the pidlist look like?
    # do need to do any work on it?

    # The page counter is per persona: every persona's listing starts at
    # page 1, so it must not carry over from the previous persona.
    my $pcnt = 1;

    PAGE:
    while (1) {
        my $url     = qq{$pbase?page=$pcnt&pid=$pid};
        my $content = get($url);

        # LWP::Simple::get signals failure by returning undef; it does not
        # set $!, so $! is deliberately left out of the message.
        die qq{get failed for $url}
            unless defined $content && length $content;

        # parse $content
        # extract data we need
        # test if there is more work to do
        # TODO: 'last PAGE' once parsing can recognise the final page;
        #       until then this loop only ends when a fetch fails.

        # Update page number and grab next.
        $pcnt += 1;
    }
}

# Close files
close $pidlist or die qq{Could not close $pidfile: $!};

print qq{\nDone!\n};