use.perl journal archiver

I know, I know, there's a module doing this on CPAN. But sometimes using Perl for a Cool Purpose is just sheer fun. So, below is a script that downloads my entire use.perl journal:

use warnings;
use strict;
use LWP::Simple;

# Unbuffer STDOUT so progress messages appear as each fetch completes.
$|++;

# Archive every entry of a use.perl journal into the current directory.
#
# The journal's listing page is fetched and scanned line by line. Each line
# that links to an entry (<journal_path>/<number>) is expected to carry the
# entry title in <B>...</B>, with the entry date in <EM>...</EM> on the
# following line. Every entry is saved as "<date>__<title>.html".
#
# When updates are done, entries can be restricted by their number
# in order to disregard ones already archived.
# For example, if the last entry number was 12345, then only entries
# with number > 12345 should be downloaded

my $main_url = "http://use.perl.org/journal.pl?op=list&uid=4197";
my $journal_path = "use.perl.org/~spur/journal";

# LWP::Simple::get() signals failure by returning undef; it is not documented
# to set $!, so $! is deliberately left out of the error message.
my $content = get($main_url);
defined $content or die "Could not fetch $main_url";
print "Fetched  $main_url\n";
print "Processing...\n\n";

my @lines = split(/\n+/, $content);

for my $i (0 .. $#lines)
{
    # \Q...\E keeps the dots in the path from acting as regex wildcards
    next unless $lines[$i] =~ /\Q$journal_path\E\/(\d+)/;

    my $entry_url = "http://" . $journal_path . "/$1";

    # look for the post title
    $lines[$i] =~ /<B>(.+)<\/B>/
        or die "Error line $i: title not found\n";

    my $title = $1;

    # we don't like spaces in file names
    $title =~ tr/ /_/;

    # neither these characters
    $title =~ s/[":?<>\/\\]//g;

    # look for the post date (on the next line); if the listing ends right
    # after the title line, treat the date line as empty so we die cleanly
    # instead of matching against undef
    my $date_index = $i + 1;
    my $date_line = $date_index <= $#lines ? $lines[$date_index] : '';
    $date_line =~ /<EM>([\d.]+)/
        or die "Error in line $date_index: date not found\n";

    my $date = $1;
    my $target_name = $date . "__" . $title . ".html";

    my $entry_content = get($entry_url);
    defined $entry_content or die "Could not fetch $entry_url";
    print "Fetched  $entry_url\n";

    # three-argument open with a lexical handle; every step is checked so a
    # failed write is never reported as success
    open(my $target_fh, '>', $target_name)
        or die "Could not open $target_name for writing: $!";
    print {$target_fh} $entry_content
        or die "Could not write to $target_name: $!";
    close($target_fh)
        or die "Could not close $target_name: $!";
    print "Wrote to  $target_name\n";
}

print "Done.\n";
[download]

Comment on use.perl journal archiver Download Code