#!/usr/bin/perl
use warnings;
use strict;

# Walks chat pages $start..$end, pulls the per-line chat messages out of each
# page's HTML and prints them as "user:content (delay)".
#
# Pages are obtained through get(), which is defined at the bottom of this
# file.
#
# NOTE(review): this script reached review with its markup-specific text
# stripped out (statements fused onto one line, HTML tags missing from the
# patterns).  The structure below is restored from the surviving fragments;
# every spot where lost text had to be guessed is marked "assumed".

my $start = 1;
my $end   = 10;

# Leading "user:" part of a chat line.  Only the tail of the original pattern
# survived (a back-reference to the user name followed by "</a>", then
# ":</a>").
# NOTE(review): assumed shape -- one or more opening <a ...> tags, the user
# name as link text, "</a>", a colon and an optional second "</a>".  Confirm
# against a saved page before relying on it.
my $user_prefix_re = qr{
    ^ \s*
    (?: <a \b [^>]* > \s* )+    # assumed: opening anchor tag(s)
    ( [^<]+ )                   # $1: user name (the link text)
    <\/a> \s* : \s*
    (?: <\/a> )?                # trailing "</a>" seen in the surviving text
}x;

# Remainder of a chat line: message text, then "(N <unit> ago)".
# The unit list also accepts the singular forms ("1 minute ago"), which the
# earlier alternation only did for "hour".
# NOTE(review): the final group was "(?: )" in the surviving text, which
# matches the empty string under /x; it presumably held a tag or entity that
# was lost.  It is kept as-is, so the line must still have whitespace after
# the closing parenthesis -- confirm.
my $content_delay_re = qr{
    ^ (.*?)                                  # $1: message content
    \( ( \d+ \s+
         (?: days? | hours? | minutes? | seconds? )
         \s+ ago ) \)                        # $2: delay, e.g. "3 hours ago"
    \s+ (?: )
}x;

for my $cnt ($start .. $end) {
    # The literal carried a line break before its "\n", hence two newlines.
    print "Current page count: $cnt\n\n";

    my $uri  = "http://www.allpoetry.com/chat/page=$cnt";
    my $html = get($uri);

    unless (defined $html) {
        warn "No content for $uri, skipping\n";
        next;
    }

    # retrieve the text and split into lines
    my @lines = split /[\r\n]+/, $html;

    # Now get into trouble for parsing HTML by hand.
    # NOTE(review): the original skipped ahead to the first chat message
    # before parsing, but the test it used is not recoverable.  Lines that do
    # not start with a user prefix are skipped below, which has the same
    # effect as long as nothing before the chat section matches that prefix.
    my @messages;
    for my $line (@lines) {
        next unless $line =~ s/$user_prefix_re//;
        my $user = $1;

        next unless $line =~ s/$content_delay_re//;
        my ($content, $delay) = ($1, $2);    # copy before any other match

        push @messages, {
            user    => $user,
            content => $content,
            delay   => $delay,
        };
    }

    for my $message (@messages) {
        # The format string reproduces the line breaks embedded in the
        # original literal: newline, "|", newline, newline.
        printf "%15s:%s (%s)\n|\n\n",
            $message->{user},
            $message->{content},
            $message->{delay};
    }
}
exit(0);
# Local stand-in for a page fetcher: instead of going over the network it
# takes the run of digits at the end of the URI and reads the file of that
# name (relative to the current directory).
#
# Parameters:
#   $uri - string ending in one or more digits, e.g. ".../page=3"
# Returns:
#   the entire content of the file as a single string
# Dies:
#   if no URI is given, if the URI does not end in digits, or if the file
#   cannot be opened or closed.
sub get {
    my $uri = shift;

    die "get: no URI given" unless defined $uri;

    # A failed match leaves $1 undefined or holding a stale value from an
    # earlier match, so take the capture from the match's own return list and
    # stop right here if there is none.
    my ($number) = $uri =~ /(\d+)$/
        or die "Couldn't find a trailing page number in '$uri'";

    open my $html, "<", $number or die "Couldn't open $number: $!";

    # Slurp the whole file; the record separator is undefined only for the
    # duration of this read.
    my $ret = do { local $/; <$html> };

    close $html or die "Couldn't close $number: $!";
    return $ret;
}