#!/usr/bin/perl
#
# For every page number from $start to $end: load that page's HTML through
# get(), pull the chat messages out of it and print one line per message in
# the form "<user>:<content> (<delay>)".
#
# NOTE(review): this file had been collapsed onto a few physical lines and
# every literal HTML tag had been stripped out of its strings and regular
# expressions, taking the code between two of those tags with it.  The
# statement layout below is restored from the surviving fragments; the tag
# portions of the patterns are best-effort reconstructions and are marked as
# such.  Confirm them against the real page markup before relying on them.
use warnings;
use strict;

my $start = 1;
my $end   = 10;

# Start of a chat line: a link whose opening tag ends in the user name and
# whose link text repeats that name (the \1 back-reference), followed by
# "</a>", a colon and a second "</a>".  Capture 1 is the user name.
#
# NOTE(review): only the tail (\1 </a> : </a>) survived in the source; the
# opening-tag part is reconstructed.  The source also wrote a literal space
# before the colon, which /x silently ignores -- optional whitespace is
# accepted here instead so both spellings match.
my $USER_LINK_RE = qr{
    ^ <a \s [^>]*?              # reconstructed: opening tag up to the name
    ( [^"'/=>\s]+ )             # reconstructed: user name ending the href
    ["']? >                     # reconstructed: end of the opening tag
    \1 </a> \s* : \s* </a>      # same name as link text, colon, stray </a>
}x;

# Remainder of a chat line: message text, then "(N units ago)".  Capture 1 is
# the text (including any blank before the parenthesis), capture 2 the delay.
# Singular units ("1 day ago", "1 minute ago") are accepted as well; the
# source only handled the singular for "hour".
#
# NOTE(review): the source required whitespace plus one of two alternatives
# after the closing parenthesis, but both alternatives were stripped (most
# likely line-break tags).  Trailing whitespace and <br> tags are treated as
# optional here.
my $MESSAGE_RE = qr{
    ^ (.*?)
    \( ( \d+ \s+ (?: days? | hours? | minutes? | seconds? ) \s+ ago ) \)
    \s* (?: <br \s* /? > \s* )*
}x;

for my $cnt ($start .. $end) {
    # NOTE(review): the source had a line break inside this string where a
    # tag was probably stripped; plain-text output is assumed.
    print "Current page count: $cnt\n";

    my $uri  = "http://www.allpoetry.com/chat/page=$cnt";
    my $html = get($uri);

    for my $message (parse_messages($html)) {
        printf "%15s:%s (%s)\n", @{$message}{qw(user content delay)};
    }
}

exit(0);

# parse_messages($html)
#
# Splits $html into lines and returns a list of hash references, one per
# recognised chat line, each with the keys "user", "content" and "delay".
# Lines that do not match both patterns are skipped silently.
#
# Parsing HTML with regular expressions is fragile; this only works while
# each chat message sits on a single line in the expected shape.
sub parse_messages {
    my ($html) = @_;

    # retrieve the text and split into lines
    my @lines = split /[\r\n]+/, $html;

    # Skip through until the first chat message, hopefully.
    # NOTE(review): only "m/^" survived of the original test; "a line that
    # starts with a link" is assumed.
    shift @lines while @lines && $lines[0] !~ /^<a\b/;

    my @messages;
    for my $line (@lines) {
        my $rest = $line;    # work on a copy so @lines is left untouched

        next unless $rest =~ s/$USER_LINK_RE//;
        my $user = $1;       # save before the next match overwrites $1

        next unless $rest =~ s/$MESSAGE_RE//;
        push @messages, { user => $user, content => $1, delay => $2 };
    }

    return @messages;
}

# get($uri)
#
# Returns the whole content of the local file named after the number at the
# end of $uri (e.g. ".../page=3" reads the file "3" in the current
# directory).  No network access happens here.
# NOTE(review): presumably a stand-in for a real HTTP fetch -- confirm.
#
# Dies if $uri does not end in digits, or if the file cannot be opened or
# closed.
sub get {
    my $uri = shift;

    # Check the match explicitly: after a failed match $1 would still hold
    # whatever an earlier successful match left behind.
    my ($number) = $uri =~ /(\d+)$/
        or die "No page number at the end of '$uri'";

    open my $html, "<", $number or die "Couldn't open $number: $!";
    my $ret = do { local $/; <$html> };    # slurp the whole file at once
    close $html or die "Couldn't close $number: $!";

    return $ret;
}