#!/bin/perl5
# Scan the HTML document stored in this file's __DATA__ section and print one
# "signature|comment" line per entry found between two marker comments.
#
# An entry is recognised by a <b> start tag: the text up to the closing </b>
# is taken as the comment, and the text that follows, up to the next <hr>
# tag, is taken as the signature (with its trailing timestamp removed).
use strict;
use warnings;
use HTML::TokeParser::Simple;

# NOTE(review): the marker comparisons in this script are against empty
# strings. A comment token's raw text is unlikely ever to be empty, so as
# written the scan never starts and nothing is printed. The marker text
# (presumably full "<!-- ... -->" comments) appears to have been lost --
# TODO restore the real start and end markers here. Note also that if both
# markers are identical, the start check always wins and the end check can
# never fire.
my $start_marker = '';
my $end_marker   = '';

# Slurp the whole embedded document in a single read.
my $html = do { local $/; <DATA> };

my $p = HTML::TokeParser::Simple->new(\$html);

my $start;    # becomes true once the start marker comment has been seen
my @data;     # collected "signature|comment" strings, in document order

while (my $t = $p->get_token) {
    if ($t->is_comment) {
        my $raw = $t->as_is;
        if ($raw eq $start_marker) {
            $start++;
            next;
        }
        last if $raw eq $end_marker;
    }

    # Ignore everything that precedes the start marker.
    next unless $start;

    if ($t->is_start_tag('b')) {
        my $comment = $p->get_trimmed_text('/b');
        my $sig     = $p->get_trimmed_text('hr');

        # Crudely strip the timestamp: drop everything from the first
        # "whitespace followed by a hyphen" to the end of the string.
        $sig =~ s/\s+-.*//;

        push @data, join '|', $sig, $comment;
    }
}

print "$_\n" for @data;

__DATA__
Last Post
Comment 1
Doug <hun@tele.com>
USA - Thu 01/05/2006 - 22:05:51
Comment 2
J H
Clearwater, FL USA - Wed 01/04/2006 - 02:05:12