#!/usr/bin/perl -w
use strict;
use HTML::Parser ();
# Sample document fed to the parser.
# NOTE(review): this literal contains no markup at all, so no start-tag
# events can fire for it; the tags look like they were stripped at some
# point. Confirm against the original sample before relying on the output.
my $html = q(
The Title
Heading 1
First paragraph.
Second fully emphasized paragraph.
Third partially paragraph.
Para with BOLD middle bit.
Para with CODE - changed to TT for PM middle bit.
- dot point 1
- dot point 2
- dot point 3
);

# Only these tags are reported to the handlers.
my @wanted_tags = qw(title h1 p em li strong);

# Build a version-3 API parser. The text handler is print_element, which
# receives only the parser object itself (argspec 'self').
my $parser = HTML::Parser->new(
    api_version => 3,
    text_h      => [ \&print_element, 'self' ],
    report_tags => \@wanted_tags,
);

# Feed the whole document in one go, then signal end of input so any
# buffered text is flushed to the handlers.
$parser->parse($html);
$parser->eof;
# print_element($parser)
#
# Bootstrap handler: when invoked with the parser object it installs the
# two printing callbacks on that parser, replacing whatever start/text
# handlers were registered before.
#
#   start - prints the raw tag text with the surrounding angle brackets
#           removed and ": " appended (e.g. "<p>" becomes "p: ").
#   text  - prints each decoded text chunk that contains at least one
#           non-whitespace character, stripped of leading and trailing
#           whitespace and followed by a newline.
#
# The script registers this sub as the text handler with argspec 'self',
# so the text event that triggers this call is not itself printed.
#
# Returns whatever the final $parser->handler(...) call returns.
sub print_element
{
    my $p = shift;

    $p->handler(start => sub {
        for my $tag_text (@_) {
            # Work on a copy: @_ aliases the caller's values and must not
            # be rewritten in place.
            (my $label = $tag_text) =~ s/^<(.+?)>$/$1: /;
            print $label;
        }
    }, 'text');

    return $p->handler(text => sub {
        for my $chunk (grep { /\S/ } @_) {
            # Trim with \A / \z on a copy so chunks that contain interior
            # newlines are trimmed and newline-terminated as well, instead
            # of silently failing to match and being printed verbatim.
            (my $trimmed = $chunk) =~ s/\A\s+//;
            $trimmed =~ s/\s+\z//;
            print "$trimmed\n";
        }
    }, 'dtext');
}