use strict;
use HTML::Parser;
# Sample document handed to the parser.
# NOTE(review): the heredoc opener and every HTML tag were missing from this
# literal (only the text lines survived). The tags below are a reconstruction
# chosen so that the <p>...<br> title capture and the <object> capture are
# both exercised -- confirm against the intended input.
my $html = <<'EOH';
<p>--- TITLE --- WORDS WORDS WORDS<br>
<object>WORDS WORDS WORDS</object>
EOH
# State shared between the top-level code and the three handler subs.
my ( $partext, $objtext );       # text collected for the current <p> / <object>
my ( @titlewords, @objects );    # finished captures, in the order they were found
my ( $inpar, $inobj ) = ( 0, 0 );    # true while inside a <p> / an <object>

# Event-driven parser: every handler is called with the values named in its
# argspec string ("tagname,text" or "dtext").
my $parser = HTML::Parser->new(
    api_version => 3,
    start_h     => [ \&handle_starttag, "tagname,text" ],
    text_h      => [ \&handle_text,     "dtext" ],
    end_h       => [ \&handle_endtag,   "tagname,text" ],
);
$parser->parse( $html );

# Tell the parser the document is complete so that any text it is still
# buffering is delivered to the text handler.
$parser->eof;
# Print each captured title paragraph, then each captured object body,
# in the order they were collected.
print "=== Found title and words: ===\n$_\n======\n" for @titlewords;
print "=== Found object: ===\n$_\n======\n" for @objects;
# Start-tag handler (registered with the "tagname,text" argspec).
#   $tagname - name of the tag that was opened
#   $raw     - the second handler argument (the tag's text)
# <p> and <object> open a fresh capture buffer; a <br> seen while a paragraph
# is open closes that paragraph and keeps it only if it contains the TITLE
# marker; any other start tag seen inside an object is appended to the
# object's buffer.  The return value is not meaningful.
sub handle_starttag {
    my ( $tagname, $raw ) = @_;

    if ( $tagname eq 'p' ) {
        ( $inpar, $partext ) = ( 1, '' );
        return;
    }

    if ( $tagname eq 'object' ) {
        ( $inobj, $objtext ) = ( 1, '' );
        return;
    }

    if ( $tagname eq 'br' && $inpar ) {
        # Only paragraphs carrying the dashed TITLE marker are recorded.
        if ( $partext =~ /-+ TITLE -+/ ) {
            push @titlewords, $partext;
        }
        $inpar = 0;
        return;
    }

    # Nested markup inside an object is preserved as-is.
    $objtext .= $raw if $inobj;
    return;
}
# Text handler (registered with the "dtext" argspec).
#   $chunk - the text segment passed in by the parser
# Appends the segment to the paragraph buffer when a paragraph is open;
# otherwise to the object buffer when an object is open; otherwise the text
# is dropped.  The return value is not meaningful.
sub handle_text {
    my ($chunk) = @_;

    # An open paragraph takes precedence over an open object.
    if ($inpar) {
        $partext .= $chunk;
        return;
    }

    $objtext .= $chunk if $inobj;
    return;
}
# End-tag handler (registered with the "tagname,text" argspec).
#   $tagname - name of the tag that was closed
#   $raw     - the second handler argument (the tag's text)
# A closing </object> stores the collected object buffer and leaves object
# mode; any other end tag seen while an object is open is appended to the
# object's buffer.  The return value is not meaningful.
sub handle_endtag {
    my ( $tagname, $raw ) = @_;

    if ( $tagname ne 'object' ) {
        # Keep nested closing tags as part of the object's content.
        $objtext .= $raw if $inobj;
        return;
    }

    push @objects, $objtext;
    $inobj = 0;
    return;
}