--- TITLE --- WORDS WORDS WORDS WORDS WORDS WORDS

# Extract "title" paragraphs and <object> elements from an HTML document
# using HTML::Parser's event-driven (api_version 3) interface.
#
# Output:
#   * for every <p> whose text up to its first <br> matches /-+ TITLE -+/,
#     that text is reported as "title and words";
#   * for every <object>...</object>, the raw markup and text between the
#     tags is reported as an object.
use strict;
use warnings;
use HTML::Parser;

# NOTE(review): the markup of the original sample document was lost when this
# listing was extracted (only its text survived, and the heredoc opener was
# reduced to a lone "<").  The <p>/<br> tags below are a minimal
# reconstruction that exercises the title branch -- replace them with the
# real document before relying on the output.
my $html = <<'EOH';
<p>--- TITLE --- WORDS WORDS WORDS WORDS WORDS WORDS<br></p>

EOH

# Parser state shared by the three handlers below.
my $partext = '';    # text collected since the most recent <p>
my $objtext = '';    # markup/text collected since the most recent <object>
my @titlewords;      # paragraph texts that matched the TITLE pattern
my @objects;         # collected contents of each <object> element
my $inpar = 0;       # true while collecting paragraph text
my $inobj = 0;       # true while inside an <object> element

# Direct method call instead of the ambiguous indirect-object form
# ("new HTML::Parser(...)").
my $parser = HTML::Parser->new(
    api_version => 3,
    start_h     => [ \&handle_starttag, 'tagname,text' ],
    text_h      => [ \&handle_text,     'dtext' ],
    end_h       => [ \&handle_endtag,   'tagname,text' ],
);

$parser->parse($html);

# Signal end of document so that any text still buffered by the parser is
# delivered to handle_text.
$parser->eof;

for my $t (@titlewords) {
    print "=== Found title and words: ===\n$t\n======\n";
}

for my $o (@objects) {
    print "=== Found object: ===\n$o\n======\n";
}

# Start-tag handler.
#   $tag  - tag name as reported by the parser ("tagname" argspec)
#   $text - original source text of the tag ("text" argspec)
# <p> and <object> open (and reset) their respective collectors.  A <br>
# seen while collecting a paragraph ends the collection and records the text
# if it contains the TITLE marker.  Any other start tag seen inside an
# <object> is appended verbatim to the object text.
sub handle_starttag {
    my ( $tag, $text ) = @_;

    if ( $tag eq 'p' ) {
        $inpar   = 1;
        $partext = '';
    } elsif ( $tag eq 'object' ) {
        $inobj   = 1;
        $objtext = '';
    } elsif ( $tag eq 'br' and $inpar ) {
        push @titlewords, $partext if $partext =~ /-+ TITLE -+/;
        $inpar = 0;
    } elsif ($inobj) {
        $objtext .= $text;
    }

    return;
}

# Text handler.
#   $text - entity-decoded text ("dtext" argspec)
# Paragraph collection takes precedence over object collection, so text is
# added to at most one of the two buffers.
sub handle_text {
    my ($text) = @_;

    if ($inpar) {
        $partext .= $text;
    } elsif ($inobj) {
        $objtext .= $text;
    }

    return;
}

# End-tag handler.
#   $tag  - tag name as reported by the parser ("tagname" argspec)
#   $text - original source text of the tag ("text" argspec)
# </object> closes the current object and records its contents; any other
# end tag seen inside an <object> is appended verbatim to the object text.
sub handle_endtag {
    my ( $tag, $text ) = @_;

    if ( $tag eq 'object' ) {
        # Only record when an <object> is actually open; a stray </object>
        # would otherwise push stale or empty text.
        push @objects, $objtext if $inobj;
        $inobj = 0;
    } elsif ($inobj) {
        $objtext .= $text;
    }

    return;
}