#!/usr/bin/perl
#
# Print the text found inside every <div> element of an HTML sample.
#
# HTML::Parser (API version 3) walks the document and fires three handlers:
#   start_h -> div_check        opens a capture on <div>, otherwise keeps
#                               the tag's source text if a capture is open
#   text_h  -> anonymous sub    appends decoded text while a capture is open
#   end_h   -> work_on_divtext  prints and closes the capture on </div>,
#                               otherwise keeps the tag's source text
#
# Each captured div is printed on its own line as "=<content>=".
#
# Limitation: nesting is not tracked. A nested <div> restarts the capture
# (dropping the outer text gathered so far) and its </div> ends capturing,
# so the remainder of the outer div is ignored.

use strict;
use warnings;
use HTML::Parser;

# NOTE(review): the sample below contains no markup at all, so with this
# input no <div> is ever seen and nothing is printed. It looks like the
# original tags were stripped when this file was extracted -- restore the
# intended HTML before relying on the output.
my $html = <<'EOT';
foo_a
foo_b
foo_c0 baz foo_c1
foo_d
foo_e
EOT

# Parser state shared by all handlers.
my $divtext = '';    # content gathered for the div currently being read
my $indiv   = 0;     # true between a <div> start tag and its </div>

my $p = HTML::Parser->new(
    api_version => 3,
    start_h     => [ \&div_check, 'tagname,text' ],
    text_h      => [
        sub {
            my ($decoded) = @_;
            $divtext .= $decoded if $indiv;
            return;
        },
        'dtext',
    ],
    end_h => [ \&work_on_divtext, 'tagname,text' ],
);

$p->parse($html);

# Tell the parser the document is complete so any text it is still
# buffering is delivered to the handlers.
$p->eof;

# Start-tag handler.
#   $tag  - tag name as reported by the parser ("tagname" argspec)
#   $text - source text of the tag ("text" argspec)
# A <div> begins a fresh capture; any other start tag seen while capturing
# is kept verbatim as part of the div's content.
sub div_check {
    my ( $tag, $text ) = @_;

    if ( $tag eq 'div' ) {
        $divtext = '';
        $indiv   = 1;
    }
    elsif ($indiv) {
        $divtext .= $text;
    }

    return;
}

# End-tag handler.
#   $tag  - tag name as reported by the parser ("tagname" argspec)
#   $text - source text of the tag ("text" argspec)
# A </div> prints the captured content (if any) and ends the capture; any
# other end tag seen while capturing is kept verbatim.
sub work_on_divtext {
    my ( $tag, $text ) = @_;

    if ( $tag eq 'div' ) {
        # Test the length, not the truthiness: a div whose whole content
        # is "0" is still real content and must be printed.
        print "=$divtext=\n" if length $divtext;
        $divtext = '';
        $indiv   = 0;
    }
    elsif ($indiv) {
        $divtext .= $text;
    }

    return;
}