{
    # MyParser - HTML::Parser subclass that records the markup found inside
    # each <div> element.
    #
    # State stored on the parser object:
    #   divs      - array ref, one string per <div> seen, in the order opened
    #   open_divs - array ref, indices into divs of the <div>s currently open
    #               (outermost first)
    #   dc        - number of <div>s currently open
    #
    # A nested <div> gets its own entry in divs, and its markup is also part
    # of every enclosing <div>'s entry.
    package MyParser;
    use strict;
    use warnings;
    use base 'HTML::Parser';

    # Append $markup to the entry of every <div> that is currently open.
    # Does nothing when no <div> is open.
    sub _append_to_open_divs {
        my ($self, $markup) = @_;
        $self->{divs}[$_] .= $markup for @{ $self->{open_divs} || [] };
        return;
    }

    # Start-tag callback.
    #   $tagname  - tag name, compared against 'div'
    #   $origtext - the tag exactly as it appeared in the document
    # ($attr and $attrseq are accepted but unused.)
    sub start {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        # The tag belongs to the divs that enclose it, so record it before
        # a new entry is opened for a <div> itself.
        $self->_append_to_open_divs($origtext);

        if ($tagname eq 'div') {
            push @{ $self->{divs} }, '';
            push @{ $self->{open_divs} }, $#{ $self->{divs} };
            $self->{dc}++;
        }
        return;
    }

    # End-tag callback.
    #   $tagname  - tag name, compared against 'div'
    #   $origtext - the tag exactly as it appeared in the document
    sub end {
        my ($self, $tagname, $origtext) = @_;

        if ($tagname eq 'div') {
            # A </div> with no matching open <div> must not drive the
            # depth negative: a negative depth is true, and appending to
            # the last element of an empty list is a fatal error.
            return unless $self->{dc};
            pop @{ $self->{open_divs} };
            $self->{dc}--;
        }

        # The closing tag of a div is not part of that div's own content,
        # only of the divs still open around it.
        $self->_append_to_open_divs($origtext);
        return;
    }

    # Text callback: $origtext is recorded in every open <div>.
    # ($is_cdata is accepted but unused.)
    sub text {
        my ($self, $origtext, $is_cdata) = @_;
        $self->_append_to_open_divs($origtext);
        return;
    }

    # Comment callback: comments contribute nothing to the captured markup.
    # NOTE(review): the code this replaces appended an empty string here,
    # which looks like a lost "<!--$origtext-->" - confirm whether comments
    # inside a <div> should be kept.
    sub comment {
        my ($self, $origtext) = @_;
        return;
    }
}

my $p = MyParser->new;
$p->parse($content);
$p->eof;    # flush text the parser may still be buffering

# Only touch the results when at least one <div> was captured; otherwise
# $p->{divs} was never created and there is nothing to dereference.
if ( exists $p->{divs} ) {
    print "($_)\n" for @{ $p->{divs} };

    # Remove the captured state (not just undef it) so nothing accumulates
    # in $p and the exists() test above stays meaningful if $p is reused.
    delete @{$p}{qw(divs open_divs dc)};
}

#### Sample value for $content.
# NOTE(review): this assignment comes after the parse above, so it only
# takes effect if moved (or run) before it - confirm how the two pieces are
# meant to be combined. The sample contains no <div> markup, presumably
# because the tags were lost when it was pasted - TODO confirm.
$content = '
foo
bar somestuff
';