use strict;
use warnings;

use Carp              qw( croak );
use HTML::Entities    qw( encode_entities );
use HTML::Tagset      qw( );
use HTML::TreeBuilder qw( );
use Object::Destroyer qw( );

# Appends the HTML for $node, and recursively for its content, to the
# string referenced by $html_ref.
#
#   $html_ref  - reference to the output buffer being built.
#   $node      - either a plain text segment (non-reference) or an
#                element object providing tag(), starttag(), endtag()
#                and content_list().
#   $in_cdata  - true when $node is a text child of an element listed in
#                %HTML::Tagset::isCDATA_Parent (script, style, ...).
#                Such text must be emitted verbatim; entity-encoding it
#                would corrupt the script or stylesheet.
#
# A named sub taking the buffer by reference is used instead of a
# recursive closure, so no reference cycle (and thus no leak) is created.
sub _append_node_html {
    my ($html_ref, $node, $in_cdata) = @_;

    # Text segment.
    if (!ref($node)) {
        ${$html_ref} .= $in_cdata ? $node : encode_entities($node);
        return;
    }

    my $tag             = $node->tag();
    my $is_cdata_parent = $HTML::Tagset::isCDATA_Parent{$tag};

    ${$html_ref} .= $node->starttag();

    for my $child ($node->content_list()) {
        _append_node_html($html_ref, $child, $is_cdata_parent);
    }

    # Elements that never take an end tag, or whose end tag is optional,
    # are emitted without one.
    ${$html_ref} .= $node->endtag()
        if !$HTML::Tagset::emptyElement{$tag}
        && !$HTML::Tagset::optionalEndTag{$tag};

    return;
}

# Returns the HTML of the *content* of an element, i.e. everything
# between the element's own start and end tags (which are not included).
#
# The element is taken from the first argument, or from $_ when called
# without arguments. Croaks if that value is not a reference.
sub extract_html {
    # It would be better if we had access to the unparsed text,
    # but this will do as long as the parser doesn't change.

    my $node = @_ ? $_[0] : $_;

    # Typically hit when a look_down() passed as the argument found nothing.
    croak('extract_html: expected an HTML::Element node')
        if !ref($node);

    my $html     = '';
    my $in_cdata = $HTML::Tagset::isCDATA_Parent{ $node->tag() };

    for my $child ($node->content_list()) {
        _append_node_html(\$html, $child, $in_cdata);
    }

    return $html;
}

{
    my $tree = HTML::TreeBuilder->new();

    # Wrap the tree so that its delete() method is invoked automatically
    # when the wrapper goes out of scope at the end of this block.
    $tree = Object::Destroyer->new($tree, 'delete');

    # NOTE(review): the sample input below contains no markup as it
    # stands; it may have lost its tags before reaching this file.
    # Confirm against the original.
    $tree->parse_content(<<'__EOI__');
Foo

Foo

Not bar
__EOI__

    print extract_html( $tree->look_down( '_tag' , 'body' ) );
}

####

Foo

Not bar