# Parse an HTML string into an element tree and pull out its plain text.
# NOTE(review): "..." looks like a placeholder for the real HTML source.
my $html = ...
my $tree = HTML::TreeBuilder->new;
$tree->parse($html);
# parse() only feeds a chunk of input; eof() must follow so the parser can
# finish building the tree before it is read.
$tree->eof;
my $text = $tree->as_text();
####
# tree2text($tree, %options)
#
# Walks $tree depth-first, in document order, and returns the concatenation
# of its text nodes. Any node that is not a reference is treated as text;
# any reference is expected to provide tag() and content_list() methods
# (as the HTML::TreeBuilder tree above does). Undefined nodes are ignored.
#
# Options:
#   delim        => string emitted in front of every text node, i.e. the
#                   delimiter that replaces stripped tags. Defaults to ''.
#                   Note that a non-empty delimiter also precedes the very
#                   first text node.
#   escape_delim => text that replaces literal occurrences of delim found
#                   inside the content. The spelling "escaped_delim" is
#                   accepted as an alias. Ignored when delim is empty.
#   skip_tags    => array ref of tag names whose content is not added to
#                   the text; defaults to 'script' and 'style'. Names are
#                   lower-cased before comparison.
#
sub tree2text {
    my ($tree, %options) = @_;

    my $delim = defined $options{delim} ? $options{delim} : '';

    # This option was documented as "escaped_delim" but read as
    # "escape_delim"; honour both, preferring the key the code always used.
    my $esc_delim = defined $options{escape_delim}
        ? $options{escape_delim}
        : $options{escaped_delim};

    my $skip = $options{skip_tags} || ['script', 'style'];
    my %is_skipped = map { ( lc($_) => 1 ) } @$skip;

    # Escaping needs a non-empty delimiter: an empty pattern would make
    # Perl reuse the last successful regex or match between every character.
    my $escape = ( defined $esc_delim && length $delim ) ? 1 : 0;

    # Quote the delimiter so characters such as '|' or '.' match literally.
    my $delim_re = $escape ? qr/\Q$delim\E/ : undef;

    my $text  = '';
    my @nodes = ($tree);
    while (@nodes) {
        my $node = shift @nodes;
        next if !defined $node;

        if ( !ref $node ) {
            # $node is a copy, so the tree's own text is left untouched.
            $node =~ s/$delim_re/$esc_delim/g if $escape;
            $text .= $delim . $node;
            next;
        }

        next if $is_skipped{ $node->tag };

        # Put the children at the front of the queue to keep document order.
        unshift @nodes, $node->content_list;
    }
    return $text;
}