use HTML::Entities;
use HTML::TokeParser::Simple;
# Read the whole document in one go. Localising $/ to undef puts just this
# read into slurp mode without affecting any other reads in the program.
# NOTE(review): the filehandle name was missing from this statement; FILE is
# assumed to be a handle already opened on the input document -- confirm it
# against the real open() call.
my $html = do { local $/; <FILE> };
my $parsed = parseHTML($html);
# Rebuild an HTML document, replacing the double quotes and commas found in
# its text tokens with HTML entities. Every non-text token is copied through
# exactly as it appeared in the input, so the markup itself is preserved
# (nothing is stripped).
#
# Parameters:
#   $html - string holding the HTML source to process.
# Returns:
#   The rebuilt document as a string; an empty string when the parser
#   yields no tokens.
sub parseHTML {
    my $html = shift;

    # Start from '' so that input producing no tokens returns an empty
    # string rather than undef.
    my $parsed = '';

    my $p = HTML::TokeParser::Simple->new(\$html);
    while ( my $token = $p->get_token ) {
        my $chunk = $token->as_is;

        # Escape only text tokens; all other tokens are appended untouched
        # so quotes that belong to the markup are left alone.
        if ( $token->is_text ) {
            # Called in void context, so $chunk is modified in place.
            encode_entities( $chunk, '",' );
        }

        $parsed .= $chunk;
    }
    return $parsed;
}