use strict;
use warnings;

use HTML::Entities;
use HTML::TokeParser::Simple;

# Slurp the whole document in one read. The original example elided the
# filehandle ("assume the File has been opened in slurp mode"); STDIN is
# used here as a stand-in so the script is runnable. Swap in your own
# handle as needed. $/ is localized so slurp mode does not leak out.
my $html = do { local $/; <STDIN> };

my $parsed = parseHTML($html);

# parseHTML($html)
#
# Walks the HTML document token by token and rebuilds it, leaving all
# markup (tags, comments, declarations, ...) exactly as written while
# entity-encoding the double-quote and comma characters that occur in
# text tokens. Markup is NOT stripped.
#
# Parameters:
#   $html - string containing the HTML document.
#
# Returns:
#   The rebuilt document as a string; the empty string when the input is
#   undefined or contains no tokens.
sub parseHTML {
    my $html = shift;

    # Always hand back a defined string, even for missing/empty input.
    my $parsed = '';
    return $parsed unless defined $html;

    my $p = HTML::TokeParser::Simple->new(\$html);

    while ( my $token = $p->get_token ) {
        if ( $token->is_text ) {
            # Only text is escaped, and only the characters listed in the
            # second argument (" and ,) are treated as unsafe.
            my $text = $token->as_is;
            encode_entities( $text, '",' );
            $parsed .= $text;
        }
        else {
            # Anything that is not text is copied through untouched.
            $parsed .= $token->as_is;
        }
    }

    return $parsed;
}