#!/usr/bin/perl
use HTML::TokeParser::Simple;
use HTML::Entities;
use strict;
use warnings;
# Entity-encode every text token that appears inside the BODY element of an
# HTML file and write the result to STDOUT. Tags, comments, declarations and
# any text outside BODY are echoed unchanged.
#
# Usage: <script> <filename>

# Test definedness rather than truth so a file literally named "0" is accepted.
my $filename = shift @ARGV;
die "usage: $0 <filename>\n" unless defined $filename;

# With a single filename argument the constructor delegates to
# HTML::TokeParser, which returns undef (leaving the reason in $!) when the
# file cannot be opened. Report that here instead of crashing later with
# "Can't call method get_token on an undefined value".
my $parser = HTML::TokeParser::Simple->new($filename)
    or die "$0: cannot open '$filename': $!\n";

# True while the parser is positioned between <body> and </body>.
my $in_body = 0;

while ( my $token = $parser->get_token ) {
    if ( $in_body && $token->is_text ) {
        # NOTE(review): as_is presumably yields the raw source text, so input
        # that already contains entities (e.g. "&amp;") would be encoded a
        # second time ("&amp;amp;") -- confirm whether that is intended.
        print HTML::Entities::encode_entities( $token->as_is );
        next;
    }

    # Track BODY boundaries. The boundary tag itself, like every other
    # non-text token, is passed through verbatim below.
    if ($in_body) {
        $in_body = 0 if $token->is_end_tag('body');
    }
    else {
        $in_body = 1 if $token->is_start_tag('body');
    }

    print $token->as_is;
}