#!/usr/bin/perl
#
# Copy an HTML file to STDOUT, replacing unsafe characters in the text
# found between <body> and </body> with HTML entities.  Everything outside
# the body, and every non-text token inside it (tags, comments, ...), is
# printed exactly as it appears in the input.
#
# Usage: script <filename>

use strict;
use warnings;

use HTML::TokeParser::Simple;
use HTML::Entities;

# Test definedness/length rather than truth so a file named "0" is accepted.
my $filename = shift @ARGV;
die "usage: $0 <filename>\n"
    unless defined $filename && length $filename;

# The constructor returns undef when the file cannot be opened; report that
# here instead of failing later with a method call on an undefined value.
my $parser = HTML::TokeParser::Simple->new($filename)
    or die "$0: cannot open '$filename': $!\n";

# True while the current token lies between <body> and </body>.
my $in_body = 0;

TOKEN:
while ( my $token = $parser->get_token ) {

    # Only text inside the body is converted.
    # NOTE(review): the text is taken verbatim from the source, so entities
    # already present in the input (e.g. "&amp;") appear to be encoded a
    # second time -- confirm this is intended.
    if ( $in_body && $token->is_text ) {
        print HTML::Entities::encode_entities( $token->as_is );
        next TOKEN;
    }

    # Track entry into and exit from the body.  The <body> and </body> tags
    # themselves fall through and are printed unchanged.
    if ($in_body) {
        $in_body = 0 if $token->is_end_tag('body');
    }
    elsif ( $token->is_start_tag('body') ) {
        $in_body = 1;
    }

    print $token->as_is;
}