# Build an event-driven parser (HTML::Parser API version 3). Each handler is
# called with only the fields named in its argspec string: the start handler
# gets the tag name and attributes, the end handler gets the tag name, and
# the text handler gets the "dtext" form of the text.
my $p = HTML::Parser->new(
    api_version     => 3,
    start_h         => [\&start, "tagname, attr"],
    end_h           => [\&end,   "tagname"],
    text_h          => [\&text,  "dtext"],
    marked_sections => 1,
);

# Parse directly from file. parse_file() returns undef and sets $! when a
# named file cannot be opened, so fail loudly rather than silently producing
# no output.
$p->parse_file($inputFile)
    or die "Cannot parse '$inputFile': $!\n";

####

# text($origtext)
#
# Text handler registered with the parser above. Prints the text chunk to the
# currently selected output handle, skipping chunks that are undefined, empty,
# or consist only of whitespace.
#
# Parameters:
#   $origtext - the text chunk supplied by the parser ("dtext" argspec).
#
# Returns early (printing nothing) for undefined or whitespace-only text;
# otherwise the result of print.
sub text {
    my ($origtext) = @_;

    # Guard against undef before the value is used in a pattern match, so no
    # "uninitialized value" warning is raised.
    return if !defined $origtext;
    return if $origtext =~ /^\s*$/;

    # NOTE(review): as written, both substitutions replace their match with
    # the same text (a backslash before '&' or the fraction character in the
    # replacement yields just that character), so they appear to be no-ops.
    # Presumably the replacements were meant to be escaped/entity forms that
    # got lost somewhere along the way -- confirm against the original script
    # before changing them.
    $origtext =~ s/ \& / \& /g;
    $origtext =~ s/½/\½/g;

    print $origtext;
}