Some UTF-8 characters hereí
Some more UTF-8 characters hereč
±There could be UTF-8 characters anywhere
]]>
####
#!/usr/bin/perl
#
# Rewrite the text of every CDATA section in an XML file so that markup-
# significant and non-ASCII characters appear as HTML entities, then print
# the resulting document to STDOUT.
#
# Usage: <script> file.xml
use strict;
use warnings;
use utf8;
use XML::Twig;
use HTML::Entities;
use HTML::Parser;

my $xml = $ARGV[0] or die "Usage: $0 file.xml\n";

# Every CDATA section is passed to encodeCorrectly() while the file is parsed.
my $twig = XML::Twig->new(
    pretty_print  => 'indented',
    twig_handlers => { '#CDATA' => \&encodeCorrectly },
);

$twig->parsefile($xml);
$twig->flush;
exit;

# Twig handler: replace the text of one CDATA section with its
# entity-encoded form.
#
# Arguments: the twig object and the CDATA element being handled.
# Returns:   whatever set_text() returns for the updated element.
sub encodeCorrectly {
    my ($twig, $property) = @_;

    # Characters to encode: & ' " [ ] and everything outside ASCII.
    # encode_entities() uses this string as the body of a regex character
    # class, so:
    #   * the square brackets must be backslash-escaped -- a bare "]" closes
    #     the class early and the rest of the string is then matched as
    #     literal text, so nothing gets encoded;
    #   * the upper bound must go beyond \377, otherwise characters above
    #     U+00FF (for example U+010D) are left untouched.
    my $unsafe_chars = '&\'"\[\]\x{80}-\x{10FFFF}';

    my $html_encoded = encode_entities($property->text, $unsafe_chars);

    return $property->set_text($html_encoded);
}
####
# Shell alternative: replace every non-ASCII character in the whole file with
# a decimal numeric character reference. The reference must end with ";".
perl -CS -pe 's/([^[:ascii:]])/sprintf("&#%d;",ord($1))/eg' < orig.xml > encoded.xml