in reply to set_content of XML::twig
See Tutorials: perlunitut: Unicode in Perl, perluniintro/perlunitut, and utf8, binmode. PerlMonks uses the windows-1252 (similar to Latin-1) encoding, and all characters that are not in that character set are HTML-escaped - which doesn't work inside <code>...</code> tags, because everything is interpreted literally there.
So if you put all that together
#!/usr/bin/perl --
# Demonstrates that XML::Twig's set_content() stores the same text whether
# the Japanese string is written as a UTF-8 literal in the source or spelled
# out with \x{...} code point escapes. The dumped results of both loop
# iterations are identical (see the captured output after __END__).
use utf8;    # the source file itself contains a UTF-8 string literal
use strict;
use warnings;
use XML::Twig;
use Data::Dump qw/ dd /;

# The same text twice: once as a literal, once as explicit code points.
# NOTE: the forum rendering had HTML-escaped U+30BD (decimal 12477) inside
# the literal; it is restored here as the real character.
my $rawl = q{C1000 マネージャーソフトウェア};
my $rawe = "C1000 \x{30DE}\x{30CD}\x{30FC}\x{30B8}\x{30E3}\x{30FC}\x{30BD}\x{30D5}\x{30C8}\x{30A6}\x{30A7}\x{30A2}";

for my $raw ( $rawl, $rawe ) {

    # Build a tiny document with an empty <q> element to fill in.
    my $t = XML::Twig->new( pretty_print => 'indented' )
                     ->xparse( "<a><b><q></q></b></a>" );

    # The offset argument (0) makes findnodes return a single element
    # (the first match), so set_content can be called on it directly.
    $t->findnodes( '//q', 0 )->set_content( $raw );

    # Dump the input, the text read back from the element, and the
    # serialized document so the three can be compared.
    dd(
        $raw,
        $t->findnodes( '//q', 0 )->trimmed_text,
        $t->sprint,
    );

    #~ $t->print( \*STDOUT );
    #~ $t->print_to_file( 'outfoo.xml' );
}
__END__
(
  "C1000 \x{30DE}\x{30CD}\x{30FC}\x{30B8}\x{30E3}\x{30FC}\x{30BD}\x{30D5}\x{30C8}\x{30A6}\x{30A7}\x{30A2}",
  "C1000 \x{30DE}\x{30CD}\x{30FC}\x{30B8}\x{30E3}\x{30FC}\x{30BD}\x{30D5}\x{30C8}\x{30A6}\x{30A7}\x{30A2}",
  "<a>\n <b>\n <q>C1000 \x{30DE}\x{30CD}\x{30FC}\x{30B8}\x{30E3}\x{30FC}\x{30BD}\x{30D5}\x{30C8}\x{30A6}\x{30A7}\x{30A2}</q>\n </b>\n</a>\n",
)
(
  "C1000 \x{30DE}\x{30CD}\x{30FC}\x{30B8}\x{30E3}\x{30FC}\x{30BD}\x{30D5}\x{30C8}\x{30A6}\x{30A7}\x{30A2}",
  "C1000 \x{30DE}\x{30CD}\x{30FC}\x{30B8}\x{30E3}\x{30FC}\x{30BD}\x{30D5}\x{30C8}\x{30A6}\x{30A7}\x{30A2}",
  "<a>\n <b>\n <q>C1000 \x{30DE}\x{30CD}\x{30FC}\x{30B8}\x{30E3}\x{30FC}\x{30BD}\x{30D5}\x{30C8}\x{30A6}\x{30A7}\x{30A2}</q>\n </b>\n</a>\n",
)
|
|---|