test1 has asked for the wisdom of the Perl Monks concerning the following question:

# Replace the text of the <Description> element found under <Metadata> in
# test.xml with a Japanese string, write the resulting document to
# test.xml.tmp, and log the old and new values to test.xmllog.
use strict;
use warnings;
use utf8;       # this file contains a literal UTF-8 string; without the
                # pragma Perl treats it as raw bytes and the :encoding(UTF-8)
                # output layer encodes those bytes a second time (mojibake)
use autodie;    # open/close throw on failure, so no "or die" is needed
use XML::Twig;
use File::Copy;
use Encode;
use File::Find::Rule;

binmode STDOUT, ':encoding(UTF-8)';

my $srcDCR  = 'test.xml';
my $logfile = $srcDCR . "log";
my $tmpdcr  = $srcDCR . ".tmp";

# Lexical handles, three-argument open. The log needs an encoding layer as
# well, otherwise printing the decoded text warns "Wide character in print".
# Opening the output for writing creates/truncates it, so no prior copy of
# the source file is required.
open my $log_fh, '>:encoding(UTF-8)', $logfile;
open my $out_fh, '>:encoding(UTF-8)', $tmpdcr;

# NOTE(review): if the goal is numeric character references (&#12510; ...)
# in the output rather than raw UTF-8 characters, XML::Twig documents an
# output_filter => 'safe' constructor option for that -- confirm the intent.
my $twig = XML::Twig->new( keep_encoding => 0 );
$twig->parsefile($srcDCR);

my $root = $twig->root;

# Fail with a readable message instead of "Can't call method ... on an
# undefined value" when the expected elements are absent.
my $littype_parent = $root->first_child('Metadata')
    or die "No <Metadata> child under the root element of $srcDCR\n";
my $littype_child = $littype_parent->first_child('Description')
    or die "No <Description> child under <Metadata> in $srcDCR\n";

# Log the current value before overwriting it.
my $littype_value = $littype_parent->first_child_trimmed_text('Description');
print {$log_fh} $littype_value;

$littype_value = 'C1000 マネージ ';
print {$log_fh} $littype_value;

$littype_child->set_content($littype_value);

# Log what the element contains after the update.
print {$log_fh} 'final value';
print {$log_fh} $littype_parent->first_child_trimmed_text('Description');

$twig->print($out_fh);

# Close explicitly so buffered write errors surface (autodie checks close).
close $out_fh;
close $log_fh;
test.xml:
<Metadata> <Description>C1000 &#12510;&#12493;&#12540;&#12472;&#12515;&#12540;&#12477;&#12501;&#12488;&#12454;&#12455;&#12450;</Description> </Metadata>
In the XML, the value of Description is getting set to C1000 マネージ instead of C1000 &#12510;&#12493;&#12540;&#12472;. I am new to Perl.

Replies are listed 'Best First'.
Re: set_content of XML::twig
by Anonymous Monk on Sep 11, 2015 at 23:32 UTC

    See Tutorials: perlunitut: Unicode in Perl, perluniintro/perlunitut, and utf8, binmode. Perlmonks uses the windows-1252 (similar to Latin-1) encoding, and all characters that are not in that character set are HTML-escaped - which doesn't work inside <code>...</code> tags, because everything is interpreted literally there.

    So if you put all that together

    #!/usr/bin/perl --
    # Demonstrates that XML::Twig's set_content stores the same text whether
    # the string is written as literal UTF-8 characters (decoded thanks to
    # "use utf8") or with \x{...} escapes. The dump recorded after __END__
    # shows both loop iterations producing identical output.
    use utf8;
    use strict;
    use warnings;
    use XML::Twig;
    use Data::Dump qw/ dd /;

    # Literal characters. The forum HTML-escapes non-Latin-1 characters inside
    # code sections, which had turned this string into &#NNNNN; references;
    # the literal text is restored here so the script matches its recorded
    # output below.
    my $rawl = q{C1000 マネージャーソフトウェア};

    # The same text spelled with code point escapes.
    my $rawe = "C1000 \x{30DE}\x{30CD}\x{30FC}\x{30B8}\x{30E3}\x{30FC}\x{30BD}\x{30D5}\x{30C8}\x{30A6}\x{30A7}\x{30A2}";

    for my $raw ( $rawl, $rawe ) {
        my $t = XML::Twig->new( pretty_print => 'indented' )
                         ->xparse( "<a><b><q></q></b></a>" );

        $t->findnodes( '//q', 0 )->set_content( $raw );

        # Dump the input, the text read back from the element, and the
        # serialised document.
        dd(
            $raw,
            $t->findnodes( '//q', 0 )->trimmed_text,
            $t->sprint,
        );

        #~ $t->print( \*STDOUT );
        #~ $t->print_to_file( 'outfoo.xml' );
    }
    __END__
    (
      "C1000 \x{30DE}\x{30CD}\x{30FC}\x{30B8}\x{30E3}\x{30FC}\x{30BD}\x{30D5}\x{30C8}\x{30A6}\x{30A7}\x{30A2}",
      "C1000 \x{30DE}\x{30CD}\x{30FC}\x{30B8}\x{30E3}\x{30FC}\x{30BD}\x{30D5}\x{30C8}\x{30A6}\x{30A7}\x{30A2}",
      "<a>\n <b>\n <q>C1000 \x{30DE}\x{30CD}\x{30FC}\x{30B8}\x{30E3}\x{30FC}\x{30BD}\x{30D5}\x{30C8}\x{30A6}\x{30A7}\x{30A2}</q>\n </b>\n</a>\n",
    )
    (
      "C1000 \x{30DE}\x{30CD}\x{30FC}\x{30B8}\x{30E3}\x{30FC}\x{30BD}\x{30D5}\x{30C8}\x{30A6}\x{30A7}\x{30A2}",
      "C1000 \x{30DE}\x{30CD}\x{30FC}\x{30B8}\x{30E3}\x{30FC}\x{30BD}\x{30D5}\x{30C8}\x{30A6}\x{30A7}\x{30A2}",
      "<a>\n <b>\n <q>C1000 \x{30DE}\x{30CD}\x{30FC}\x{30B8}\x{30E3}\x{30FC}\x{30BD}\x{30D5}\x{30C8}\x{30A6}\x{30A7}\x{30A2}</q>\n </b>\n</a>\n",
    )