use strict;
use XML::Twig;
use Text::Iconv;
# Converter from UTF-8 to ISO-8859-1, used by the Node handler.
my $utf2latin = Text::Iconv->new (qw(UTF-8 ISO-8859-1));
# Converter for the opposite direction, ISO-8859-1 to UTF-8.
my $latin2utf = Text::Iconv->new (qw(ISO-8859-1 UTF-8));
# Twig handler for Node elements: prints the element's id attribute and its
# text content in several different ways, so the results of list printing,
# string interpolation and charset conversion can be compared side by side.
#   $twig - first handler argument (not used here)
#   $node - the element; its text () and att ('id') are read
# The "OK" / "garbled" remarks below are the outcomes reported by the author.
# NOTE(review): the differences are presumably caused by the id and the text
# being held in different internal encodings under KeepEncoding - confirm.
sub Node {
    my $twig    = shift;
    my $element = shift;
    my $text    = $element->text ();
    my $ident   = $element->att ('id');

    # Each value on a line of its own.
    print "$ident\n";
    print "$text\n";

    # Both values handed to print as separate list items - OK.
    print "1) ", $ident, " ", $text, "\n";

    # Both values interpolated into a single string - garbled.
    print "2) $ident $text\n";

    # With the id converted to Latin1 first, the interpolated string works.
    my $latin_id = $utf2latin->convert ($ident);
    print "3) $latin_id $text\n";

    # Converting the text to UTF-8 instead still doesn't work.
    my $utf_text = $latin2utf->convert ($text);
    print "4) $ident $utf_text\n";

    # Interpolate first, then convert the whole line to Latin1 - OK.
    my $joined = "5) $ident $text\n";
    print $utf2latin->convert ($joined);

    # The interpolated string does not match the Latin1 text it was built from.
    my $no_match = "$ident $text" !~ m/$text/;
    print qq{"$ident $text" !~ /$text/ => $no_match\n};
}
package main;
# Build the parser with KeepEncoding switched on and the Node sub registered
# as the handler for Node elements.
my $twig = XML::Twig->new (
    KeepEncoding => 1,
    TwigHandlers => { Node => \&Node },
);
# The document to parse is read from the DATA section at the end of this file.
$twig->parse (\*DATA);
__DATA__
<Node id="50">fünfzig</Node>
####
50
fünfzig
1) 50 fünfzig
2) 50 fünfzig
3) 50 fünfzig
4) 50 fünfzig
5) 50 fünfzig
"50 fünfzig" !~ /fünfzig/ => 1