use warnings;
use strict;

use Encode;
use HTML::Entities;
use XML::Twig;

# Diagnostic script: parse a product-feed snippet with XML::Twig and dump the
# last 12 characters of one product's Description, both as text and as a list
# of code points, to investigate an unexpected character in the feed.

# Only the Product element whose Id attribute matches triggers the handler.
my $objinfo_parser = XML::Twig->new(
    twig_handlers => {
        'Product[@Id = "most3usb20on"]' => \&_test_handler,
    },
);

# The embedded snippet is stored HTML-encoded, so entities are decoded before
# the text is handed to the XML parser.
$objinfo_parser->parse( decode_entities( join '', <DATA> ) );

#$objinfo_parser->parseurl ('http://www.3btech.net/objinfo.xml');

# Twig handler for the matching Product element.
#
# Parameters:
#   $twig - the XML::Twig object (unused here)
#   $elt  - the matched Product element
#
# Prints the tail of the Description field followed by the hexadecimal value
# of each of its characters, then terminates the program so parsing stops
# after the first match.
sub _test_handler {
    my ($twig, $elt) = @_;

    # Keep only the last 12 characters, where the suspect character lives.
    my $str = substr( $elt->field('Description'), -12 );

    # Per the Encode documentation, is_utf8() reports whether Perl's internal
    # UTF8 flag is set on the scalar; a true second argument additionally
    # checks that the internal representation is well-formed.
    print "The string below passes a utf8 well-formedness test, why?\n"
        if Encode::is_utf8( $str, 1 );

    print "$str\n\n";

    # 'U*' unpacks the string into one number per character.
    for my $code_point ( unpack 'U*', $str ) {
        printf "0x%X\n", $code_point;
    }

    print "\n";
    exit;
}

# The xml snippet below is taken directly from the above url
# (the url will take about 15s to download and parse)
# HTML encoded to preserve offending characters
#
# NOTE(review): the element markup of the snippet was missing from this copy
# of the file; the Product/Description wrapper below is reconstructed from the
# handler's XPath and the field name it reads -- confirm against the feed.
__DATA__
<Product Id="most3usb20on"><Description>Moving Star 3.5" USB 2.0 One Button Backup Aluminum Hard Drive Enclosure – Black</Description></Product>