use HTML::LinkExtractor; use Encode qw/_utf8_on is_utf8/; use open OUT => ':utf8'; my $utf8 = do { local $/; }; my $LX = HTML::LinkExtractor->new(undef, undef, 1); $LX->parse(\$utf8); for my $l (@{$LX->links}){ if($l->{tag} eq 'a'){ my $character = substr($l->{_TEXT}, 0, 1); print "Character: $character\n"; print "Code: ", ord($character) , "\n"; print "utf8 flag: " , is_utf8($character), "\n"; _utf8_on($character); print "Character: $character\n"; print "Code: ", ord($character) , "\n"; print "utf8 flag: " , is_utf8($character), "\n"; } } __DATA__ ó __OUTPUT__ Character: ó Code: 243 utf8 flag: Wide character in print at a.pl line 15, line 1. Character: ó Malformed UTF-8 character (unexpected non-continuation byte 0x00, immediately after start byte 0xf3) in ord at a.pl line 16, line 1. Code: 0 utf8 flag: 1