use HTML::LinkExtractor;
use Encode qw/_utf8_on is_utf8/;
use open OUT => ':utf8';
my $utf8 = do { local $/; };
my $LX = HTML::LinkExtractor->new(undef, undef, 1);
$LX->parse(\$utf8);
for my $l (@{$LX->links}){
if($l->{tag} eq 'a'){
my $character = substr($l->{_TEXT}, 0, 1);
print "Character: $character\n";
print "Code: ", ord($character) , "\n";
print "utf8 flag: " , is_utf8($character), "\n";
_utf8_on($character);
print "Character: $character\n";
print "Code: ", ord($character) , "\n";
print "utf8 flag: " , is_utf8($character), "\n";
}
}
__DATA__
ó
__OUTPUT__
Character: ó
Code: 243
utf8 flag:
Wide character in print at a.pl line 15, line 1.
Character: ó
Malformed UTF-8 character (unexpected non-continuation byte 0x00, immediately after start byte 0xf3) in ord at a.pl line 16, line 1.
Code: 0
utf8 flag: 1