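There is indeed a problem: under `use encoding 'UTF-8';`, a regex match against an interpolated non-ASCII character fails even though both strings are correctly flagged as UTF-8. The script below reproduces it, followed by its output.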
#!/usr/bin/perl
use strict;
use warnings;
use encoding 'UTF-8';
#use encoding 'utf8';
#use utf8;
use Devel::Peek qw( Dump );
# Minimal reproduction: create a word and a single character from non-ASCII
# string literals, inspect how Perl stores them, then check three ways
# whether the word starts with / contains that character. All three checks
# are expected to agree.
my $word = "État"; # UTF-8 encoding of "État"
my $char = "É"; # UTF-8 encoding of "É"
# Dump the internal representation of the word and report its length as
# returned by length().
Dump($word);
print("String Length: ", length($word), "\n");
print("\n");
# Same inspection for the single character.
Dump($char);
print("String Length: ", length($char), "\n");
print("\n");
# Check 1: interpolate $char directly into the pattern.
if ($word =~ /$char/) {
print "Matches\n";
} else {
print "Does not match\n";
}
# Check 2: same match, but with \Q so that $char is taken literally; this
# rules out the character being interpreted as regex syntax.
if ($word =~ /\Q$char/) {
print "Matches\n";
} else {
print "Does not match\n";
}
# Check 3: compare the first character of $word with $char using string
# equality, i.e. without going through a regex match at all.
if (substr($word, 0, 1) eq $char) {
print "Equal\n";
} else {
print "Not equal\n";
}
SV = PV(0x22608c) at 0x225f9c
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK,UTF8) <-- That's good
PV = 0x1822634 "\303\211tat"\0 [UTF8 "\x{c9}tat"] <-- That's good
CUR = 5
LEN = 8
String Length: 4 <-- That's good
SV = PV(0x2260a4) at 0x225f3c
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK,UTF8) <-- That's good
PV = 0x22f43c "\303\211"\0 [UTF8 "\x{c9}"] <-- That's good
CUR = 2
LEN = 4
String Length: 1 <-- That's good
Does not match <-- WTF?
Does not match <-- WTF?
Equal <-- That's good
Replacing `use encoding 'UTF-8';` with `use encoding 'utf8';` yields the same results.
Replacing `use encoding 'UTF-8';` with `use utf8;` produces the same dumps, but both regex matches succeed.
My suggestion (instead of `use encoding`):
- Add `use utf8;` so that Perl treats the source code as UTF-8.
- Call `binmode(STDOUT, ":utf8");` so that output is written as UTF-8.
|