#!/usr/bin/perl

# Demonstration script: builds two string literals containing a non-ASCII
# character, dumps their internal representation, and then checks whether
# a pattern interpolating the single character matches the longer word.
# This file must be saved as UTF-8 so the literals below carry the byte
# sequences the inline comments describe.

use strict;
use warnings;

# Source-encoding pragma under test. The two commented-out lines appear to
# be alternatives to swap in for comparison -- enable exactly one at a time.
use encoding 'UTF-8';
#use encoding 'utf8';
#use utf8;

use Devel::Peek qw( Dump );

my $word = "État";  # UTF-8 encoding of "État"
my $char = "É";     # UTF-8 encoding of "É"

# Internal representation and character count of the whole word.
# Dump is called directly on each lexical (not through a loop alias) so the
# reported SV is the variable itself.
Dump($word);
printf("String Length: %d\n", length($word));
print "\n";

# Same inspection for the single character.
Dump($char);
printf("String Length: %d\n", length($char));
print "\n";

# 1) Pattern built by plain interpolation of $char.
print( ($word =~ /$char/) ? "Matches\n" : "Does not match\n" );

# 2) Same pattern, with $char quoted via \Q so no metacharacters apply.
print( ($word =~ /\Q$char/) ? "Matches\n" : "Does not match\n" );

# 3) Non-regex control: compare the first character of $word using eq.
print( (substr($word, 0, 1) eq $char) ? "Equal\n" : "Not equal\n" );
SV = PV(0x22608c) at 0x225f9c
  REFCNT = 1
  FLAGS = (PADBUSY,PADMY,POK,pPOK,UTF8)                <-- That's good
  PV = 0x1822634 "\303\211tat"\0 [UTF8 "\x{c9}tat"]    <-- That's good
  CUR = 5
  LEN = 8
String Length: 4                                       <-- That's good

SV = PV(0x2260a4) at 0x225f3c
  REFCNT = 1
  FLAGS = (PADBUSY,PADMY,POK,pPOK,UTF8)                <-- That's good
  PV = 0x22f43c "\303\211"\0 [UTF8 "\x{c9}"]           <-- That's good
  CUR = 2
  LEN = 4
String Length: 1                                       <-- That's good

Does not match                                         <-- WTF?
Does not match                                         <-- WTF?
Equal                                                  <-- That's good
Replacing "use encoding 'UTF-8';" with "use encoding 'utf8';" yields the same results.
Replacing "use encoding 'UTF-8';" with "use utf8;" produces the same dumps, but the matches succeed.
My suggestion:
In reply to Re^3: utf8, locale and regexp
by ikegami
in thread utf8, locale and regexp
by Anonymous Monk
| For: | Use: | ||
| & | &amp; | ||
| < | &lt; | ||
| > | &gt; | ||
| [ | &#91; | ||
| ] | &#93; |