use charnames qw( :full );
# Sample word spelled with precomposed letters: every accented letter is a
# single code point, with no separate base letter in front of it.
my $s =
    "ksi\N{LATIN SMALL LETTER E WITH OGONEK}" .
    "gowo\N{LATIN SMALL LETTER S WITH ACUTE}" .
    "\N{LATIN SMALL LETTER C WITH ACUTE}";

# Accepts a string made up solely of CRLF pairs, alphanumerics, spaces,
# newlines and hyphens.
my $allowed = qr/^(?:\r\n|[\p{Alnum} \n-])*\z/;

# Report whether the whole string passes the check.
print( $s =~ $allowed ? "match\n" : "no match\n" );
####
match
####
use charnames qw( :full );
# Sample word in decomposed form: every accented letter is written as a base
# letter followed by a separate combining mark.
my $s =
    "ksie\N{COMBINING OGONEK}gowo" .
    "s\N{COMBINING ACUTE ACCENT}" .
    "c\N{COMBINING ACUTE ACCENT}";

# Accepts a string made up solely of CRLF pairs, alphanumerics, combining
# marks, spaces, newlines and hyphens. The marks are listed explicitly
# because \p{Alnum} (alphabetic characters plus decimal digits) does not
# cover U+0328 or U+0301; without \p{M} the decomposed spelling is rejected.
my $allowed = qr/^(?:\r\n|[\p{Alnum}\p{M} \n-])*\z/;

# Report whether the whole string passes the check.
print( $s =~ $allowed ? "match\n" : "no match\n" );
####
match
####
use Unicode::Normalize qw( normalize );
use charnames qw( );
# Sample word in its precomposed spelling (U+0119, U+015B and U+0107 are
# single code points).
my $s = "ksi\x{0119}gowo\x{015B}\x{0107}";

# For each normalization form, print the form's name, then one line per code
# point of the normalized string (hex value and Unicode name), then a blank
# line.
for my $form (qw(NFC NFD)) {
    print "$form\n";

    my $normalized = normalize($form, $s);
    for my $char (split //, $normalized) {
        my $code = ord $char;
        printf("U+%04X: %s\n", $code, charnames::viacode($code));
    }

    print("\n");
}
####
NFC
U+006B: LATIN SMALL LETTER K
U+0073: LATIN SMALL LETTER S
U+0069: LATIN SMALL LETTER I
U+0119: LATIN SMALL LETTER E WITH OGONEK
U+0067: LATIN SMALL LETTER G
U+006F: LATIN SMALL LETTER O
U+0077: LATIN SMALL LETTER W
U+006F: LATIN SMALL LETTER O
U+015B: LATIN SMALL LETTER S WITH ACUTE
U+0107: LATIN SMALL LETTER C WITH ACUTE
NFD
U+006B: LATIN SMALL LETTER K
U+0073: LATIN SMALL LETTER S
U+0069: LATIN SMALL LETTER I
U+0065: LATIN SMALL LETTER E
U+0328: COMBINING OGONEK
U+0067: LATIN SMALL LETTER G
U+006F: LATIN SMALL LETTER O
U+0077: LATIN SMALL LETTER W
U+006F: LATIN SMALL LETTER O
U+0073: LATIN SMALL LETTER S
U+0301: COMBINING ACUTE ACCENT
U+0063: LATIN SMALL LETTER C
U+0301: COMBINING ACUTE ACCENT
####
# \X matches one extended grapheme cluster. The subject is a single
# precomposed code point, and the pattern is anchored at both ends, so it
# matches only if the whole string is exactly one cluster.
"\N{LATIN SMALL LETTER E WITH ACUTE}" =~ /^\X\z/
####
# \X matches one extended grapheme cluster. The subject is the decomposed
# spelling: a base letter followed by U+0301, whose Unicode name is
# COMBINING ACUTE ACCENT. The pattern is anchored at both ends, so it matches
# only if the two code points together form exactly one cluster.
"e\N{COMBINING ACUTE ACCENT}" =~ /^\X\z/