use strict;
use warnings;
use charnames qw( :full );
use Unicode::Normalize qw( normalize );

# Demonstrations of matching accented text with Unicode-aware regexes, using
# the same word in precomposed (NFC) and decomposed (NFD) form.

# Returns 1 when $text consists solely of alphanumerics, combining marks,
# spaces, hyphens and line breaks (LF or CRLF), and 0 otherwise.  The empty
# string is accepted; a bare CR is not.
sub is_allowed_text {
    my ($text) = @_;

    # \p{Mark} is listed explicitly so that decomposed text is accepted too:
    # combining marks such as U+0328 and U+0301 are not alphabetic, so
    # \p{Alnum} on its own is not enough for base letter + mark sequences.
    return $text =~ /\A(?:\r\n|[\p{Alnum}\p{Mark} \n-])*\z/ ? 1 : 0;
}

# Returns 1 when $text is exactly one extended grapheme cluster (\X),
# however many code points encode it, and 0 otherwise.
sub is_single_grapheme {
    my ($text) = @_;
    return $text =~ /\A\X\z/ ? 1 : 0;
}

# Returns one "U+XXXX: NAME\n" line per code point of $text after applying
# the Unicode::Normalize form named by $form (e.g. 'NFC' or 'NFD').
sub code_point_lines {
    my ($form, $text) = @_;
    return map { sprintf "U+%04X: %s\n", $_, charnames::viacode($_) }
           map { ord }
           split //, normalize($form, $text);
}

# --- 1. Precomposed characters ---------------------------------------------
my $precomposed = "ksi\N{LATIN SMALL LETTER E WITH OGONEK}"
                . "gowo\N{LATIN SMALL LETTER S WITH ACUTE}"
                . "\N{LATIN SMALL LETTER C WITH ACUTE}";
print(is_allowed_text($precomposed) ? "match\n" : "no match\n");
# Output:
#   match

# --- 2. Same word, base letters followed by combining marks -----------------
my $decomposed = "ksie\N{COMBINING OGONEK}gowo"
               . "s\N{COMBINING ACUTE ACCENT}"
               . "c\N{COMBINING ACUTE ACCENT}";
print(is_allowed_text($decomposed) ? "match\n" : "no match\n");
# Output:
#   match

# --- 3. Code points of the word under each normalization form ---------------
my $word = "ksi\x{0119}gowo\x{015B}\x{0107}";
for my $form (qw(NFC NFD)) {
    print "$form\n";
    print code_point_lines($form, $word);
    print "\n";
}
# Output:
#   NFC
#   U+006B: LATIN SMALL LETTER K
#   U+0073: LATIN SMALL LETTER S
#   U+0069: LATIN SMALL LETTER I
#   U+0119: LATIN SMALL LETTER E WITH OGONEK
#   U+0067: LATIN SMALL LETTER G
#   U+006F: LATIN SMALL LETTER O
#   U+0077: LATIN SMALL LETTER W
#   U+006F: LATIN SMALL LETTER O
#   U+015B: LATIN SMALL LETTER S WITH ACUTE
#   U+0107: LATIN SMALL LETTER C WITH ACUTE
#
#   NFD
#   U+006B: LATIN SMALL LETTER K
#   U+0073: LATIN SMALL LETTER S
#   U+0069: LATIN SMALL LETTER I
#   U+0065: LATIN SMALL LETTER E
#   U+0328: COMBINING OGONEK
#   U+0067: LATIN SMALL LETTER G
#   U+006F: LATIN SMALL LETTER O
#   U+0077: LATIN SMALL LETTER W
#   U+006F: LATIN SMALL LETTER O
#   U+0073: LATIN SMALL LETTER S
#   U+0301: COMBINING ACUTE ACCENT
#   U+0063: LATIN SMALL LETTER C
#   U+0301: COMBINING ACUTE ACCENT

# --- 4. \X matches a precomposed character as one cluster -------------------
print(is_single_grapheme("\N{LATIN SMALL LETTER E WITH ACUTE}")
    ? "match\n"
    : "no match\n");
# Output:
#   match

# --- 5. \X also matches base letter + combining mark as one cluster ---------
print(is_single_grapheme("e\N{COMBINING ACUTE ACCENT}")
    ? "match\n"
    : "no match\n");
# Output:
#   match