# Recipe in one line: compatibility-decompose $str with NFKD, then delete every nonspacing (combining) mark.
$str = Unicode::Normalize::NFKD($str); $str =~ s/\p{NonspacingMark}//g; ##

##

## Demonstrate stripping of diacritical marks from Unicode strings
## April 2010, Bryce Nesbitt, Berkeley Electronic Press
## See also http://unicodelookup.com/
## See also http://en.wikipedia.org/wiki/Diacritic
## Keywords: perl, diacritic, diacritical
##           accent, iso-8859-1, normalization.
use utf8;                   # Tell perl source code is utf-8
use 5.10.0;
use Unicode::Normalize;

# Sample: "latin small letter e with circumflex and tilde" ễ
#         "latin small ligature ff" (will be expanded)
#         "latin small ligature oe" (won't be expanded)
my $sample = "\x{1ec5} märks \x{fb00} \x{153}";

# Read the text from the first command-line argument; fall back to the
# sample when the argument is missing or empty.  The test is on
# defined/length rather than truth, so an argument of "0" is processed
# instead of being silently replaced by the sample.
my $str = shift;
if (defined $str && length $str) {
    # Command-line arguments arrive as raw bytes.  Decode them so that
    # NFKD operates on characters rather than on individual UTF-8 bytes.
    # utf8::decode leaves the string unchanged if it is not valid UTF-8.
    utf8::decode($str);
}
else {
    $str = $sample;
}
say "Input: ".debug_chatty_string($str);

# Decompose into letter and combining marks, in "Kompatibility" mode
$str = NFKD($str);
say "NFKD : ".debug_chatty_string($str);

# Remove combining marks, leaving only the base characters
$str =~ s/\p{NonspacingMark}//g;
# Also fold to lower case, so the result is usable as a comparison key
$str = lc($str);
say "Out  : ".debug_chatty_string($str);

# Render a string so that it can be printed safely on any terminal.
#
# Takes one argument, the string to render.  Printable ASCII characters
# (code points 32..126) and newline (10) are copied through unchanged;
# every other character is replaced by its code point in lowercase hex,
# written as "<0x...>".
#
# Returns the rendered string.  Empty input yields the empty string.
sub debug_chatty_string
{
    # Start from '' so that empty input returns '' rather than undef.
    my $outstring = '';
    # The argument is consumed directly with shift, as the original did,
    # and split into individual characters.
    foreach my $char (split //, shift) {
        my $ord = ord($char);
        if (($ord >= 32 && $ord < 127) || $ord == 10) {
            $outstring .= $char;
        } else {
            $outstring .= sprintf("<0x%x>", $ord);
        }
    }
    return $outstring;
}

##

##

Input: <0x1ec5> m<0xe4>rks <0xfb00> <0x153>
NFKD : e<0x302><0x303> ma<0x308>rks ff <0x153>
Out  : e marks ff <0x153>