use strict;
use warnings;

use Encode qw/decode is_utf8/;
use Unicode::Normalize;

# Demonstration: remove diacritic marks from Latin text.
#
# The text is put into Unicode Normalization Form D, which splits every
# accented letter into its base letter followed by separate "combining"
# code points, and the combining marks are then deleted.

# strip_diacritics($text)
#   $text   - a decoded (character, not byte) string.
#   returns - a copy of $text in NFD form with every code point in the
#             U+0300..U+036F range removed; $text itself is not modified.
sub strip_diacritics {
    my ($text) = @_;

    # NFD normalization splits off all diacritic marks as separate code
    # points, and these "combining" marks for Latin are in the
    # U+0300-U+036F range, so deleting that range leaves the base letters.
    ( my $stripped = NFD($text) ) =~ tr/\x{300}-\x{36f}//d;

    return $stripped;
}

# Let $raw_bytes be a value that was just fetched from a UTF-8 database
# field; such a value will most likely need to be decoded into characters
# before it can be normalized.
my $raw_bytes;    # TODO: assign the fetched database value here

# When nothing has been fetched, fall back to a built-in test sample
# consisting of the code points U+00C0..U+00FF.
my $string = defined $raw_bytes
    ? decode( 'UTF-8', $raw_bytes )    # strict UTF-8, not Perl's lax "utf8"
    : join( '', map { chr } 0xc0 .. 0xff );

my $string_nd = strip_diacritics($string);

binmode STDOUT, ':encoding(UTF-8)';    # just to be sure this has been done
print "original: << $string >>\n";
print " edited: << $string_nd >>\n";