use Unicode::UCD 'charinfo';    # charinfo is not called in this snippet; import kept as-is

# Replace every character outside the Basic Latin block with its entry in
# %subs, which is keyed by the character's numeric code point
# (e.g. for "ü", ord("ü") == 252, so the replacement is $subs{252}).
#
# The replacement is done in ONE pass with s///ge:
#   * each character is replaced in place, rather than being deleted first
#     and searched for again afterwards;
#   * the matched character is never interpolated into a pattern, so no
#     quotemeta is needed;
#   * replacement text is not rescanned, so a replacement that itself
#     contains non-Basic-Latin characters cannot cause an endless loop.
#
# Characters with no (defined) entry in %subs are left unchanged instead of
# being silently dropped.
$string =~ s{(\P{InBasic_Latin})}{
    my $U_char      = $1;               # e.g. "ü"
    my $U_codepoint = ord($U_char);     # e.g. 252
    defined $subs{$U_codepoint} ? $subs{$U_codepoint} : $U_char;
}ge;