# Replace every character outside the Basic Latin block (U+0000 .. U+007F).
#
#   * If %subs has a defined entry for the code point, that entry is used;
#     e.g. for the sign ü, ord() is 252, so $subs{252} is the replacement.
#   * Otherwise a visible marker is written instead, for example
#         <$subs{252} = LATIN SMALL LETTER U WITH DIAERESIS;>
#     so that the missing mapping can be spotted in the output.
#
# If a sign was found that is absent from the hash, the outfile will contain
# "<$subs{8224} = DAGGER;>" etc.  You then have to write into
# make_the_subs_hash() a line like  $subs{8224} = '¦';  (that is at [1]
# below) and re-run the script with the extended %subs.
#
# The logic lives in a closure rather than inline in the s///e replacement,
# so that neither comments nor string literals can collide with the
# substitution delimiters.
my $substitute_for = sub {
    my ($code) = @_;

    return $subs{$code} if defined $subs{$code};

    # charinfo() returns undef for unassigned code points and for
    # non-characters, so its result must not be dereferenced blindly.
    my $info = charinfo($code);
    my $name = defined $info ? $info->{name} : '<unassigned>';

    return ' <$subs{' . $code . "} = $name;> ";
};

# e = evaluate the replacement as code, g = repeat for every match.
$string =~ s/(\P{InBasic_Latin})/$substitute_for->(ord $1)/eg;

return($string);

####

# [1] Build the substitution table.
# Code points up to 255 map to themselves, so these characters pass through
# the substitution unchanged.  (126 and 127 belong to Basic Latin and are
# never matched by the pattern above; they are kept so the table contents
# stay the same as before.)
foreach my $i (126 .. 255) {
    $subs{$i} = chr($i);
}

# Plus higher value code points found empirically; see [1] above.
$subs{338}  = 'OE';    # LATIN CAPITAL LIGATURE OE
$subs{339}  = 'oe';    # LATIN SMALL LIGATURE OE
$subs{8217} = "'";     # RIGHT SINGLE QUOTATION MARK
$subs{8224} = '×';     # DAGGER