# Sample text -- presumably the intended rendering:
#   Güldenstädt's Redstart
####
# Sample text -- presumably the garbled rendering under discussion:
#   Güldenstädtâ??s Redstart
####
# Fold every two-byte UTF-8 sequence whose lead byte is 0xC2 or 0xC3 into
# the single character (U+0080..U+00FF) that the pair encodes: the low two
# bits of the lead byte become the top two bits of the result, and the low
# six bits of the continuation byte supply the rest.
# NOTE(review): assumes $string holds undecoded bytes -- confirm with caller.
$string =~ s/([\xC2\xC3])([\x80-\xBF])/chr( ( ( ord($1) << 6 ) & 0xC0 ) | ( ord($2) & 0x3F ) )/eg;
####
use Unicode::Normalize 'normalize';
####
use Unicode::String qw(utf8 latin1);
####
use Unicode::UCD 'charinfo';

# Map every character outside the Basic Latin block through %subs, keyed by
# code point (e.g. 'ü' has code point 252, so it becomes $subs{252}).
# This is done in one left-to-right pass: the earlier form deleted each
# character in the loop condition before the body could substitute it, so
# the text lost its non-ASCII characters instead of having them replaced.
# A character with no %subs entry is left unchanged.
# NOTE(review): %subs must be populated before this statement runs.
$string =~ s/(\P{InBasic_Latin})/exists $subs{ ord($1) } ? $subs{ ord($1) } : $1/ge;
####
# Fill %subs with an identity mapping (code point => that character) for
# code points 126 through 255. Entries 126 and 127 lie inside Basic Latin,
# so the substitution above never looks them up.
foreach my $i ( 126 .. 255 ) {
    $subs{$i} = chr($i);
}