# Illustrative shape of the lookup table: the key is the ASCII spelling of a
# word, the value is the same word with its diacritics (here "\x{017e}" is
# LATIN SMALL LETTER Z WITH CARON).
# NOTE(review): the trailing "..." looks like a placeholder for further
# entries rather than runnable Perl -- confirm before using this line as-is.
my %respell = ( 'ze' => "\x{017e}e", ... );
####
use Unicode::Normalize;
# Build %respell: a lookup from the diacritic-free (ASCII) spelling of a word
# to its accented spelling, read from the UTF-8 word list "cetnosti".
# Each line is presumably "<word> <frequency>"; only the first
# whitespace-separated field (the word) is used.
# Dies if "cetnosti" cannot be opened.
# Note: when several accented words collapse to the same ASCII key, the one
# appearing last in the file wins.
my %respell;
open( my $info_fh, "<:utf8", "cetnosti" ) or die "cetnosti: $!";
while ( my $line = <$info_fh> ) {
    # Lines made up purely of ASCII cannot contribute a respelling.
    next unless $line =~ /[^[:ascii:]]/;

    # split ' ' ignores leading whitespace and also drops the trailing newline.
    my ($word) = split ' ', $line;
    next unless defined $word;

    # NFD breaks each accented letter into base letter + combining mark(s);
    # deleting every non-ASCII character then leaves the bare ASCII spelling.
    my $ascii_word = NFD($word);
    $ascii_word =~ s/[^[:ascii:]]+//g;

    $respell{$ascii_word} = $word;
}
close $info_fh;
####
# Copy the UTF-8 file "input" to "respelled", replacing every
# whitespace-delimited token that is a key of %respell with its accented
# spelling. Whitespace is preserved exactly, because the capturing group in
# the split pattern returns the separators as tokens too.
# Tokens must match a key exactly: a word with punctuation attached
# (e.g. "ze,") is not a key and is passed through unchanged.
# Dies if either file cannot be opened or if the output cannot be flushed.
open( my $in_fh,  "<:utf8", "input" )     or die "input: $!";
open( my $out_fh, ">:utf8", "respelled" ) or die "respelled: $!";
while ( my $line = <$in_fh> ) {
    my $outstr = '';
    for my $tkn ( split /(\s+)/, $line ) {
        if ( exists $respell{$tkn} ) {
            $tkn = $respell{$tkn};
        }
        $outstr .= $tkn;
    }
    print {$out_fh} $outstr;
}
close $in_fh;
# Buffered write errors only surface at close, so check it on the output.
close $out_fh or die "respelled: $!";