# Restore diacritics in ASCII-only text with the help of a word list.
#
# Step 1: read the word list "cetnosti" (one entry per line; the first
#         whitespace-separated field is the word, further fields are
#         ignored) and build %respell, which maps the ASCII-only spelling
#         of each accented word to its accented spelling.
# Step 2: copy "input" to "respelled", replacing every whitespace-delimited
#         token that has an entry in %respell.  Whitespace is preserved.
#
# The finished map has this shape:
#
#     my %respell = ( 'ze' => "\x{017e}e", ... );
#
# Known limitations (unchanged from the original approach):
#   * when several accented words reduce to the same ASCII spelling, the
#     entry that appears last in the word list wins;
#   * tokens are matched whole, so a word with punctuation attached
#     ("ze,") is not respelled.

use strict;
use warnings;
use Unicode::Normalize;

my %respell;

# ---- Step 1: build the respelling map ------------------------------------

# :encoding(UTF-8) validates the input, unlike the bare :utf8 layer.
open( my $info_fh, '<:encoding(UTF-8)', 'cetnosti' ) or die "cetnosti: $!";
while ( my $entry = <$info_fh> ) {
    next unless $entry =~ /[^[:ascii:]]/;    # skip words that are all-ascii

    my ($word) = split ' ', $entry;          # first field is the word
    next unless defined $word;               # line held no usable field

    my $ascii_word = NFD($word);    # break accented letters into letter, diacritic
    $ascii_word =~ s/[^[:ascii:]]+//g;       # delete diacritics

    # A word made up solely of non-ASCII characters would produce an empty
    # key, which would then match the empty leading token of any input line
    # that starts with whitespace.
    next if $ascii_word eq '';

    $respell{$ascii_word} = $word;
}
close $info_fh;

# ---- Step 2: respell the input -------------------------------------------

open( my $in_fh,  '<:encoding(UTF-8)', 'input' )     or die "input: $!";
open( my $out_fh, '>:encoding(UTF-8)', 'respelled' ) or die "respelled: $!";
while ( my $line = <$in_fh> ) {

    # The capturing group keeps the whitespace runs as tokens of their own,
    # so joining the tokens reproduces the original spacing and newline.
    my $outstr = join '',
        map { exists $respell{$_} ? $respell{$_} : $_ }
        split /(\s+)/, $line;

    print {$out_fh} $outstr or die "respelled: $!";
}
close $in_fh;

# Buffered write errors only surface at close, so check it.
close $out_fh or die "respelled: $!";