use strict;
use warnings;

# Snippet 1: read a Unicode text file line by line and echo it to STDOUT.
#
# $open_mode carries both the read direction and the decoding layer, so the
# input encoding can be changed in one place -- e.g. "<:encoding(UTF-16LE)".
# ":encoding(UTF-8)" is used rather than the bare ":utf8" layer because the
# latter does not validate the incoming bytes.
my $open_mode  = "<:encoding(UTF-8)";
my $input_file = "unicode_input.txt";

open( my $in, $open_mode, $input_file )
    or die "Cannot open '$input_file' with mode '$open_mode': $!";

# Encode everything printed to STDOUT as UTF-8 -- or whatever form of
# Unicode is supported by your display tool.
binmode( STDOUT, ":encoding(UTF-8)" )
    or die "Cannot set encoding layer on STDOUT: $!";

while ( my $line = <$in> ) {
    # Data is decoded to Perl's internal character form on input.
    # Do stuff with $line here, then:
    print $line;
}

close($in) or die "Cannot close '$input_file': $!";

####

# Snippet 2: load a tab-separated "character<TAB>pinyin" table into a hash
# keyed by the character.
#
# NOTE(review): the filehandle read by this loop was lost from the original
# text (the loop condition was an empty "while ()", which never terminates).
# The file name below is a placeholder, and reusing $open_mode assumes the
# table is stored in the same encoding as the input file -- confirm both.
my $table_file = "char_pinyin_table.txt";

open( my $table_fh, $open_mode, $table_file )
    or die "Cannot open '$table_file' with mode '$open_mode': $!";

my %charpinyin;
while ( my $entry = <$table_fh> ) {
    chomp $entry;
    my ( $chchar, $pinyin ) = split /\t/, $entry;

    # Skip blank or malformed lines (no tab) instead of storing undef values
    # or using an undefined key.
    next unless defined $chchar && defined $pinyin;

    $charpinyin{$chchar} = $pinyin;
}

close($table_fh) or die "Cannot close '$table_file': $!";