use Encode;

# Example 1: decode a CP1252 byte string that is already in memory.
# The sample deliberately mixes mapped bytes (0x80, 0x91-0x96) with a byte
# that is expected to be unmapped in CP1252 (0x81).
my $cp1252_str = join(
    '',
    chr(0x80), chr(0x81), chr(0x91), chr(0x92),
    chr(0x93), chr(0x94), chr(0x95), chr(0x96),
);

# decode() converts the raw bytes into a Perl character string. With the
# default CHECK behaviour, bytes that have no mapping are replaced by the
# Unicode replacement character U+FFFD instead of raising an error.
my $utf8_str = decode( 'cp1252', $cp1252_str );

# update: you can now remove "unmapped" byte values this way:
$utf8_str =~ tr/\x{fffd}//d;

####

# Example 2: let an I/O layer do the decoding while a file is being read.
# NOTE: $filename is not set anywhere in this snippet; the surrounding code
# is expected to supply it.
open( my $input_fh, '<:encoding(cp1252)', $filename )
    or die "Cannot open '$filename' for reading: $!";

# or, if the file handle is already open (e.g. STDIN):
# binmode FILEHANDLE, ":encoding(cp1252)";

while (<$input_fh>) {
    # $_ contains utf8 data... (already decoded from CP1252 by the layer)
}

close $input_fh
    or die "Cannot close '$filename': $!";