use strict;
use warnings;
use Encode;

# Sample cp1252 input, one byte per character: 0x80 (euro sign), 0x81
# (unassigned in cp1252), and 0x91-0x96 (curly quotes, bullet, en dash).
my $cp1252_str = pack( 'C*', 0x80, 0x81, 0x91 .. 0x96 );

# decode() returns a Perl character string; despite the variable's name it is
# not a UTF-8 byte sequence. No CHECK argument is passed, so a byte the
# encoding cannot map is replaced by U+FFFD rather than raising an error.
my $utf8_str = decode( 'cp1252', $cp1252_str );

# Remove the "unmapped" byte values by deleting their U+FFFD placeholders.
# This must be a statement on its own line: when it trailed the comment on
# the same physical line it was part of the comment and never ran.
$utf8_str =~ tr/\x{fffd}//d;