#!/usr/bin/perl --
#
# Demonstration of the difference between characters (ordinals) and the
# octets that represent them on disk.  A six-character string is written to
# a scratch file in several ways and read back both raw and UTF-8 decoded;
# each result is dumped with Data::Dump so the ordinals can be compared.
#
use strict;
use warnings;
use Data::Dump qw/ dd /;
use Encode qw/ encode decode /;
use Path::Tiny qw/ path /;

## Scratch file.  A unique temporary file is used instead of a fixed name in
## the current directory, so an existing file is never overwritten or
## deleted; Path::Tiny removes it automatically when $tmpfile goes out of
## scope, even if the script dies half way through.
my $tmpfile = Path::Tiny->tempfile;

## CHARACTERS HERE
#~ ord  \N{U+....}   octal  name                             glyph
#~ 115  \N{U+0073}   \163   LATIN SMALL LETTER S             s
#~ 195  \N{U+00C3}   \303   LATIN CAPITAL LETTER A TILDE     Ã
#~ 188  \N{U+00BC}   \274   FRACTION ONE QUARTER             ¼
#~ 195  \N{U+00C3}   \303   LATIN CAPITAL LETTER A TILDE     Ã
#~ 159  \N{U+009F}   \237   APPLICATION PROGRAM COMMAND      (C1 control, no glyph)
#~ 101  \N{U+0065}   \145   LATIN SMALL LETTER E             e
my $ords = join q{}, map { chr $_ } ( 115, 195, 188, 195, 159, 101 );

## Write the characters raw, i.e. without encoding them.  Every ordinal is
## below 256, so each one lands in the file as a single octet.
$tmpfile->spew_raw( $ords );
dump_file_views( $tmpfile, $ords );
#~ {
#~   ords => "s\xC3\xBC\xC3\x9Fe",
#~   raw  => "s\xC3\xBC\xC3\x9Fe",
#~   utf8 => "s\xFC\xDFe",
#~ }

## when you write raw without encoding
## and then read that stuff back as utf8, you get a surprise:
## the octet pairs C3 BC and C3 9F decode to one character each
#~ 223  \N{U+00DF}   \337   LATIN SMALL LETTER SHARP S       ß
#~ 252  \N{U+00FC}   \374   LATIN SMALL LETTER U DIAERESIS   ü

## >>>> OUTPUT encoded, the raw bytes change
$tmpfile->spew_utf8( $ords );
dump_file_views( $tmpfile, $ords );
#~ {
#~   ords => "s\xC3\xBC\xC3\x9Fe",
#~   raw  => "s\xC3\x83\xC2\xBC\xC3\x83\xC2\x9Fe",
#~   utf8 => "s\xC3\xBC\xC3\x9Fe",
#~ }

## utf8 is an encoding, representing characters (ordinals);
## encoding by hand and writing raw gives the same file as spew_utf8
$tmpfile->spew_raw( encode 'UTF-8', $ords );
dump_file_views( $tmpfile, $ords );
#~ {
#~   ords => "s\xC3\xBC\xC3\x9Fe",
#~   raw  => "s\xC3\x83\xC2\xBC\xC3\x83\xC2\x9Fe",
#~   utf8 => "s\xC3\xBC\xC3\x9Fe",
#~ }

## decode raw bytes to get characters
## encode characters to get raw bytes/octets
dd(
    {
        ords            => $ords,
        decode_utf8_raw => decode( 'UTF-8', $tmpfile->slurp_raw ),
        utf8            => $tmpfile->slurp_utf8,
    }
);
#~ {
#~   decode_utf8_raw => "s\xC3\xBC\xC3\x9Fe",
#~   ords => "s\xC3\xBC\xC3\x9Fe",
#~   utf8 => "s\xC3\xBC\xC3\x9Fe",
#~ }

## hooray
## (no explicit remove needed: the temporary file cleans itself up)

# dump_file_views( $file, $chars )
#   $file  - Path::Tiny object for the file to read back
#   $chars - the character string that was written, shown for comparison
# Dumps a hash with the original characters ("ords"), the file content read
# as raw octets ("raw") and the file content decoded as UTF-8 ("utf8").
sub dump_file_views {
    my ( $file, $chars ) = @_;
    dd(
        {
            ords => $chars,
            raw  => $file->slurp_raw,
            utf8 => $file->slurp_utf8,
        }
    );
    return;
}

__END__