äöü.
ÄÖÜ.

####
DB<62> open $fr,"<:raw","encode"
DB<63> p -s $fr                        # 20 bytes
20
DB<64> @a=<$fr>                        # slurp
DB<65> dd \@a                          # Data::Dump::dd shows the bytes correctly
[
  "\xC3\xA4\xC3\xB6\xC3\xBC.\r\n",    # "ä" = UTF-8 \xC3\xA4 = code point U+00E4, etc.
  "\xC3\x84\xC3\x96\xC3\x9C.\r\n",
  "\r\n",
]
DB<66> seek $fr,10,0                   # move the read pointer into the middle of the file
DB<67> p tell $fr                      # ok, pos = 10
10
DB<68> read $fr,$rr,10                 # read the last 10 bytes into $rr
DB<69> dd $rr                          # ouch: offset 10 is inside the 2-byte sequence for "Ä", so its lead byte \xC3 is missing
"\x84\xC3\x96\xC3\x9C.\r\n\r\n"
DB<70> $ru=Encode::decode('utf8',$rr)  # let's decode to an internal character string
DB<71> Dump $ru                        # Devel::Peek: UTF8 flag is set, the orphaned first byte was translated to \x{fffd}
SV = PVMG(0x36d3a28) at 0x36d56b8
  REFCNT = 1
  FLAGS = (SMG,POK,IsCOW,pPOK,UTF8)
  IV = 0
  NV = 0
  PV = 0x36195e8 "\357\277\275\303\226\303\234.\r\n\r\n"\0 [UTF8 "\x{fffd}\x{d6}\x{dc}.\r\n\r\n"]
  CUR = 12
  LEN = 16
  COW_REFCNT = 0
  MAGIC = 0x3630f58
    MG_VIRTUAL = &PL_vtbl_utf8
    MG_TYPE = PERL_MAGIC_utf8(w)
    MG_LEN = -1
DB<72> dd $ru                          # Data::Dump agrees
"\x{FFFD}\xD6\xDC.\r\n\r\n"
DB<73> p length $ru                    # 8 chars = "*ÖÜ.\r\n\r\n", with * standing for the replacement character U+FFFD
8
DB<74> p $ru                           # can't be printed without a warning
Wide character in print at (eval 84)[C:/Perl_524/lib/perl5db.pl:737] line 2, <$fr> line 8.
... yadda traceback
´┐¢├û├£.                              # OK, cmd.exe can't handle Unicode
DB<75> @au = split//,$ru
DB<76> p $au[0]                        # yes, the first character is the one causing trouble
Wide character in print at (eval 86)[C:/Perl_524/lib/perl5db.pl:737] line 2, <$fr> line 8.
... yadda traceback
´┐¢
DB<77> p $au[1]
Í
DB<78> dd $au[1]                       # yep, D6 is the code point for "Ö" in Unicode
"\xD6"