# Notes on Perl character strings versus byte strings, with two small
# demonstrations. Each example is wrapped in its own bare block so that its
# lexical variables and pragmas stay independent of the other example.
#
# Reference hex dumps of the UTF-8 encoded characters used below:
#
# hex dump of A
# 00000000  41  |A|
# 00000001
#
# hex dump of HIRAGANA LETTER A
# 00000000  e3 81 82  |...|
# 00000003

####
# Example 1: a "native" string is not always a plain byte string.
# Code: $native_string = pack('W*', unpack('U*', $unicode_string));
{
    use strict;
    use warnings;
    use Encode qw(encode);
    use Devel::Peek;
    use 5.012;

    my ($code_point, $unicode_string, $native_string, $native_string2);

    $code_point     = 0x41;    # "A"
    $unicode_string = pack('U*', $code_point);
    $native_string  = pack('W*', unpack('U*', $unicode_string));
    Dump $unicode_string;
    Dump $native_string;       # ==> here it is not UTF-8 flagged

    $code_point     = 0x3042;  # HIRAGANA LETTER A
    $unicode_string = pack('U*', $code_point);
    $native_string  = pack('W*', unpack('U*', $unicode_string));

    # Explicitly encoding is the way to obtain the UTF-8 octets of the string.
    $native_string2 = Encode::encode('utf8', $unicode_string);
    Dump $unicode_string;
    Dump $native_string;       # ==> this one is UTF-8 flagged; it may be
                               #     transparently upgraded because the
                               #     code point is > 255
    Dump $native_string2;
}

####
# Example 2: unpack("C*", ...) on a character string does not yield bytes;
# it yields an array of code points.
# Code: @bytes = unpack("C*", $unicode_string);
{
    use strict;
    use warnings;
    use Encode qw(encode);
    use 5.012;

    my ($code_point, $unicode_string, @bytes);

    $code_point     = 0x41;    # "A"
    $unicode_string = pack('U*', $code_point);
    @bytes          = unpack("C*", $unicode_string);
    print join('|', @bytes), "\n";

    $code_point     = 0x3042;  # HIRAGANA LETTER A
    $unicode_string = pack('U*', $code_point);
    @bytes          = unpack("C*", $unicode_string);
    print join('|', @bytes), "\n";    # ==> these are not bytes, but an
                                      #     array of code points

    # Encode to UTF-8 first, then unpack: now each element really is one
    # byte. They are printed in hex for comparison with the hex dump of
    # HIRAGANA LETTER A at the top of this file (e3 81 82).
    $code_point     = 0x3042;  # HIRAGANA LETTER A
    $unicode_string = pack('U*', $code_point);
    @bytes          = map { sprintf("%X", $_) }
                      unpack("C*", Encode::encode('utf8', $unicode_string));
    print join('|', @bytes), "\n";
}