#hex dump of A
#00000000 41 |A|
#00000001
####
#hex dump of HIRAGANA LETTER A
#00000000 e3 81 82 |...|
#00000003
####
#Example 1: a "native string" built this way is not always a plain byte string (it can end up UTF8-flagged)
#Code: $native_string=pack('W*', unpack('U*', $unicode_string));
use strict;
use warnings;
use Encode qw(encode);
use Devel::Peek;
use 5.012;
# Working variables for the demonstration; each one is reused for both
# code points below.
my $code_point;
my $unicode_string;
my $native_string;
my $native_string2;

# Case 1: U+0041 LATIN CAPITAL LETTER A (code point below 256).
$code_point     = 0x41;
$unicode_string = pack( 'U*', $code_point );
# Take the string apart into code points and rebuild it with the 'W' template.
$native_string  = pack( 'W*', unpack( 'U*', $unicode_string ) );
Dump($unicode_string);
Dump($native_string);     # ==> this copy is not UTF8-flagged

# Case 2: U+3042 HIRAGANA LETTER A (code point above 255).
$code_point     = 0x3042;
$unicode_string = pack( 'U*', $code_point );
$native_string  = pack( 'W*', unpack( 'U*', $unicode_string ) );
# For comparison: the same character turned into octets by Encode.
$native_string2 = Encode::encode( 'utf8', $unicode_string );
Dump($unicode_string);
Dump($native_string);     # ==> UTF8-flagged here: the code point is > 255,
                          #     so the string gets upgraded transparently
Dump($native_string2);
####
#Example 2: unpack("C*", ...) on a character string does not return its bytes; it returns an array of code points.
#Code: @bytes=unpack("C*", $unicode_string);
use strict;
use warnings;
use Encode qw(encode);
use 5.012;
# Working variables, reused by each of the three cases below.
my $code_point;
my $unicode_string;
my @bytes;

# Case 1: U+0041 LATIN CAPITAL LETTER A.
$code_point     = 0x41;
$unicode_string = pack( 'U*', $code_point );
@bytes          = unpack( 'C*', $unicode_string );
say join( '|', @bytes );

# Case 2: U+3042 HIRAGANA LETTER A, unpacked straight from the character
# string.
$code_point     = 0x3042;
$unicode_string = pack( 'U*', $code_point );
@bytes          = unpack( 'C*', $unicode_string );
# ==> despite the variable name, these are not bytes but an array of
#     code points
say join( '|', @bytes );

# Case 3: the same character, but encoded to octets with Encode before
# unpacking; each value is then formatted as upper-case hex.
$code_point     = 0x3042;
$unicode_string = pack( 'U*', $code_point );
@bytes          = map { sprintf( '%X', $_ ) }
    unpack( 'C*', Encode::encode( 'utf8', $unicode_string ) );
say join( '|', @bytes );