our $input = "日本語は少しだけ分かります。 and sam I am!";
#japanese characters meanings, there is a little bit of understanding of the japanese language [at least].
####
my @bytes = unpack("U*",$input);
my $i=0;
while(scalar(@bytes)>0)
{
my $byt=1;
$byt=2 if ($bytes[$i] >= 192);
$byt=3 if ($bytes[$i] >= 224);
$byt=4 if ($bytes[$i] >= 240);
$byt=5 if ($bytes[$i] >= 248);
print "$bytes[$i]: ";
my @spl = splice(@bytes,0,$byt);
my $letter = pack("U*",@spl);
print $letter." [0x";
foreach (@spl){printf "%2.2X",$_;}
print "] ";
print "\n";
}
####
Wide character in print at text_kanji.pl line 24.
26085: 日本語は少 [0x65E5672C8A9E306F5C11]
Wide character in print at text_kanji.pl line 24.
12375: しだけ分か [0x3057306030515206304B]
Wide character in print at text_kanji.pl line 24.
12426: ります。 [0x308A307E3059300220]
97: a [0x61]
110: n [0x6E]
100: d [0x64]
32: [0x20]
115: s [0x73]
97: a [0x61]
109: m [0x6D]
####
230: 日 [0xE697A5]
230: 本 [0xE69CAC]
232: 語 [0xE8AA9E]
227: は [0xE381AF]
229: 少 [0xE5B091]
227: し [0xE38197]
227: だ [0xE381A0]
227: け [0xE38191]
229: 分 [0xE58886]
227: か [0xE3818B]
227: り [0xE3828A]
227: ま [0xE381BE]
227: す [0xE38199]
227: 。 [0xE38082]
####
my $letter = pack("U*",@spl);