in reply to Encoding Decoding on multiple formats RFC
Just to compare, I used what was already available to achieve almost the same result:
use 5.18.2;
use warnings;
use utf8;
use Encode qw( encode decode );
use Data::Peek;
# Demo: encode a handful of non-Latin sample phrases into every Unicode
# transformation format that Encode offers, and show for each combination
# both Perl's internal view of the string (DPeek) and the resulting raw
# bytes (DHexDump).

# Use the strict UTF-8 layer for STDOUT; the lowercase "utf8" name selects
# Perl's lax internal variant, which is not meant for data interchange.
binmode STDOUT, ":encoding(UTF-8)";

# [ label => phrase ] pairs. Each phrase is "This is a test" in the named
# language; the literals are character strings because of "use utf8".
my @lang = (
    [ Arabic   => "هذا اختبار"          ],
    [ Chinese  => "這是一個測試"        ],
    [ Greek    => "Αυτό είναι ένα τεστ" ],
    [ Japanese => "これはテストです"    ],
    [ Russian  => "Это тест"            ],
);

# Encoding names handed to Encode::encode, in the order they are reported.
my @encodings = (
    "UCS-2",
    "UCS-2BE",
    "UCS-2LE",
    "UTF-7",
    "UTF-8",
    "utf-8-strict",
    "UTF-16",
    "UTF-16BE",
    "UTF-16LE",
    "UTF-32",
    "UTF-32BE",
    "UTF-32LE",
);

for my $entry (@lang) {
    my ($lang, $str) = @$entry;

    foreach my $enc (@encodings) {
        # Header for this language/encoding combination.
        printf "--\n%-8s %s\n", $lang, $enc;

        # Both Data::Peek calls are made in void context, so the module
        # prints the dump itself instead of returning it (to STDERR,
        # according to the Data::Peek documentation).
        DPeek $str;

        # No CHECK argument is passed, so encode () leaves $str untouched
        # and the same source string is reused for the next encoding.
        my $bytes = encode ($enc, $str);
        DHexDump $bytes;
    }
}
=>
--
Arabic UCS-2
PV("\331\207\330\260\330\247 \330\247\330\256\330\252\330\250\330\247\330\261"\0) [UTF8 "\x{647}\x{630}\x{627} \x{627}\x{62e}\x{62a}\x{628}\x{627}\x{631}"]
0000 06 47 06 30 06 27 00 20 06 27 06 2e 06 2a 06 28 .G.0.'. .'...*.(
0010 06 27 06 31 .'.1
--
Arabic UCS-2BE
PV("\331\207\330\260\330\247 \330\247\330\256\330\252\330\250\330\247\330\261"\0) [UTF8 "\x{647}\x{630}\x{627} \x{627}\x{62e}\x{62a}\x{628}\x{627}\x{631}"]
0000 06 47 06 30 06 27 00 20 06 27 06 2e 06 2a 06 28 .G.0.'. .'...*.(
0010 06 27 06 31 .'.1
--
Arabic UCS-2LE
PV("\331\207\330\260\330\247 \330\247\330\256\330\252\330\250\330\247\330\261"\0) [UTF8 "\x{647}\x{630}\x{627} \x{627}\x{62e}\x{62a}\x{628}\x{627}\x{631}"]
0000 47 06 30 06 27 06 20 00 27 06 2e 06 2a 06 28 06 G.0.'. .'...*.(.
0010 27 06 31 06 '.1.
:
:
:
--
Russian UTF-32BE
PV("\320\255\321\202\320\276 \321\202\320\265\321\201\321\202"\0) [UTF8 "\x{42d}\x{442}\x{43e} \x{442}\x{435}\x{441}\x{442}"]
0000 00 00 04 2d 00 00 04 42 00 00 04 3e 00 00 00 20 ...-...B...>...
0010 00 00 04 42 00 00 04 35 00 00 04 41 00 00 04 42 ...B...5...A...B
--
Russian UTF-32LE
PV("\320\255\321\202\320\276 \321\202\320\265\321\201\321\202"\0) [UTF8 "\x{42d}\x{442}\x{43e} \x{442}\x{435}\x{441}\x{442}"]
0000 2d 04 00 00 42 04 00 00 3e 04 00 00 20 00 00 00 -...B...>... ...
0010 42 04 00 00 35 04 00 00 41 04 00 00 42 04 00 00 B...5...A...B...
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: Encoding Decoding on multiple formats RFC (UPDATED)
by thanos1983 (Parson) on Sep 22, 2017 at 12:23 UTC | |
by AppleFritter (Vicar) on Oct 01, 2017 at 10:36 UTC |