#!perl
use strict;
use warnings;
use v5.12;
use Encode qw( encode );
use English qw( -no_match_vars );
use Unicode::UCD qw( charinfo );
# Emit all output as UTF-8...
binmode STDOUT, ':encoding(UTF-8)';
# Include the Unicode byte order mark (printed before the record separator
# is set below, so no newline follows it)...
print "\x{FEFF}";
# Flush after every record; separate fields with tabs and end each record
# with a newline...
local $OUTPUT_AUTOFLUSH = 1;
local $OUTPUT_RECORD_SEPARATOR = "\n";
local $OUTPUT_FIELD_SEPARATOR = "\t";
# Print one tab-separated record per assigned code point: the character
# itself, its decimal code, its U+XXXXXX notation, its UTF-8 octets in hex
# and in binary, and its name, general category, block and script...
CODE:
for my $code (0x0000 .. 0x10FFFF) {
    # Don't complain about surrogate codes...
    no warnings qw( utf8 );
    my $charinfo = charinfo($code);
    # Skip unassigned code points and non-characters...
    next CODE unless defined $charinfo;
    my $codepoint = sprintf 'U+%06X', $code;
    my $name = $charinfo->{'name'};
    my $category = $charinfo->{'category'};
    my $block = $charinfo->{'block'};
    my $script = $charinfo->{'script'};
    my $character = chr $code;
    my $utf8_hexstring = '';
    my $utf8_binstring = '';
    # Surrogates have no valid UTF-8 form: strict 'UTF-8' encoding would
    # substitute other bytes for them, so leave the octet columns empty
    # rather than report an encoding that isn't theirs...
    if ($category ne 'Cs') {
        my @utf8_octets = unpack 'C*', encode('UTF-8', $character);
        $utf8_hexstring
            = join ' ', map { sprintf '%02X', $ARG } @utf8_octets;
        $utf8_binstring
            = join ' ', map { sprintf '%08b', $ARG } @utf8_octets;
    }
    # Don't try to print unprintable characters: controls (Cc), formats
    # (Cf), private use (Co) and surrogates (Cs)...
    $character = '' if $category =~ m/\A C[cfos] \z/xms;
    print $character,
        $code,
        $codepoint,
        $utf8_hexstring,
        $utf8_binstring,
        $name,
        $category,
        $block,
        $script;
}
exit 0;
####
#!perl
use strict;
use warnings;
use v5.12;
use Encode qw( encode_utf8 );
use English qw( -no_match_vars );
use Unicode::UCD qw( charinfo );
# Write UTF-8 to standard output, beginning with a byte order mark (it is
# printed before the record separator is set, so no newline follows it)...
binmode STDOUT, ':encoding(UTF-8)';
print "\x{FEFF}";
# Flush after every record; separate fields with tabs and end each record
# with a newline...
local $OUTPUT_AUTOFLUSH = 1;
local $OUTPUT_RECORD_SEPARATOR = "\n";
local $OUTPUT_FIELD_SEPARATOR = "\t";
# Emit one tab-separated record for every code point that charinfo()
# returns an entry for...
CODE:
for my $code (0x000000 .. 0x10FFFF) {
    # Skip unassigned code points and non-characters, for which the
    # Unicode Character Database lookup returns nothing...
    my $ucd_entry = charinfo($code);
    if (!defined $ucd_entry) {
        next CODE;
    }
    my ($name, $category, $block, $script)
        = @{$ucd_entry}{qw( name category block script )};
    my $glyph = chr $code;
    # Render each octet of the UTF-8 encoding in hex and in binary...
    my $octets = encode_utf8($glyph);
    my $hex_octets = uc join(' ', unpack '(H2)*', $octets);
    my $bin_octets = join(' ', unpack '(B8)*', $octets);
    # Unprintable and private use characters get an empty character column...
    if ($category =~ m/^C[cfos]$/) {
        $glyph = '';
    }
    # Don't falsely represent surrogates as valid UTF-8...
    if ($category eq 'Cs') {
        $hex_octets = '';
        $bin_octets = '';
    }
    my @record = (
        $glyph,
        $code,
        sprintf('U+%06X', $code),
        $hex_octets,
        $bin_octets,
        $name,
        $category,
        $block,
        $script,
    );
    print @record;
}
exit 0;
####
# Don't complain about surrogates: this turns off the 'surrogate' warnings
# category for the remainder of the enclosing lexical scope...
# NOTE(review): presumably meant to be placed inside the code point loop of
# the script above -- confirm. The 'surrogate' category appears to require
# perl 5.14 or later, while the scripts only ask for v5.12 -- verify.
no warnings qw( surrogate );