#!perl

# Print a tab-separated table describing every code point that
# Unicode::UCD::charinfo() knows about, one row per code point, to STDOUT.
# STDOUT is written through a UTF-8 encoding layer and the stream starts
# with a byte order mark.
#
# Columns, in order:
#   1. the character itself (empty for control, private use and surrogate
#      code points)
#   2. the code point as a decimal number
#   3. the code point as 'U+XXXXXX'
#   4. the UTF-8 octets in hexadecimal, space separated
#   5. the UTF-8 octets in binary, space separated
#   6. the character name
#   7. the general category
#   8. the block
#   9. the script
#
# Surrogate code points have no well-formed UTF-8 encoding, so columns 4
# and 5 are left empty for them.

use strict;
use warnings;
use v5.12;

use Encode qw( encode );
use English qw( -no_match_vars );
use Unicode::UCD qw( charinfo );

binmode STDOUT, ':encoding(UTF-8)';

# Include the Unicode byte order mark. It is printed before the record
# separator is set so that no newline follows it.
print "\x{FEFF}";

local $OUTPUT_AUTOFLUSH        = 1;
local $OUTPUT_RECORD_SEPARATOR = "\n";
local $OUTPUT_FIELD_SEPARATOR  = "\t";

CODE:
for my $code (0x0000 .. 0x10FFFF) {

    # Don't complain about surrogate codes...
    no warnings qw( utf8 );

    my $charinfo = charinfo($code);

    # Skip unassigned code points and non-characters...
    next CODE unless defined $charinfo;

    my $codepoint = sprintf 'U+%06X', $code;
    my $character = chr $code;

    my $name     = $charinfo->{'name'};
    my $category = $charinfo->{'category'};
    my $block    = $charinfo->{'block'};
    my $script   = $charinfo->{'script'};

    # Strict UTF-8 cannot represent surrogates; asking Encode for them
    # yields the substitution character's octets instead of anything
    # meaningful, so report no octets at all for that category.
    my @utf8_octets
        = $category eq 'Cs'
        ? ()
        : unpack( 'C*', encode( 'UTF-8', $character ) );

    my $utf8_hexstring = join ' ', map { sprintf '%02X', $_ } @utf8_octets;
    my $utf8_binstring = join ' ', map { sprintf '%08b', $_ } @utf8_octets;

    # Don't try to print unprintable or private use characters...
    $character = q{}
        if $category eq 'Cc'
        || $category eq 'Co'
        || $category eq 'Cs';

    print $character,
        $code,
        $codepoint,
        $utf8_hexstring,
        $utf8_binstring,
        $name,
        $category,
        $block,
        $script;
}

exit 0;