use strict ;
use warnings ;
use Encode qw(_utf8_on) ;
# Show what unpack('a*', ...) returns for a byte string, first as-is and then
# with the UTF8 flag forced on.  The first literal is well-formed UTF-8; the
# second contains deliberately malformed sequences (\xC2\x7E and \x80\xC0).
for my $r ("\xC2\xAB \x61\x68\x61 \xC2\xBB", "\xC2\x7E \x61\x68\x61 \x80\xC0") {
    for my $utf (0..1) {
        # $r is an alias for the literal in the list above.  Flag a private
        # copy rather than modifying that constant in place (in-place
        # modification of a literal may be rejected as read-only).
        my $sample = $r ;
        _utf8_on($sample) if $utf ;
        # raw() yields (escaped bytes, character length, byte length, flag state).
        printf "'%s', %d/%d %s\n", raw(unpack('a*', $sample)) ;
    } ;
} ;
# raw($s): describe a string for debugging.
# Returns a four-element list:
#   1. the underlying octets of $s, with printable ASCII (0x20..0x7E) shown
#      literally and everything else as \xHH,
#   2. length($s) with normal (character) semantics,
#   3. length($s) under 'use bytes' (octet count),
#   4. 'utf8' or 'not utf8' according to utf8::is_utf8($s).
sub raw {
    my ($s) = @_ ;
    my $octet_count ;
    my @rendered ;
    {
        # 'use bytes' is confined to this bare block so that both length()
        # and unpack() look at the internal octets here.
        use bytes ;
        $octet_count = length($s) ;
        for my $octet (unpack('C*', $s)) {
            if ($octet < 0x20 || $octet > 0x7E) {
                push @rendered, sprintf('\\x%02X', $octet) ;
            }
            else {
                push @rendered, chr($octet) ;
            }
        }
    } ;
    my $flag_state = utf8::is_utf8($s) ? 'utf8' : 'not utf8' ;
    return (join('', @rendered), length($s), $octet_count, $flag_state) ;
} ;
####
# Try each unpack template on pack('H*', ...) when the hex-digit string itself
# has had the UTF8 flag forced on.
my $s = "C2AB2061686120C2BB" ; _utf8_on($s) ;
for my $unp ('a*', 'U0a*', 'C0a*') {
    # raw() returns (escaped bytes, character length, byte length, flag state)
    # in that order.  Name the fields accordingly so the report reads
    # characters/bytes instead of having the two counts swapped.
    my ($q, $chars, $bytes, $u) = raw(unpack($unp, pack('H*', $s))) ;
    print "unpack('$unp', pack('H*', \$s)) -> '$q', $chars/$bytes $u\n" ;
} ;
####
# Try each unpack template on the result of pack('U0H*', ...) and report what
# raw() sees: escaped bytes, character length / byte length, flag state.
my $s = "C2AB2061686120C2BB" ;
foreach my $template ('a*', 'U0a*', 'C0a*') {
    my $packed = pack('U0H*', $s) ;
    my @report = raw(unpack($template, $packed)) ;
    printf "unpack('$template', pack('U0H*', $s)) -> '%s', %d/%d %s\n", @report ;
}
####
# For one well-formed and one malformed hex string, report what raw() sees
# for pack('U0H*', ...) directly, and again after a round trip through
# unpack('a*', ...).
for my $s ("C2AB2041686120C2BB", "C27E204168612080C0") {
    my $packed = pack('U0H*', $s) ;
    my @direct = raw($packed) ;
    printf "pack('U0H*', $s) -> '%s', %d/%d %s\n", @direct ;

    my @via_unpack = raw(unpack('a*', pack('U0H*', $s))) ;
    printf "unpack('a*', pack('U0H*', $s)) -> '%s', %d/%d %s\n", @via_unpack ;
}
####
# dehex($s): replace every "0x"/"0X"-prefixed run of hex-digit pairs in $s
# with the octets those digits spell, packed with the 'U0H*' template (i.e.
# the octets are taken as UTF-8 encoded text).
# Returns the converted string when utf8::valid() accepts it.  Returns nothing
# (undef in scalar context, empty list in list context) when $s is undefined
# or when the result is not internally consistent UTF-8.
sub dehex {
    my ($s) = @_ ;
    return unless defined $s ;
    # Only whole pairs of hex digits are consumed; a trailing odd digit is
    # left in place as ordinary text.
    $s =~ s/0[xX]((?:[0-9A-Fa-f]{2})+)/pack('U0H*', $1)/eg ;
    return $s if utf8::valid($s) ;
    # The hex digits did not spell well-formed UTF-8: hand back nothing so
    # the caller can decide how to report it.
    return ;
} ;