#!/usr/bin/perl
use strict;
use Encode;
use Text::Unaccent::PurePerl;
# Demo script: shows how Perl sees a non-ASCII string literal when the source
# is parsed under `use utf8`, and what Text::Unaccent::PurePerl returns for it.
use warnings;

# Write character strings to STDOUT through Perl's :utf8 output layer.
binmode STDOUT, ":utf8";

# `use utf8` is lexically scoped: only the literal between `use utf8` and
# `no utf8` is decoded from the file's UTF-8 bytes into characters.
# Removing the pragma leaves the literal as raw bytes (0xC3 0xBF for the
# accented letter) instead of the single character U+00FF.
use utf8;
my $string = "Queensrÿche";
no utf8;

# Dump every character: decimal code, the character itself, its quoted form.
chars($string);

# Report whether the scalar carries Perl's internal UTF-8 flag.
print Encode::is_utf8($string) ? "this is utf8\n" : "this is NOT utf8\n";
print "$string\n";
print "unaccented: " . Text::Unaccent::PurePerl::unac_string($string) . "\n";
exit;
# Print one tab-indented line per character of the given string, with three
# tab-separated columns: the decimal code of the character, the character
# itself, and the form returned by qquote() for it.
sub chars {
    my ($text) = @_;
    for my $char (split //, $text) {
        my $code = ord $char;
        printf "\t%s\t%s\t%s\n", $code, chr($code), qquote($char);
    }
}
# Return a copy of the argument escaped for display inside a double-quoted
# Perl string.
#
#   * Backslash, double quote, '@' and '$' are prefixed with a backslash.
#   * Non-ASCII characters are rewritten as \x{HEX}, but only when the string
#     occupies more bytes internally than it has characters (i.e. it holds
#     multi-byte characters). Strings whose byte length equals their
#     character length are left with their high bytes untouched.
#
# The argument is copied into a lexical, so neither the caller's variable nor
# the global $_ is modified.
sub qquote {
    my ($text) = @_;

    # Escape the characters that are special inside "..." first, so the
    # backslashes added below for \x{...} are not escaped a second time.
    $text =~ s/([\\\"\@\$])/\\$1/g;

    # Byte length of the internal representation; `use bytes` is confined to
    # this bare block so the character-level `length` below is unaffected.
    my $byte_length;
    { use bytes; $byte_length = length $text; }

    if ($byte_length > length $text) {
        $text =~ s/([[:^ascii:]])/'\x{' . sprintf("%x", ord($1)) . '}'/ge;
    }

    return $text;
}
__END__
####
81 Q Q
117 u u
101 e e
101 e e
110 n n
115 s s
114 r r
255 ÿ \x{ff}
99 c c
104 h h
101 e e
this is utf8
Queensrÿche
unaccented: Queensryche
##
##
81 Q Q
117 u u
101 e e
101 e e
110 n n
115 s s
114 r r
195 - \x{c3}
191 - \x{bf}
99 c c
104 h h
101 e e