#!/usr/bin/perl
use strict;
use Encode;
use Text::Unaccent::PurePerl qw(unac_string);
use utf8;
# Demonstration driver: dump every character of a test string, report
# whether Perl carries the string with its internal UTF8 flag set, then
# print the unaccented form and the raw string.
#
# The literal is written while `use utf8` is in effect, so the accented
# letter is read as one character (code point 255) rather than as the
# separate bytes of its UTF-8 encoding.
my $string = "Queensrÿche";
no utf8;

# Per-character table: ordinal value, the character, and its escaped form.
chars($string);

# Pick the message first and print once, instead of using ?: purely for
# the side effects of two print calls.
my $flag_report = Encode::is_utf8($string)
    ? " - this is utf8\n"
    : " - this is NOT utf8\n";
print $flag_report;

# unac_string is imported from Text::Unaccent::PurePerl at the top of the file.
print "unaccented: " . unac_string($string) . "\n";

# Raw string, printed as-is with no trailing newline.
print $string;
exit;
# chars($text)
#
# Writes one line to STDOUT for each character of $text. Every line is
# made of tab-prefixed columns: the character's ordinal value, the
# character rebuilt from that ordinal, and the character as escaped by
# qquote().
sub chars {
    my ($text) = @_;
    for my $char (split //, $text) {
        my $code    = ord $char;
        my $glyph   = chr $code;
        my $escaped = qquote($char);
        # The leading empty field yields the tab at the start of the line.
        print join("\t", '', $code, $glyph, $escaped) . "\n";
    }
}
# qquote($text)
#
# Returns a copy of $text made safe to show inside a double-quoted Perl
# string:
#   * backslash, double quote, "@" and "$" are prefixed with a backslash;
#   * when the string is held internally as UTF-8 (its UTF8 flag is set),
#     every non-ASCII character is replaced by a \x{HEX} escape.
# Strings without the UTF8 flag keep their high-bit bytes untouched, so
# the result also reveals how Perl is storing the value.
#
# The argument is copied into a lexical; the caller's variable and the
# global $_ are left alone.
sub qquote {
    my ($text) = @_;

    $text =~ s/([\\\"\@\$])/\\$1/g;

    # utf8::is_utf8 is always available (no `use utf8` needed) and replaces
    # the old comparison of byte length against character length, which
    # required the discouraged `bytes` pragma. The two tests agree: byte
    # length can only exceed character length for a UTF8-flagged string
    # holding non-ASCII characters, and the substitution below does nothing
    # when there are none.
    if (utf8::is_utf8($text)) {
        $text =~ s/([[:^ascii:]])/sprintf('\\x{%x}', ord $1)/ge;
    }

    return $text;
}
####
81 Q Q
117 u u
101 e e
101 e e
110 n n
115 s s
114 r r
255 {ff}
99 c c
104 h h
101 e e
- this is utf8
unaccented: Queensryche
Queensr
##
##
81 Q Q
117 u u
101 e e
101 e e
110 n n
115 s s
114 r r
195 {c3}
191 {bf}
99 c c
104 h h
101 e e
- this is utf8
unaccented: QueensrA
Queensrÿche
##
##
#use utf8;
my $string = "Queensrÿche";
#no utf8;
##
##
81 Q Q
117 u u
101 e e
101 e e
110 n n
115 s s
114 r r
195
191
99 c c
104 h h
101 e e
- this is NOT utf8
unaccented: QueensrA
Queensrÿche