#!/usr/bin/perl
use utf8;
use strict;
use Unicode::String qw(utf8 latin1 utf16);
use Encode;
use CGI;
use HTML::Entities;
require Unicode::Map8;
# ---------------------------------------------------------------------------
# Character conversion test (CGI entry point).
#
# Reads two request parameters:
#   textInput   - the text the user submitted
#   enc_sniffer - a field expected to carry U+263A (WHITE SMILING FACE); the
#                 form the value arrives in is used to guess how the browser
#                 encoded the submission.
# It then prints a UTF-8 response showing the sniffer character, the submitted
# text and an entity-encoded rendering of that text.
# ---------------------------------------------------------------------------

my $smiley = "\x{263a}";

# These map objects are not used further down; constructing them only verifies
# that the Unicode::Map8 tables are available, aborting the request otherwise.
my $l1_map  = Unicode::Map8->new("latin1") or die "Unicode::Map8: cannot load latin1 map\n";
my $win_map = Unicode::Map8->new("cp1252") or die "Unicode::Map8: cannot load cp1252 map\n";

my $cgiq = CGI->new;

# A parameter that is absent from the request comes back undefined; treat it
# as an empty string so the matches and prints below behave predictably.
my $qtext = $cgiq->param('textInput');
$qtext = '' unless defined $qtext;
my $sniffer = $cgiq->param('enc_sniffer');
$sniffer = '' unless defined $sniffer;

binmode(STDOUT, ":utf8");
print $cgiq->header(-charset=>'utf-8');
print "\nCharacter conversion test\n";

# Characters that must never reach the HTML response unescaped.  Restricting
# encode_entities() to this set neutralises markup while leaving non-ASCII
# characters as they are.
my $markup_chars = q{<>&"'};

my $encoded = '';
if ($sniffer =~ /^\x{263a}$/) {
    # The sniffer value is the single character U+263A.
    print "Unicode encoding detected.\n\n";
    my $converted = utf8($qtext)->latin1;
    $encoded = encode_entities($converted);
}
elsif ($sniffer =~ /^\xe2\x98\xba$/) {
    # The sniffer value is the three-byte UTF-8 encoding of U+263A (E2 98 BA).
    print "Latin1 encoding detected.\n\n";
    my $converted = utf8($qtext)->latin1;
    $encoded = encode_entities($converted);
}
elsif ($sniffer =~ /not sure what to put here/) {
    # NOTE(review): the pattern above is a placeholder, so this branch is
    # effectively unreachable until a real Windows-1252 signature is chosen.
    # Browsers commonly submit characters the form encoding cannot represent
    # as numeric character references (e.g. "&#9786;") - confirm and replace.
    print "Windows 1252 encoding detected.\n\n";
    # TransWin1252() returns plain text, so markup characters still have to be
    # escaped before the result is written into the page.
    $encoded = encode_entities(TransWin1252($qtext), $markup_chars);
}

print ' enc_sniffer: ' . $smiley;
print "\n\n";

# The submitted text is untrusted input: escape it before echoing it back.
print " Text submitted:\n" . encode_entities($qtext, $markup_chars) . "\n";
print " Encoded:\n" . $encoded . "\n";
exit;
# TransWin1252($text)
#
# Translate the Windows-1252 specific code points in $text to the Unicode
# characters they stand for and return the translated copy.  The caller's
# variable is not modified.
#
#   * 0x80-0x9F become the corresponding cp1252 characters (euro sign, curly
#     quotes, dashes, ...).  The five positions cp1252 leaves unassigned
#     (0x81, 0x8D, 0x8F, 0x90, 0x9D) become a plain space.
#   * 0xA0 becomes a plain space and 0xAD is removed.
#   * Everything else passes through unchanged.  That includes 0xA1-0xFF,
#     whose code points are the same in cp1252 and in Unicode, so they need
#     no table entry.
#
# Returns undef when called with undef.
sub TransWin1252 {
    my $s = $_[0];
    return $s unless defined $s;

    # The table is deliberately built inside the sub: the script calls exit
    # before this point in the file, so a file-level "my %table = (...)"
    # placed next to the sub would never be initialised at run time.
    # Values are written as \x{...} escapes so the mapping does not depend on
    # the encoding of this source file or on "use utf8" being in effect.
    my %unicode_for = (
        "\x80" => "\x{20AC}",    # EURO SIGN
        "\x81" => ' ',           # unassigned in cp1252
        "\x82" => "\x{201A}",    # SINGLE LOW-9 QUOTATION MARK
        "\x83" => "\x{0192}",    # LATIN SMALL LETTER F WITH HOOK
        "\x84" => "\x{201E}",    # DOUBLE LOW-9 QUOTATION MARK
        "\x85" => "\x{2026}",    # HORIZONTAL ELLIPSIS
        "\x86" => "\x{2020}",    # DAGGER
        "\x87" => "\x{2021}",    # DOUBLE DAGGER
        "\x88" => "\x{02C6}",    # MODIFIER LETTER CIRCUMFLEX ACCENT
        "\x89" => "\x{2030}",    # PER MILLE SIGN
        "\x8A" => "\x{0160}",    # LATIN CAPITAL LETTER S WITH CARON
        "\x8B" => "\x{2039}",    # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
        "\x8C" => "\x{0152}",    # LATIN CAPITAL LIGATURE OE
        "\x8D" => ' ',           # unassigned in cp1252
        "\x8E" => "\x{017D}",    # LATIN CAPITAL LETTER Z WITH CARON
        "\x8F" => ' ',           # unassigned in cp1252
        "\x90" => ' ',           # unassigned in cp1252
        "\x91" => "\x{2018}",    # LEFT SINGLE QUOTATION MARK
        "\x92" => "\x{2019}",    # RIGHT SINGLE QUOTATION MARK
        "\x93" => "\x{201C}",    # LEFT DOUBLE QUOTATION MARK
        "\x94" => "\x{201D}",    # RIGHT DOUBLE QUOTATION MARK
        "\x95" => "\x{2022}",    # BULLET
        "\x96" => "\x{2013}",    # EN DASH
        "\x97" => "\x{2014}",    # EM DASH
        "\x98" => "\x{02DC}",    # SMALL TILDE
        "\x99" => "\x{2122}",    # TRADE MARK SIGN
        "\x9A" => "\x{0161}",    # LATIN SMALL LETTER S WITH CARON
        "\x9B" => "\x{203A}",    # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
        "\x9C" => "\x{0153}",    # LATIN SMALL LIGATURE OE
        "\x9D" => ' ',           # unassigned in cp1252
        "\x9E" => "\x{017E}",    # LATIN SMALL LETTER Z WITH CARON
        "\x9F" => "\x{0178}",    # LATIN CAPITAL LETTER Y WITH DIAERESIS
        "\xA0" => ' ',           # no-break space is flattened to a plain space
        "\xAD" => '',            # soft hyphen is dropped
    );

    # One pass over the string; the character class lists exactly the keys of
    # the table above, so every match has a replacement.
    $s =~ s/([\x80-\xA0\xAD])/$unicode_for{$1}/g;

    return $s;
}