#!/usr/bin/perl
#
# Character conversion test (CGI).
#
# Reads two form parameters:
#   textInput   - the text to convert
#   enc_sniffer - a field expected to carry a WHITE SMILING FACE (U+263A);
#                 the form it arrives in selects the conversion branch
#
# Prints a UTF-8 response containing the smiley, the submitted text and the
# converted ("encoded") text.

use utf8;
use strict;
use warnings;

use Unicode::String qw(utf8 latin1 utf16);
use Encode;
use CGI;
use HTML::Entities;
require Unicode::Map8;    # kept loaded; no longer used directly in this file

# Characters that must be escaped before user-supplied text is placed into
# the HTML response. Passed to encode_entities() as its "unsafe" set so that
# every other character is left exactly as submitted.
my $HTML_UNSAFE = q{<>&"'};

# Windows-1252 bytes 0x80-0x9F and the Unicode characters they stand for.
# The five slots Windows-1252 leaves undefined (0x81, 0x8D, 0x8F, 0x90,
# 0x9D) become a plain space. This table is initialised here, before the
# main logic runs, because TransWin1252() is called before the script exits.
my %CP1252_CHAR_FOR = (
    "\x80" => "\x{20AC}",    # EURO SIGN
    "\x81" => ' ',           # undefined
    "\x82" => "\x{201A}",    # SINGLE LOW-9 QUOTATION MARK
    "\x83" => "\x{0192}",    # LATIN SMALL LETTER F WITH HOOK
    "\x84" => "\x{201E}",    # DOUBLE LOW-9 QUOTATION MARK
    "\x85" => "\x{2026}",    # HORIZONTAL ELLIPSIS
    "\x86" => "\x{2020}",    # DAGGER
    "\x87" => "\x{2021}",    # DOUBLE DAGGER
    "\x88" => "\x{02C6}",    # MODIFIER LETTER CIRCUMFLEX ACCENT
    "\x89" => "\x{2030}",    # PER MILLE SIGN
    "\x8A" => "\x{0160}",    # LATIN CAPITAL LETTER S WITH CARON
    "\x8B" => "\x{2039}",    # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    "\x8C" => "\x{0152}",    # LATIN CAPITAL LIGATURE OE
    "\x8D" => ' ',           # undefined
    "\x8E" => "\x{017D}",    # LATIN CAPITAL LETTER Z WITH CARON
    "\x8F" => ' ',           # undefined
    "\x90" => ' ',           # undefined
    "\x91" => "\x{2018}",    # LEFT SINGLE QUOTATION MARK
    "\x92" => "\x{2019}",    # RIGHT SINGLE QUOTATION MARK
    "\x93" => "\x{201C}",    # LEFT DOUBLE QUOTATION MARK
    "\x94" => "\x{201D}",    # RIGHT DOUBLE QUOTATION MARK
    "\x95" => "\x{2022}",    # BULLET
    "\x96" => "\x{2013}",    # EN DASH
    "\x97" => "\x{2014}",    # EM DASH
    "\x98" => "\x{02DC}",    # SMALL TILDE
    "\x99" => "\x{2122}",    # TRADE MARK SIGN
    "\x9A" => "\x{0161}",    # LATIN SMALL LETTER S WITH CARON
    "\x9B" => "\x{203A}",    # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    "\x9C" => "\x{0153}",    # LATIN SMALL LIGATURE OE
    "\x9D" => ' ',           # undefined
    "\x9E" => "\x{017E}",    # LATIN SMALL LETTER Z WITH CARON
    "\x9F" => "\x{0178}",    # LATIN CAPITAL LETTER Y WITH DIAERESIS
);

my $smiley = "\x{263a}";

my $cgiq = CGI->new;

# Default missing parameters to the empty string so the matches and
# concatenations below never operate on undef.
my $qtext = $cgiq->param('textInput');
$qtext = '' unless defined $qtext;
my $sniffer = $cgiq->param('enc_sniffer');
$sniffer = '' unless defined $sniffer;

binmode(STDOUT, ":utf8");
print $cgiq->header(-charset => 'utf-8');
print ' Character conversion test ';

# NOTE(review): if CGI hands parameters back as undecoded bytes (presumably
# its default -- confirm), the first pattern can never match, and the second
# pattern (the UTF-8 byte sequence of U+263A) would indicate a UTF-8
# submission despite its "Latin1" label. Labels are left unchanged here.
my $encoded = '';
if ($sniffer =~ /^\x{263a}$/) {
    print "\n\nUnicode encoding detected.\n\n\n";
    my $latin1_text = utf8($qtext)->latin1;
    $encoded = encode_entities($latin1_text);
}
elsif ($sniffer =~ /^\xe2\x98\xba$/) {
    print "\n\nLatin1 encoding detected.\n\n\n";
    my $latin1_text = utf8($qtext)->latin1;
    $encoded = encode_entities($latin1_text);
}
# TODO: the pattern below is a placeholder, so this branch is effectively
# unreachable until a real Windows-1252 signature is supplied.
elsif ($sniffer =~ /not sure what to put here/) {
    print "\n\nWindows 1252 encoding detected.\n\n\n";

    # Escape only markup-significant characters so the translated
    # characters themselves reach the browser untouched.
    $encoded = encode_entities(TransWin1252($qtext), $HTML_UNSAFE);
}

print ' enc_sniffer: ' . $smiley;
print "\n\n\n";

# The submitted text is untrusted: neutralise markup before echoing it.
print " Text submitted:\n" . encode_entities($qtext, $HTML_UNSAFE) . "\n\n";
print " Encoded:\n" . $encoded . "\n\n";

exit;

# TransWin1252($text)
#
# Translates characters 0x80-0x9F of $text, taken as Windows-1252 bytes, to
# the Unicode characters they represent, using %CP1252_CHAR_FOR. Characters
# 0xA0-0xFF are left as they are because their Windows-1252 meaning has the
# same code point in Unicode; everything below 0x80 is plain ASCII.
#
# Returns the translated copy; the argument is not modified. An undefined
# argument yields the empty string.
sub TransWin1252 {
    my ($text) = @_;
    return '' unless defined $text;

    # A single pass is sufficient: every replacement is either a space or a
    # character above 0xFF, so no replacement can be matched again.
    $text =~ s/([\x80-\x9F])/$CP1252_CHAR_FOR{$1}/g;

    return $text;
}