#!/usr/bin/perl
use strict;
use warnings;
use HTML::Entities;

# Demonstration: transliterate the Windows-1252 (cp1252) code points in the
# 0x80-0x9F range to their Unicode equivalents using tr///, then show the
# result as \x{...} escapes and as HTML entities.

# random selection of cp1252 goodies (0x81 is deliberately included: it is
# one of the unassigned code points and should be dropped)
my $str = join(
    '',
    chr(0x80), chr(0x81), chr(0x91), chr(0x92),
    chr(0x93), chr(0x94), chr(0x95), chr(0x96),
);
my $original = $str;

# delete any chars not assigned in cp1252
$str =~ tr/\x81\x8D\x8F\x90\x9D//d;

# replace the rest; the mapping mirrors the table after __DATA__.
# It is split into two tr/// calls purely to keep the lines readable: the
# search lists are disjoint and every replacement is above 0xFF, so the
# second pass can never touch the output of the first.
# NB: the lists must each stay on one line -- whitespace inside a tr list
# is taken literally.
$str =~ tr{\x80\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8E}
          {\x{20AC}\x{201A}\x{0192}\x{201E}\x{2026}\x{2020}\x{2021}\x{02C6}\x{2030}\x{0160}\x{2039}\x{0152}\x{017D}};
$str =~ tr{\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9E\x9F}
          {\x{2018}\x{2019}\x{201C}\x{201D}\x{2022}\x{2013}\x{2014}\x{02DC}\x{2122}\x{0161}\x{203A}\x{0153}\x{017E}\x{0178}};

# check what happened without trying to print wide chars:
# first as HTML entities, then as \x{...} escapes of each code point
my $encoded = encode_entities($str);
$str =~ s/(.)/sprintf( "\\x{%x}", ord($1))/egs;

print qq{original: $original\n};
print qq{hex: $str\n};
print qq{encoded: $encoded\n};
print qq{done\n};

__DATA__
80 0x20AC
81
82 0x201A
83 0x0192
84 0x201E
85 0x2026
86 0x2020
87 0x2021
88 0x02C6
89 0x2030
8A 0x0160
8B 0x2039
8C 0x0152
8D
8E 0x017D
8F
90
91 0x2018
92 0x2019
93 0x201C
94 0x201D
95 0x2022
96 0x2013
97 0x2014
98 0x02DC
99 0x2122
9A 0x0161
9B 0x203A
9C 0x0153
9D
9E 0x017E
9F 0x0178
In reply to Transliterate cp1252 0x80-0x9f to utf8 equivalents by wfsp
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |