# Lookup tables between single Latin-1 characters and their UTF-8 byte
# sequences.  Only NUL and the code points 0x80-0xFF are stored; the
# characters 0x01-0x7F are identical in both encodings and need no entry.
my (%encoding, %decoding);

# UTF8::chr($ord)
#
# Return the UTF-8 byte string for the code point $ord.
#
#   0x01 .. 0x7F     one byte (the character itself)
#   0x00, 0x80-0x7FF two bytes; note that 0 is deliberately emitted as the
#                    overlong pair 0xC0 0x80 so that the result never
#                    contains a raw NUL byte
#   everything else  three bytes
#
# No four-byte form is produced, so code points above 0xFFFF are not
# encoded correctly by this routine.
sub UTF8::chr ($) {
    my $ord = shift;
    if ($ord && $ord < 0x80) {
        # Plain ASCII (except NUL, which is false and falls through).
        return chr $ord;    # OR: pack 'C', $ord;
    }
    elsif ($ord < 0x800) {
        return pack 'C2',
            0xC0 | ($ord >> 6),
            0x80 | ($ord & 0x3F);
    }
    else {
        return pack 'C3',
            0xE0 | ($ord >> 12),
            0x80 | (($ord >> 6) & 0x3F),
            0x80 | ($ord & 0x3F);
    }
}

# Initialize the tables: NUL plus the upper half of Latin-1.  The range
# stops at 255 because 0xFF is the last Latin-1 code point.
for my $ord (0, 128 .. 255) {
    $encoding{chr $ord} = UTF8::chr($ord);
}
%decoding = reverse %encoding;

# UTF8_to_L1(@strings)
#
# Convert UTF-8 byte strings to Latin-1.  The arguments are copied first,
# so the caller's variables are never modified.  A raw NUL byte, or any
# two- or three-byte-shaped sequence that has no entry in %decoding, is
# replaced by the marker "(#...#)" wrapped around the offending bytes.
#
# Returns all converted strings in list context, and the last converted
# string in scalar context.
sub UTF8_to_L1 {
    my @strings = @_;
    for my $string (@strings) {
        $string =~ s/(\000|[\xC0-\xDF][\x80-\xBF]|[\xE0-\xFF][\x80-\xBF][\x80-\xBF])/$decoding{$1} || "(#$1#)"/ge;
    }
    return wantarray ? @strings : $strings[-1];
}

# L1_to_UTF8(@strings)
#
# Convert Latin-1 strings to UTF-8 byte strings.  The arguments are copied
# first, so the caller's variables are never modified.  NUL and every
# character in 0x80-0xFF is replaced by its entry in %encoding; all other
# characters pass through unchanged.
#
# Returns all converted strings in list context, and the last converted
# string in scalar context.
sub L1_to_UTF8 {
    my @strings = @_;
    for my $string (@strings) {
        $string =~ s/([\000\x80-\xFF])/$encoding{$1}/g;
    }
    return wantarray ? @strings : $strings[-1];
}