#!/usr/bin/perl -w
use strict;
use warnings;

# Convert "fullwidth" ASCII variants (U+FF01 .. U+FF5E) in UTF-8 input to
# their ordinary ASCII counterparts (U+0021 .. U+007E).
#
# Input is read line by line from the files named on the command line (or
# from STDIN when none are given) and the converted text is written to
# STDOUT.  The data is handled as raw bytes, so the fullwidth characters are
# recognised by their three-byte UTF-8 encodings.

use constant {
    FULLWIDTH_FIRST  => 0xFF01,    # 65281, FULLWIDTH EXCLAMATION MARK
    FULLWIDTH_LAST   => 0xFF5E,    # 65374, FULLWIDTH TILDE
    FULLWIDTH_OFFSET => 0xFEE0,    # 65248, distance from fullwidth to ASCII
};

# Raw UTF-8 byte sequence of each fullwidth character => ASCII replacement.
my %ascii_for;

# The range is inclusive on both ends so that the fullwidth tilde (the last
# character of the block) is converted as well.
for my $ud (FULLWIDTH_FIRST .. FULLWIDTH_LAST) {
    my ($wide, $ascii) = make_codes($ud);

    # make_codes() describes the bytes as regex escape text such as
    # '\x{EF}\x{BC}\x{81}'; turn that into the actual bytes so the table can
    # be indexed directly with text matched from the input.
    (my $bytes = $wide) =~ s/\\x\{([0-9A-Fa-f]+)\}/chr hex $1/ge;
    $ascii_for{$bytes} = $ascii;
}

# Run the filter only when executed as a script, so the subs below can also
# be loaded and called from other code without consuming STDIN.
main() unless caller;

# Filter loop: read every input line, convert it and print it followed by a
# newline (a final line without a trailing newline gains one).
sub main {
    while (my $line = <>) {
        chomp $line;
        print fullwidth_to_ascii($line), "\n";
    }
    return;
}

# Replace every fullwidth ASCII variant in $text (a byte string holding
# UTF-8) with its ASCII counterpart and return the converted copy.
# All other bytes, including other UTF-8 sequences, are left untouched.
sub fullwidth_to_ascii {
    my ($text) = @_;
    return $text unless defined $text;

    # Every fullwidth ASCII variant encodes as EF BC xx or EF BD xx, so one
    # pass with a table lookup replaces a separate substitution per
    # character.  Sequences of that shape that are not in the table (for
    # example U+FF5F) are put back unchanged.
    $text =~ s{(\xEF[\xBC\xBD][\x80-\xBF])}
              {exists $ascii_for{$1} ? $ascii_for{$1} : $1}ge;

    return $text;
}

# Given the code point of a fullwidth character, return a two-element list:
#   - the regex escape text matching its UTF-8 bytes
#     (for example '\x{EF}\x{BC}\x{81}'), and
#   - the ASCII character it stands for.
sub make_codes {
    my ($ud) = @_;

    my $from = ud_to_utf8hex($ud);

    # Subtracting the block offset maps the fullwidth form onto ASCII.
    my $to = ud_to_utf8hex($ud - FULLWIDTH_OFFSET);

    return ($from, $to);
}

# Describe the UTF-8 encoding of code point $ud.
#
# For 0 .. 127 the character itself is returned, because basic ASCII needs
# no encoding.  For larger code points (up to U+10FFFF) the result is regex
# escape text with one '\x{HH}' group per UTF-8 byte.  Dies when $ud is
# undefined, negative or beyond U+10FFFF.
sub ud_to_utf8hex {
    my ($ud) = @_;

    die "ud_to_utf8hex: code point out of range\n"
        if !defined $ud || $ud < 0 || $ud > 0x10FFFF;

    # Basic ASCII values don't need to be altered.
    return chr $ud if $ud <= 0x7F;

    my @bytes;
    if ($ud <= 0x7FF) {
        # Two bytes: 110xxxxx 10xxxxxx
        @bytes = (
            0xC0 | ($ud >> 6),
            0x80 | ($ud & 0x3F),
        );
    }
    elsif ($ud <= 0xFFFF) {
        # Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        @bytes = (
            0xE0 | ($ud >> 12),
            0x80 | (($ud >> 6) & 0x3F),
            0x80 | ($ud & 0x3F),
        );
    }
    else {
        # Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        @bytes = (
            0xF0 | ($ud >> 18),
            0x80 | (($ud >> 12) & 0x3F),
            0x80 | (($ud >> 6) & 0x3F),
            0x80 | ($ud & 0x3F),
        );
    }

    return join '', map { sprintf '\\x{%X}', $_ } @bytes;
}