#!/usr/bin/env perl
use warnings;
use strict;
use open qw/:std :utf8/;
use Text::Unidecode;
use Encode;

# Demo script in two parts:
#   1. Show Text::Unidecode turning "smart" punctuation into plain ASCII.
#   2. Write one sample string to three files in three different encodings,
#      then read each file back as raw bytes and guess its encoding by
#      trial decoding plus a character-based heuristic.

print "# Text::Unidecode demo:\n";
my $test = "\N{U+201C}test\N{U+201D} \N{U+2013} test\N{U+2026}";
print " original: ", $test, "\n";
print "asciified: ", unidecode($test), "\n";

# set up some test data
my $str = "\N{U+CF} spent 20\N{U+20AC} \N{U+C3}t the c\N{U+AA}f\N{U+E9}\n";
{
    # Each pair is [ file name, encoding layer used to write it ].
    my @fixtures = (
        [ 'one.txt',   'CP-1252' ],
        [ 'two.txt',   'Latin-9' ],
        [ 'three.txt', 'UTF-8'   ],
    );
    for my $fixture (@fixtures) {
        my ($name, $layer) = @$fixture;
        open my $out, ">:raw:encoding($layer)", $name or die $!;
        print {$out} $str;
        # Buffered write/encoding errors only surface at close, so check it.
        close $out or die "$name: $!";
    }
}

# Heuristics for accepting a decode result:
#   - the sample text contains a euro sign (U+20AC), so a correct decode
#     must contain one;
#   - U+0080 must not appear in the decoded text; it shows up when a byte
#     0x80 is decoded with a charset that maps it to a C1 control.
my $expected_chars   = qr/[\N{U+20AC}]/u;    # heuristic
my $unexpected_chars = qr/[\N{U+80}]/u;      # heuristic

for my $file (qw/ one.txt two.txt three.txt /) {
    # slurp the raw file as undecoded bytes
    open my $fh, '<:raw', $file or die "$file: $!";
    my $bytes = do { local $/; <$fh> };
    close $fh;

    # $string is assigned only when a candidate encoding both decodes
    # cleanly AND passes the heuristics. Keeping each attempt in its own
    # variable prevents a rejected decode from leaking out of the loop and
    # being printed as if it had been accepted.
    my $string;

    # try different encodings
    for my $enc (qw/ UTF-8 Latin-9 CP-1252 /) {
        # FB_CROAK makes decode() die on malformed input (caught by eval,
        # leaving $candidate undef); LEAVE_SRC keeps $bytes intact so the
        # next encoding can be tried on the same data.
        my $candidate = eval {
            decode($enc, $bytes, Encode::FB_CROAK | Encode::LEAVE_SRC);
        };

        if (   defined $candidate
            && $candidate =~ $expected_chars
            && $candidate !~ $unexpected_chars )
        {
            print "### $file looks like $enc\n";
            $string = $candidate;
            last;
        }
        else {
            print "### $file is NOT $enc\n";
        }
    }

    die "Failed to decode $file" unless defined $string;
    print $string;
    print unidecode($string);
}