#! /usr/bin/perl -w
#
# For every file named on the command line, ask Encode::Guess which
# character encoding the file's bytes appear to be in, and print one
# report line per file to STDOUT.
#
# Usage: script FILE [FILE ...]
#
# Dies if a file cannot be opened for reading.

use strict;
use warnings;

use Encode;

# cp437 should be the old IBM PC character set, which is used in a
# lot of Gutenberg etexts from the 1990s
#use Encode::Guess qw( iso-8859-1 cp437 );
use Encode::Guess qw( iso-8859-1 );
#use Encode::Guess;

foreach my $file (@ARGV) {

    # Three-arg open with a lexical handle, so a file name can never be
    # misread as an open mode; :raw hands the guesser the untouched bytes.
    open my $fh, '<:raw', $file
        or die qq(can't open "$file" for reading: $!\n);

    # Slurp the whole file exactly as stored: no separators are added
    # or removed, so the guesser sees the real byte sequence.
    my $content = do { local $/; <$fh> };
    close $fh;

    my $decoder = eval { Encode::Guess->guess($content); };
    if ($@) {

        # chomp returns the number of characters removed, not the
        # string, so chomp a copy of the error text in place.
        chomp( my $eval_err = $@ );
        print qq($file: Encode::Guess->guess() failed horribly: "$eval_err"\n);
        next;
    }

    if ( ref $decoder ) {
        print "$file: appears to be " . $decoder->name . "\n";
    }
    else {

        # A non-reference result is what guess() hands back when it
        # cannot settle on one encoding; the sample output below shows
        # such a string ("iso-8859-1 or utf8").
        print "$file: bad decoder returned by Encode::Guess->guess() ";
        print defined $decoder ? $decoder : "(undefined)";
        print "\n";
    }
}

# Sample output:
#
# /home/jim/Documents/homepage/gzb/drafts/universal-violations.txt: bad decoder returned by Encode::Guess->guess() iso-8859-1 or utf8