#!/usr/bin/perl -w =pod head1 NAME PostScript::Glyph::MapToUnicode - PostScript glyph name to Unicode conversion =head1 SYNOPSIS use PostScript::Glyph::MapToUnicode file => '/usr/doc/PostScript/aglfn13.txt'; print PostScript::Glyph::MapToUnicode::map('Euro'), "\n"; =head1 DESCRIPTION This module implements (most of - see L) the PostScript glyph name to Unicode code point conversion algorithm described by Adobe at L. To do something more than marginally useful with this module you should download the B from L. =head1 INTERFACE =over 4 =item parse_adobeglyphlist() This function parses an B file and returns true on success. On failure, it returns false and supplies an error message in the package variable C<$ERROR>. It expects its first argument to specify how to retrieve the data. The following options exist: =over 4 =item file Takes the name of a file containing the B. =item fh Takes a filehandle reference that should be open on a file containing the Adobe Glyph List. =item array Takes an array reference. Each array element is expected to contain one line from the B. =item data Takes a scalar that is expected to contain the entire B file. =back For convenience, you can pass the same parameter to the module's C function, as exemplified in L. It will croak if it encounters any errors. =item map() Takes a list of strings containing whitespace separated PostScript glyphs and returns them concatenated as a single string in Unicode encoding. You may want to memoize this function when processing large PostScript documents. =back =head1 BUGS C does not take the font into account, so it will produce incorrect results for glyphs from the B font. =head1 AUTHOR Aristotle Pagaltzis L =head1 COPYRIGHT This program is Copyright (c)2003 Aristotle Pagaltzis. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of either: a) the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version, or b) the "Artistic License" which comes with Perl. =head1 DISCLAIMER This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See either the GNU General Public License or the Artistic License for more details. =cut package PostScript::Glyph::MapToUnicode; use strict; use vars qw($ERROR); my $uni_notation = qr{ \A uni ( (?: [0-9ABCEF] [\dA-F] {3} | D [0-7] [\dA-F] {2} )+ ) \z }x; my $u_notation = qr{ \A u ( [0-9ABCEF] [\dA-F] {3,5} | (?: D [0-7] [\dA-F] {2,3} | D [8-9A-F] [\dA-F] {3} ) ) \z }x; my %agl; sub map { my $digits; return join '', map { exists $agl{$_} ? $agl{$_} : (($digits) = m/$uni_notation/) ? map { pack "U", hex } $digits =~ /(....)/g : (($digits) = m/$u_notation/) ? pack "U", hex $digits : do { '' }; } map { split /_/ } map { /\A(.+?)\./ ? $1 : $_ } map { split } @_; } sub parse_adobeglyphlist { my $method = shift; my $data = $method eq 'array' ? do { my $array = shift; unless(ref $array eq 'ARRAY') { $ERROR = "Expected array reference in '$array'"; return; } $array; } : $method eq 'data' ? [ split /^/m, shift ] : ($method eq 'file' or $method eq 'fh') ? do { my $fh = $method eq 'fh' ? shift : do { open my $fh, '<', shift or ($ERROR = "$!", return); $fh; }; [ <$fh> ]; } : ($ERROR = "Unknown parsing interface '$method'", return); %agl = do { @$data = grep !/\A (?: \# | \s* \z)/x, @$data; chomp @$data; map { my ($code_pt, $glyph) = split /;/; ($glyph => pack "U", hex $code_pt); } @$data; }; delete $agl{'.notdef'}; return 1; } sub import { shift; unless(&parse_adobeglyphlist) { require Carp; Carp::croak("Failed to parse AdobeGlyphList: $ERROR"); } } 1;