#!/usr/bin/perl -w

=pod

head1 NAME

PostScript::Glyph::MapToUnicode - PostScript glyph name to Unicode conversion

=head1 SYNOPSIS

 use PostScript::Glyph::MapToUnicode file => '/usr/doc/PostScript/aglfn13.txt';
 print PostScript::Glyph::MapToUnicode::map('Euro'), "\n";

=head1 DESCRIPTION

This module implements (most of - see L</"BUGS">) the PostScript glyph name to
Unicode code point conversion algorithm described by Adobe at
L<http://partners.adobe.com/asn/tech/type/unicodegn.jsp>.

To do something more than marginally useful with this module you should
download the B<Adobe Glyph List> from
L<http://partners.adobe.com/asn/tech/type/aglfn13.txt>.

=head1 INTERFACE

=over 4

=item parse_adobeglyphlist()

This function parses an B<Adobe Glyph List> file and returns true on success.
On failure, it returns false and supplies an error message in the package
variable C<$ERROR>. It expects its first argument to specify how to retrieve
the data. The following options exist: 

=over 4

=item file

Takes the name of a file containing the B<Adobe Glyph List>.

=item fh

Takes a filehandle reference that should be open on a file containing the Adobe
Glyph List.

=item array

Takes an array reference. Each array element is expected to contain one line
from the B<Adobe Glyph List>.

=item data

Takes a scalar that is expected to contain the entire B<Adobe Glyph List> file.

=back

For convenience, you can pass the same parameter to the module's C<import()>
function, as exemplified in L</"SYNOPSIS">. It will croak if it encounters any
errors.

=item map()

Takes a list of strings containing whitespace separated PostScript glyphs and
returns them concatenated as a single string in Unicode encoding. You may want
to memoize this function when processing large PostScript documents.

=back

=head1 BUGS

C<map()> does not take the font into account, so it will produce incorrect
results for glyphs from the B<ZapfDingbats> font.

=head1 AUTHOR

Aristotle Pagaltzis L<mailto:pagaltzis@gmx.de>

=head1 COPYRIGHT

This program is Copyright (c)2003 Aristotle Pagaltzis.  All rights reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of either: a) the GNU General Public License as published by the Free
Software Foundation; either version 1, or (at your option) any later version,
or b) the "Artistic License" which comes with Perl.

=head1 DISCLAIMER

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See either the GNU General Public License or the Artistic
License for more details. 

=cut

package PostScript::Glyph::MapToUnicode;
use strict;
use vars qw($ERROR);

my $uni_notation = qr{
	\A uni
	(
		(?:
			[0-9ABCEF] [\dA-F] {3}
			|
			D [0-7] [\dA-F] {2}
		)+
	)
	\z
}x;

my $u_notation = qr{
	\A u
	(
		[0-9ABCEF] [\dA-F] {3,5}
		|
		(?:
			D [0-7] [\dA-F] {2,3}
			|
			D [8-9A-F] [\dA-F] {3}
		)
	)
	\z
}x;

my %agl;

sub map {
	my $digits;
	return
		join '',
	 	map {
	 		exists $agl{$_}
	 			? $agl{$_}
	 		: (($digits) = m/$uni_notation/)
	 			? map { pack "U", hex } $digits =~ /(....)/g
	 		: (($digits) = m/$u_notation/)
	 			? pack "U", hex $digits
	 		: do { '' };
	 	}
	 	map { split /_/ }
	 	map { /\A(.+?)\./ ? $1 : $_ }
		map { split }
		@_;
}

sub parse_adobeglyphlist {
	my $method = shift;
	my $data =
		$method eq 'array' ? do {
			my $array = shift;
			unless(ref $array eq 'ARRAY') {
				$ERROR = "Expected array reference in '$array'";
				return;
			}
			$array;
		}
		: $method  eq 'data' ? [ split /^/m, shift ]
		: ($method eq 'file' or $method eq 'fh') ? do {
			my $fh = $method eq 'fh'
				? shift
				: do {
					open my $fh, '<', shift or ($ERROR = "$!", return);
					$fh;
				};
			[ <$fh> ];
		}
		: ($ERROR = "Unknown parsing interface '$method'", return);
	
	%agl = do {
		@$data = grep !/\A (?: \# | \s* \z)/x, @$data;
		chomp @$data;
		map {
			my ($code_pt, $glyph) = split /;/;
			($glyph => pack "U", hex $code_pt);
		} @$data;
	};

	delete $agl{'.notdef'};

	return 1;
}

sub import {
	shift;
	unless(&parse_adobeglyphlist) {
		require Carp;
		Carp::croak("Failed to parse AdobeGlyphList: $ERROR");
	}
}

1;