=pod

=head1 NAME

pdfmerger.pl

=head1 SYNOPSIS

 perl pdfmerger.pl -outfile <file>
                  [-inpath <directory>]
                  [-inmask <regex>]
                  [-logfile <file>]
                  [-engine <pdf-engine>]
                  [-order <file 1>] [-order <file 2>] [-order <file n>]
                  [-bookmark]
                  [-backup]
                  [-help]
                  [@optionsfile]

=head1 ARGUMENTS

 -outfile      : req. : none          : file to create
 -inpath       : opt. : ./            : path to source-files
 -inmask       : opt. : /./           : regex to match files (no glob!)
 -logfile      : opt. : pdfmerger.rpt : name of the logfile
 -bookmark     : opt. : false         : add bookmarks
 -help         : opt. : no            : show help
 @optionsfile  : opt. : none          : file with options, one per line
 -order        : opt. : none          : merge only these files, in this order
 -engine       : opt. : api2          : choose between api2|acrobat|reuse
 -backup       : opt. : false         : backup existing -outfile

=head1 DESCRIPTION:

pdfmerger.pl merges multiple Adobe PDF files into a single document.
It supports multiple "engines" to produce the PDF, namely:

=over 3

=item PDF::API2

 due to a bug in PDF::Api2, it seems to be impossible to merge files
 that were merged before.

=item PDF::Reuse

  I encountered a bug with Reuse and documents that were produced by
  MS Word/Acrobat PDF::Writer.

=item Win32 Acrobat OLE-Automation

  No bookmarks, due to the limitations of the Acrobat-Object-Model.

=back

When the -order option is used only the mentioned files are merged.

@optionsfile is a file that contains options for the script, one per line. This is especially
convenient for storing long lists of -order options.


=head1 AUTHOR

holli, L<http://holli.perlmonk.org/>


=head1 PREREQUISITES:

 Getopt::ArgvFile
 Getopt::Attribute
 Pod::Usage
 File::Log::Shortcut
 File::Copy
 File::Spec
 File::Basename
 PDF::API2
 PDF::Reuse
 Win32::OLE (only for the Acrobat engine)

=head1 HISTORY:

version 0.1

=cut
# version
# Script version number; the HISTORY section of the POD above lists the same value.
our $VERSION = 0.1;

# modules
use strict;
use warnings;

use Getopt::ArgvFile;
use Getopt::Attribute;

use Pod::Usage;

use File::Log::Shortcut;
use File::Spec qw(rel2abs);

use File::Copy;
use File::Basename qw (fileparse);

use PDF::API2;
use PDF::Reuse;


# arguments
# Command-line options, bound to package variables through the ":Getopt(...)"
# variable attribute provided by Getopt::Attribute. Each attribute holds an
# option spec ("=s" takes a string value, no suffix is a boolean flag),
# optionally followed by the default value.
our $inpath   : Getopt(inpath=s ./);                # directory that is searched for *.pdf sources
our $inmask   : Getopt(inmask=s .);                 # regex (applied case-insensitively) a source path must match
our $outfile  : Getopt(outfile=s);                  # merged PDF to create; required, checked below
our $logfile  : Getopt(logfile=s pdfmerger.rpt);    # file name handed to the logger
our $bookmark : Getopt(bookmark);                   # flag: add bookmarks (api2 and reuse engines)
our $help     : Getopt(help);                       # flag: print usage and exit
our $engine   : Getopt(engine=s api2);              # engine name, dispatched in write_pdfs()
our @order    : Getopt(order=s);                    # repeatable: file names (relative to -inpath) in merge order
our $backup   : Getopt(backup);                     # flag: rename an existing -outfile instead of deleting it

# Help?
# -help prints the usage text (verbosity 1) and exits successfully.
pod2usage(-exitval=>0, -verbose=>1)
    if $help;

# inpath?
# The source directory must exist. -d is already false for a missing path,
# so no separate -e test is needed.
pod2usage(-message => "'$inpath' is no valid directory!\n", -exitval=>1, -verbose=>1)
    unless -d $inpath;

# The file list is built with glob("$inpath*.pdf"), so make sure the
# directory ends with a path separator.
$inpath .= "/" unless $inpath =~ m:/$:;

# outfile?
pod2usage(-message => "-outfile missing!\n", -exitval=>1, -verbose=>1)
    unless $outfile;

# Reject a directory as the output target. This has to test -d: testing -f
# would refuse every existing regular file, which would make overwriting
# and the -backup option unreachable.
pod2usage(-message => "-outfile is a directory!\n", -exitval=>1, -verbose=>1)
    if -d $outfile;

# build file list
# %files maps every *.pdf below $inpath whose path matches $inmask
# (case-insensitively) to its position in the glob result.
my $i=0;
my %files;
foreach my $candidate ( glob ( "$inpath*.pdf" ) )
{
    next unless $candidate =~ /$inmask/i;
    $files{$candidate} = ++$i;
}

# %order maps a path to its rank in the merged document: the -order list
# (prefixed with $inpath) when given, otherwise the glob order from above.
$i=0;
my %order;
if ( @order )
{
    foreach my $wanted ( @order )
    {
        $order{ $inpath.$wanted } = ++$i;
    }
}
else
{
    %order = %files;
}

# Only paths that really exist in %files are merged, sorted by their rank.
my @dateien = sort { $order{$a} <=> $order{$b} }
              grep { defined $files{$_} }
              keys %order;

unless ( @dateien )
{
    die "no files to merge!\n";
}

# create log
# Logger settings; the log file name comes from the -logfile option.
my %log_setup =
(
    storeExpText    => 1,                   # Store internally all exp text
    authorName      => "Holli",
    logFileName     => $logfile,
    versionFrom     => "20050206",
);

my $log = File::Log::Shortcut->new( \%log_setup );

$log->pProgramHeader(2);

# create pdf
# Everything that can die during the merge runs inside the eval so that the
# failure ends up in the log instead of only on STDERR.
eval
{
    $log->msg(2, "using Engine: $engine\n");
    write_pdfs ($outfile, @dateien);
};

# catch errors
# Log the exception text together with the last OS error, then signal
# failure through the exit status.
if ( my $failure = $@ )
{
    $log->msg(2,"\n!**ABORT**!\n$failure\n$!\n");
    exit 1;
}

$log->pProgramFooter(2);


# Engine dispatcher.
#
# Arguments: the output file (first element), followed by the list of
#            source PDFs in merge order. The complete argument list is
#            handed unchanged to the selected engine.
# Globals:   $engine (engine name), $backup (keep an existing output file
#            under a numbered name), $log (logger).
# Dies:      on an unknown engine, when an existing output file can neither
#            be renamed nor removed, or when the engine did not produce the
#            output file.
sub write_pdfs
{
    my ($outputpdf, $outdir) = fileparse ($_[0]);
    my $target = "$outdir$outputpdf";

    # "acro" is the spelling used in the script's POD, "acrobat" the one
    # this dispatcher has always accepted; both select the Acrobat engine.
    my %writer_for =
    (
        acrobat => \&write_pdf_acro,
        acro    => \&write_pdf_acro,
        api2    => \&write_pdf_api2,
        reuse   => \&write_pdf_reuse,
    );

    # Resolve the engine BEFORE touching an existing output file, so that a
    # mistyped -engine value cannot delete or rename the previous result.
    my $writer = ( defined $engine && $writer_for{$engine} )
        or die "Unknow PDF-Engine!\n";

    if ( -e $target )
    {
        if ( $backup )
        {
            # backup existing file under the first free numeric suffix
            my $i=0;
            $i++ while -e "$target.$i";

            move ($target, "$target.$i")
                or die "File '$outputpdf' cannot be renamed\n";

            $log->msg(2, "'$outputpdf' renamed to '$outputpdf.$i'\n");
        }
        else
        {
            # A stale file that cannot be removed would make the existence
            # check at the end report success for an old document.
            unlink $target
                or die "File '$outputpdf' cannot be removed: $!\n";
        }
    }

    $writer->(@_);

    die "File '$outputpdf' not written!\n"
        unless -e $target;

    return $log->pFileInfo(2, $target, "File written $target : ");
}


# PDF::API2 engine.
#
# Arguments: the output file, followed by the source PDFs in merge order.
# Globals:   $bookmark (create one outline entry per source file with one
#            child entry per page), $log (logger).
# Every page of every source file is imported into a new document, which is
# then written to the output file.
sub write_pdf_api2
{
    my ($outputpdf, $outdir) = fileparse (shift @_);
    my @pdfFiles = @_;

    # fileparse() returns the directory including its trailing separator,
    # so the parts are joined directly. Inserting a literal backslash here
    # would create a differently named file on non-Windows systems and make
    # the caller's "file written" check fail.
    my $pdf = PDF::API2->new( -file => "$outdir$outputpdf" );

    #default mediabox to A4
    $pdf->mediabox (0,0,594.9,841.3597);
    my $root = $pdf->outlines;

    # Running page number inside the merged document; used both as the
    # import position and in the page bookmark titles.
    my $document_page = 0;

    foreach my $file ( @pdfFiles )
    {
        my ($inputpdf) = fileparse ($file);

        my $input = PDF::API2->open( $file );

        my $page_count = $input->pages;

        if ( $page_count > 0 )
        {
            my $outline;
            if ( $bookmark )
            {
                # One top-level bookmark per source file, titled with the
                # file name up to its first dot.
                my ($bmtext) = ($inputpdf =~ /([^\.]+)/ );
                $outline = $root->outline;
                $outline->title($bmtext);
            }

            foreach my $source_page ( 1 .. $page_count )
            {
                ++$document_page;

                my $page = $pdf->importpage($input, $source_page, $document_page);

                if ( $bookmark )
                {
                    # create bookmark
                    my $bm = $outline->outline;
                    $bm->title("page $document_page");
                    $bm->dest($page);

                    # Point the file's own bookmark at the first page of
                    # THIS file. Comparing the running document page with 1
                    # would only ever link the first file's bookmark.
                    $outline->dest($page) if $source_page == 1;
                }
            }

            # Show the per-file bookmark collapsed.
            $outline->closed
                if $bookmark;
        }
        $log->pFileInfo(2, "$file", "Processsing $file : ");

    }


    $pdf->preferences( -outlines => 1 )
        if $bookmark;

    $pdf->update;
    $pdf->end;
}


# Acrobat engine (Win32 OLE automation).
#
# Arguments: the output file, followed by the source PDFs in merge order.
# Globals:   $log (logger).
# A one-page helper PDF is created with PDF::Reuse and opened in Acrobat;
# every source file is inserted after its last page, the helper page is
# deleted again and the result is saved as the output file.
# Dies:      when Win32::OLE or the Acrobat objects are unavailable, when a
#            document cannot be opened, or when the page count of the result
#            does not match the sum of the source page counts.
sub write_pdf_acro
{
    # Win32::OLE only exists on Windows, so it is loaded at run time and
    # only for this engine. A failed load is reported right here.
    eval { require Win32::OLE; 1 }
        or die "Win32::OLE cannot be loaded: $@\n";

    my ($outputpdf, $outdir) = fileparse (shift @_);
    my @pdfFiles = map { File::Spec->rel2abs($_) } @_;

    # Helper document that serves as the insertion target. The same name is
    # used for creating and for removing it.
    my $empty_pdf = $outdir."_empty_tmp_.pdf";

    my $pages=0;

    my $singlepdf = Win32::OLE->new('AcroExch.AVDoc')
        or die "Cannot create AcroExch.AVDoc: " . Win32::OLE->LastError() . "\n";
    my $bigpdf    = Win32::OLE->new('AcroExch.AVDoc')
        or die "Cannot create AcroExch.AVDoc: " . Win32::OLE->LastError() . "\n";

    prFile ($empty_pdf);
    prPage ();
    prEnd ();

    $bigpdf->open (File::Spec->rel2abs($empty_pdf), "")
        or die "Cannot open '$empty_pdf' in Acrobat\n";

    foreach my $file (@pdfFiles)
    {
        $log->pFileInfo(2, $file, "Processsing $file : ");

        $singlepdf->open($file, "")
            or die "Cannot open '$file' in Acrobat\n";

        # Append all pages of the source after the current last page.
        $bigpdf->GetPDDoc->InsertPages
        (
            $bigpdf->GetPDDoc->GetNumPages-1,
            $singlepdf->GetPDDoc(),
            0,
            $singlepdf->GetPDDoc->GetNumPages,
            1
        );

        $pages += $singlepdf->GetPDDoc->GetNumPages;

        $singlepdf->close(1);
    }

    # Remove the page of the helper document again.
    $bigpdf->GetPDDoc->DeletePages ( 0, 0 );

    unless ( $bigpdf->GetPDDoc->GetNumPages == $pages )
    {
        my $p = $bigpdf->GetPDDoc->GetNumPages;

        $bigpdf->close(1);
        unlink $empty_pdf;

        # bug in acrobat? some files just don't get merged
        die "not all pages inserted. shoud be: $pages, is: $p!\n";
    }

    $bigpdf->GetPDDoc->Save(1, File::Spec->rel2abs("$outdir$outputpdf"));

    $bigpdf->close(1);

    unlink $empty_pdf;
}


# PDF::Reuse engine.
#
# Arguments: the output file, followed by the source PDFs in merge order.
# Globals:   $bookmark (add one bookmark per source file), $log (logger).
# Each source document is appended with prDoc(); with -bookmark a bookmark
# named after the file (without its .pdf suffix) jumps to the first page of
# that document.
sub write_pdf_reuse
{
    my ($outputpdf, $outdir) = fileparse (shift @_);
    my @pdfFiles = @_;

    prFile ("$outdir$outputpdf");

    # Number of pages written so far. Because pageNum in the bookmark
    # action is zero-based, this is also the index of the next document's
    # first page.
    my $pages=0;

    for my $file ( @pdfFiles )
    {
        my $page=prDoc($file);
        if ( $bookmark )
        {
            # The suffix has to be passed as a compiled pattern (qr//).
            # A bare /.../ would be executed as a match against $_ and
            # hand its result to fileparse(), so ".pdf" would never be
            # stripped from the bookmark text.
            my ($inputpdf) = fileparse ($file, qr/\.pdf$/i);
            prBookmark({ text  => $inputpdf, act   => "this.pageNum = $pages;"});

        }
        $log->pFileInfo(2, $file, "Processsing $file : ");
        $pages += $page;
    }

    prEnd ();
}


__END__