=pod =head1 NAME pdfmerger.pl =head1 SYNOPSIS $perl pdfmerger.pl -outfile [-inpath ] [-inmask ] [-logfile ] [-engine ] [-order ] [-order ] [-order ] [-bookmark] [-help] [@optionsfile] =head1 ARGUMENTS -outfile : req. : none : file to create -inpath : opt. : ./ : path to source-files -inmask : opt. : /./ : regex to match files (no glob!) -logfile : opt. : pdfmerger.log : name of the logfile -bookmark : opt. : false : add bookmarks -help : opt. : no : show help -@optionsfile : opt. : none : file with options -order : opt. : none : sort merged files -engine : opt. : reuse : choose between api2|acro|reuse -backup : opt. : false : backup existing -outfile =head1 DESCRIPTION: pdfmerger.pl merges multiple Adobe PDF-files to a single document. it supports multiple "engines" to produce pdf, namly =over 3 =item PDF::API2 due to a bug in PDF::Api2, it seems to be impossible to merge files that were merged before. =item PDF::Reuse I encountered a bug with Reuse and documents that were produced by MS Word/Acrobat PDF::Writer. =item Win32 Acrobat OLE-Automation No bookmarks, due to the limitations of the Acrobat-Object-Model. =back When the -order option is used only the mentioned files are merged. @optionsfile is a file that contains options for the script, one per line. This is especially convenient for storing long -order - lists. =head1 AUTOR holli =head1 PREREQUISITIES: Getopt::ArgvFile Getopt::Attribute POD::Usage File::Log::Shortcut File::Copy File::Spec File::Basename PDF::API2; PDF::Reuse; Win32::OLE =head1 HISTORY: version 0.1 =cut # version our $VERSION = 0.1; # modules use strict; use warnings; use Getopt::ArgvFile; use Getopt::Attribute; use Pod::Usage; use File::Log::Shortcut; use File::Spec qw(rel2abs); use File::Copy; use File::Basename qw (fileparse); use PDF::API2; use PDF::Reuse; # arguments our $inpath : Getopt(inpath=s ./); our $inmask : Getopt(inmask=s .); our $outfile : Getopt(outfile=s); our $logfile : Getopt(logfile=s pdfmerger.rpt); our $bookmark : Getopt(bookmark); our $help : Getopt(help); our $engine : Getopt(engine=s api2); our @order : Getopt(order=s); our $backup : Getopt(backup); # Help? pod2usage(-exitval=>0, -verbose=>1) if $help; # inpath? pod2usage(-message => "'$inpath' is no valid directory!\n", -exitval=>1, -verbose=>1) unless -e $inpath && -d $inpath; $inpath .= "/" unless $inpath =~ m:/$:; # outfile? pod2usage(-message => "-outfile missing!\n", -exitval=>1, -verbose=>1) unless $outfile; pod2usage(-message => "-outfile is a directory!\n", -exitval=>1, -verbose=>1) if -e $outfile && -f $outfile; # build file list my $i=0; my %files = map { $_ => ++$i } grep { /$inmask/i } glob ( "$inpath*.pdf" ); $i=0; my %order = @order ? map { $inpath.$_ => ++$i } @order : %files; my @dateien = grep { defined $files{$_} } sort { $order{$a} <=> $order{$b} } keys %order; die "no files to merge!\n" unless @dateien; # create log my $log = File::Log::Shortcut->new ( { storeExpText => 1, # Store internally all exp text authorName => "Holli", logFileName => $logfile, versionFrom => "20050206", } ); $log->pProgramHeader(2); # create pdf eval { $log->msg(2, "using Engine: $engine\n"); write_pdfs ($outfile, @dateien); }; # catch errors if ( $@ ) { $log->msg(2,"\n!**ABORT**!\n$@\n$!\n"); exit 1; } $log->pProgramFooter(2); #engine-dispatcher sub write_pdfs { my ($outputpdf, $outdir) = fileparse ($_[0]); if ( $backup ) { if ( -e "$outdir$outputpdf") { # backup existing file my $i=0; while ( -e "$outdir$outputpdf.$i" ) { $i++ }; move ("$outdir$outputpdf", "$outdir$outputpdf.$i") or die "File '$outputpdf' cannot be renamed\n"; $log->msg(2, "'$outputpdf' renamed to '$outputpdf.$i'\n"); } } else { unlink "$outdir$outputpdf" if -e "$outdir$outputpdf"; } if ( $engine eq "acrobat" ) { write_pdf_acro (@_); } elsif ( $engine eq "api2" ) { write_pdf_api2 (@_); } elsif ( $engine eq "reuse" ) { write_pdf_reuse (@_); } else { die "Unknow PDF-Engine!\n"; } -e "$outdir$outputpdf" ? $log->pFileInfo(2, "$outdir$outputpdf", "File written $outdir$outputpdf : ") : die "File '$outputpdf' not written!\n"; } sub write_pdf_api2 { my ($outputpdf, $outdir) = fileparse (shift @_); my @pdfFiles = @_; my ( $file, $pdf, $root, ); $pdf = PDF::API2->new( -file => $outdir."\\".$outputpdf ); #default mediabox to A4 $pdf->mediabox (0,0,594.9,841.3597); $root = $pdf->outlines; my $import_page = 0; my $document_page = 0; foreach $file ( @pdfFiles ) { my ($inputpdf, $inputdir) = fileparse (shift @_); my $input = PDF::API2->open( $file ); my @pages = 1 .. $input->pages; if ( scalar @pages > 0 ) { my $outline; $outline = $root->outline if $bookmark; foreach ( @pages ) { ++$import_page; ++$document_page; my $page = $pdf->importpage($input, $_, $import_page); if ( $bookmark ) { # create bookmark my ($bmtext) = ($inputpdf =~ /([^\.]+)/ ); $outline->title($bmtext); my $bm = $outline->outline; $bm->title("page $document_page"); $bm->dest($page); $outline->dest($page) if $document_page == 1; $outline->closed; } } } $log->pFileInfo(2, "$file", "Processsing $file : "); } $pdf->preferences( -outlines => 1 ) if $bookmark; $pdf->update; $pdf->end; } sub write_pdf_acro { eval "use Win32::OLE;"; my ($outputpdf, $outdir) = fileparse (shift @_); my @pdfFiles = map { File::Spec->rel2abs($_) } @_; my ( $_empty_pdf, $file, $bigpdf, $singlepdf, $fillpdf, $acro, ); my $pages=0; $singlepdf = Win32::OLE->new('AcroExch.AVDoc'); $bigpdf = Win32::OLE->new('AcroExch.AVDoc'); prFile ($outdir."_empty_tmp_.pdf"); prPage; prEnd; $bigpdf->open (File::Spec->rel2abs($outdir."_empty_tmp_.pdf"), ""); foreach $file (@pdfFiles) { $log->pFileInfo(2, $file, "Processsing $file : "); $singlepdf->open($file, ""); $bigpdf->GetPDDoc->InsertPages ( $bigpdf->GetPDDoc->GetNumPages-1, $singlepdf->GetPDDoc(), 0, $singlepdf->GetPDDoc->GetNumPages, 1 ); $pages += $singlepdf->GetPDDoc->GetNumPages; $singlepdf->close(1); } $bigpdf->GetPDDoc->DeletePages ( 0, 0 ); unless ( $bigpdf->GetPDDoc->GetNumPages == $pages ) { my $p = $bigpdf->GetPDDoc->GetNumPages; $bigpdf->close(1), #bug in acrobat? some files just donīt get merged die "not all pages inserted. shoud be: $pages, is: $p!\n"; } $bigpdf->GetPDDoc->Save(1, File::Spec->rel2abs("$outdir$outputpdf")); $bigpdf->close(1); unlink $outdir."_empty_.pdf"; } sub write_pdf_reuse { my ($outputpdf, $outdir) = fileparse (shift @_); my @pdfFiles = @_; prFile ("$outdir$outputpdf"); my $pages=0; for ( @pdfFiles ) { my $page=prDoc($_); if ( $bookmark ) { my ($inputpdf, $indir) = fileparse ($_, /\.pdf$/i); prBookmark({ text => $inputpdf, act => "this.pageNum = $pages;"}); } $log->pFileInfo(2, $_, "Processsing $_ : "); $pages += $page; } prEnd; } __END__