Beefy Boxes and Bandwidth Generously Provided by pair Networks
good chemistry is complicated,
and a little bit messy -LW
 
PerlMonks  

PDF Concatenation and Extraction Tool

by rob_au (Abbot)
on Jan 14, 2004 at 21:59 UTC ( [id://321391]=sourcecode: print w/replies, xml ) Need Help??
Category: Utility Scripts
Author/Contact Info /msg rob_au
Description: This is a PDF concatenation tool designed to merge PDF files, or portions thereof, into a single output PDF file. The command line arguments for this tool take the form:

pdfcat.perl [input files ...] [options] [output file]

-i|--input [filename]

Specify an input file for concatenation into the output file. If a single file is specified with the --page parameter, this script can also be used for extracting specific page ranges.

-o|--output [filename]

Specify the output file for concatenated PDF output.

-p|--page|--pages

This argument, which follows an input file argument, defines the pages to be extracted for concatenation from a given input file. If this argument is not defined, all pages from the input file are concatenated. The pages specified for extraction may be separated by commas or designated by ranges.

For example, the arguments --input input.pdf --pages 1,4-6 would result in pages 1, 4, 5 and 6 inclusively being extracted for concatenation.

#!/usr/bin/perl


use strict;
use warnings;

use File::Basename;
use Getopt::Long;
use PDF::API2;


#   Process command line arguments.
#
#   Every file named by an --input argument becomes one "job": a hash
#   holding the file name and the raw --page specifications that apply to
#   it.  Page specifications are attached only to the jobs created by the
#   most recent --input argument, so the same file may be listed several
#   times with a different page selection for each occurrence.

my @jobs       = ();
my $last_group = [];
my $output     = '';

my $usage = "Usage: $0 --input file[,file...] [--pages list] "
          . "[--input ...] --output file\n";

my $parsed_ok = GetOptions(
    'i|input=s' => sub {
        my ( undef, $value ) = @_;

        #   Split the argument on any comma characters present - this
        #   allows for multiple input files to be specified either by
        #   multiple --input arguments or by a single argument in a comma
        #   delimited fashion.

        $last_group = [ map { +{ file => $_, specs => [] } } split /,/, $value ];
        push @jobs, @{$last_group};
    },
    'o|output=s'     => \$output,
    'p|page|pages=s' => sub {
        my ( undef, $spec ) = @_;

        #   Associate the given page ranges with the file(s) named by the
        #   last --input argument.  Without a preceding input file there
        #   is nothing to attach the pages to, so say so instead of
        #   dropping the argument silently.

        if ( @{$last_group} ) {
            push @{ $_->{specs} }, $spec foreach @{$last_group};
        }
        else {
            warn "Ignoring --pages $spec: no preceding --input argument\n";
        }
    },
);

unless ( $parsed_ok and @jobs and length $output ) {
    print STDERR $usage;
    exit 1;
}


#   Open the PDF file for output (via the PDF::API2 object constructor)

my $pdf  = PDF::API2->new( -file => $output );
my $root = $pdf->outlines;


#   Step through each of the input files specified and extract the
#   document pages with the options specified.

my $import_page = 0;

foreach my $job (@jobs) {

    my $file       = $job->{file};
    my $input      = PDF::API2->open($file);
    my $page_count = $input->pages;


    #   Expand the page list and range definitions passed with the --page
    #   argument associated with the given input file.  By default, all
    #   pages of the input file are included in the output.

    my @pages = ();
    if ( @{ $job->{specs} } ) {
        @pages = expand_page_specs( $file, $page_count, @{ $job->{specs} } );
    }
    else {
        @pages = 1 .. $page_count;
    }


    #   Import the pages from the input file into the output PDF file
    #   being constructed

    next unless @pages;


    #   Extract the filename of the input file without the file extension
    #   for incorporation into the document outline.

    my ($name) = fileparse( $file, qr/\.[^.]*/ );

    my $outline = $root->outline;
    $outline->title($name);


    #   Step through each of the pages to be imported, import the page
    #   and add an entry to the document outline.  $import_page is the
    #   running position in the output document; $document_page restarts
    #   at 1 for every input file and is only used for bookmark titles.

    my $document_page = 0;
    foreach my $source_page (@pages) {

        ++$import_page;
        ++$document_page;

        my $page     = $pdf->importpage( $input, $source_page, $import_page );
        my $bookmark = $outline->outline;
        $bookmark->title("Page $document_page");
        $bookmark->dest($page);

        #   The per-file outline entry points at the first imported page.
        $outline->dest($page) if $document_page == 1;
    }
}

$pdf->preferences( -outlines => 1 );
$pdf->update;
$pdf->end;

exit 0;


#   expand_page_specs( $file, $page_count, @specs )
#
#   Turn the raw --page arguments for one input file into a flat list of
#   page numbers.  Each argument is a comma separated list whose items are
#   either a single integer or a range "A-B".  A descending range such as
#   "6-4" yields the pages in descending order rather than nothing.
#
#   $file is used for error messages only; $page_count is the number of
#   pages in that file.  Dies on an item that is neither an integer nor a
#   range, and on a page number greater than $page_count.  Zero and
#   negative integers are passed through unchanged, as this script has
#   always done - NOTE(review): PDF::API2 presumably interprets them
#   relative to the end of the document; confirm against its documentation.

sub expand_page_specs {
    my ( $file, $page_count, @specs ) = @_;

    my @pages = ();
    foreach my $item ( map { split /,/ } @specs ) {

        if ( my ( $first, $last ) = $item =~ /\A(\d+)-(\d+)\z/ ) {
            push @pages,
                $first <= $last
                ? ( $first .. $last )
                : ( reverse $last .. $first );
        }
        elsif ( $item =~ /\A-?\d+\z/ ) {
            push @pages, $item;
        }
        else {
            die "Invalid page specification '$item' for input file $file\n";
        }
    }

    foreach my $page (@pages) {
        die "Page $page requested from $file, which has only "
          . "$page_count page(s)\n"
            if $page > $page_count;
    }

    return @pages;
}

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: sourcecode [id://321391]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others browsing the Monastery: (4)
As of 2024-03-29 09:19 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found