#!/usr/bin/perl
#
# Scan a set of directory trees for PDF files whose base name appears in a
# list of wanted file names. For every match, print one line of the form
#   " <file name> | <page count> | <directory>"
# and finish with a summary of matches, elapsed seconds and total pages.

use strict;
use warnings;
use DBI;
use PDF::API2;
use Data::Dump 'pp';
use File::Basename;
use File::Find::Rule;

my $t0 = time();

# Wanted file names as a hash ref keyed by lower-cased base name.
# get_pdfs() builds the same structure from the CSV list; the literal
# below is the small test fixture currently in use.
#my $pdfs = get_pdfs();
my $pdfs = {
    "testa.pdf" => 0,
    "testb.pdf" => 0,
};
#pp $pdfs;

my @search_dirs = ('/doca', '/docb');

# Plain files only. The extension is matched case-insensitively so that
# "X.PDF" is found too, in line with the LOWER(file_name) filter used by
# get_pdfs().
my $rule = File::Find::Rule->new;
$rule->file;
$rule->name( qr/\.pdf\z/i );

my $count      = 0;
my $totalpages = 0;

for my $file ($rule->in(@search_dirs)) {
    my ($filename, $path) = fileparse($file);
    #print " *$filename* ^$pdfs->{$filename}^\n";

    # The wanted-list keys are lower-case, so fold the on-disk name before
    # the lookup; otherwise "TestA.pdf" would never match "testa.pdf".
    #   (debug) print " File $filename not found in $path\n";
    next unless exists $pdfs->{ lc $filename };

    # get_pagecount() returns nothing when the PDF cannot be read. Such a
    # file still counts as found, but must not contribute to the page total.
    my $pages = get_pagecount($file);
    $totalpages += $pages if defined $pages;

    my $shown = defined $pages ? $pages : 'n/a';
    print " $filename | $shown | $path\n";    # print to file
    ++$count;
}

my $dur = time() - $t0;
print "\n\n Results: $count files found in $dur seconds - Total number of pages: $totalpages\n\n\n";

# get_pagecount($doc)
#
# Open the PDF at path $doc with PDF::API2 and return its number of pages.
#
# Returns: the page count on success; nothing (undef in scalar context)
#          when the file cannot be opened or parsed. In that case a
#          warning naming the file and the captured error is emitted.
sub get_pagecount {
    my $doc = shift;

    my $pdf;
    my $pages;

    # The trailing "1" lets success be detected from eval's return value,
    # which stays reliable even if $@ gets reset before it is inspected.
    my $ok = eval {
        $pdf   = PDF::API2->open($doc);
        $pages = $pdf->pages;
        1;
    };
    my $err = $ok ? '' : ( $@ || 'unknown error' );

    # PDF::API2 objects contain circular references; release them
    # explicitly so memory does not grow while scanning many files.
    # The method is named close() in newer releases and end() in older
    # ones, hence the can() probe.
    if ($pdf) {
        my $release = $pdf->can('close') || $pdf->can('end');
        eval { $pdf->$release(); 1 } if $release;
    }

    if (!$ok) {
        warn "$doc | Error captured : $err\n";
        return;
    }

    return $pages;
}

# get_pdfs()
#
# Read the list of wanted PDF file names from the CSV-backed table
# "prod_pdf_files" (file prod_pdf_test.txt; columns doc_name, file_name,
# acc_nbr) and return a hash ref mapping each distinct, lower-cased file
# name ending in ".pdf" to 0.
#
# Dies if the connection cannot be made; RaiseError makes later DBI
# failures fatal as well.
sub get_pdfs {
    my $dbh = DBI->connect(
        "dbi:CSV:", undef, undef,
        {
            f_ext      => ".txt/r",
            #f_dir     => "db",
            f_enc      => "utf-8",
            RaiseError => 1,
        }
    ) or die "Cannot connect: $DBI::errstr";

    $dbh->{csv_tables}{prod_pdf_files} = {
        f_file    => "prod_pdf_test.txt",    # list of file names to search - test file = prod_pdf_test.txt
        col_names => [qw( doc_name file_name acc_nbr )],
    };

    my $sth = $dbh->prepare(
        " SELECT DISTINCT file_name FROM prod_pdf_files"
      . " WHERE LOWER(file_name) LIKE ? "
    );
    $sth->execute('%.pdf');
    my $rows = $sth->fetchall_arrayref();
    $dbh->disconnect;

    # Lower-case the names so lookups can be case-insensitive; skip rows
    # without a file name rather than warning on lc(undef).
    my %pdfs = map { ( lc( $_->[0] ) => 0 ) }
               grep { defined $_->[0] } @{$rows};
    #pp \%pdfs;
    return \%pdfs;
}