#!/usr/bin/perl
#
# Reads a list of file names from data.txt (via DBD::CSV), opens every entry
# whose name ends in ".pdf" with PDF::API2, and prints how many such entries
# were processed, how long the run took, and the total number of pages.

use strict;
use warnings;

use DBI;
use File::Find::Rule;
use PDF::API2;
use File::Basename;
#use Data::Dumper;
#use Data::Dump 'pp';

# Start of the wall-clock measurement; read again inside get_pdfs().
my $t0 = time();

my ($got_count, $got_timed, $got_total_pages) = get_pdfs();

print "\n\n Result: $got_count files found in $got_timed seconds - Total number of pages: $got_total_pages\n\n\n";

exit;

# get_pdfs()
#
# Loads the distinct (file_name, location) pairs from the prod_pdf_files
# CSV table (backed by data.txt), and sums the page counts of every entry
# whose file name ends in ".pdf" (case-insensitive).
#
# Returns a three-element list:
#   - number of PDF entries processed (including ones that failed to open),
#   - elapsed seconds since $t0,
#   - total number of pages over all PDFs that could be read.
#
# Dies if the CSV connection or query fails (RaiseError is enabled).
sub get_pdfs {
    my $dbh = DBI->connect(
        "dbi:CSV:", undef, undef,
        {
            f_ext      => ".txt/r",
            #f_dir     => "",
            f_enc      => "utf-8",
            RaiseError => 1,
        }
    ) or die "Cannot connect: $DBI::errstr";

    $dbh->{csv_tables}{prod_pdf_files} = {
        f_file    => "data.txt",    # list of file names to search
        col_names => [qw( file_name location acc_nbr )],
    };

    # The disabled filter (WHERE file_name LIKE ?) is kept out of the SQL
    # text so the statement contains no placeholder while execute() is
    # called without bind values; the ".pdf" filtering happens in Perl below.
    my $sth = $dbh->prepare("
        SELECT DISTINCT file_name, location
        FROM prod_pdf_files
    ");
    #$sth->execute('%.pdf');
    $sth->execute();

    my $pdfs = $sth->fetchall_arrayref();
    #pp @$pdfs;

    # All rows are in memory now; release the handle explicitly.
    $dbh->disconnect;

    my $count      = 0;
    my $totalpages = 0;
    my $other_path = '/alldocs';

    foreach my $files ( @{$pdfs} ) {
        my ($file_name, $location) = @{$files};

        # Skip rows without a file name and anything that is not a PDF.
        next unless defined $file_name;
        next unless $file_name =~ m/\.pdf$/i;

        # NOTE(review): plain concatenation — presumably the location column
        # already carries its leading and trailing path separators; confirm
        # against the contents of data.txt.
        my $filename_loc = $other_path . $location . $file_name;

        # pdf_pagecount() returns undef for a file it could not read; such a
        # file must not contribute to the page total.
        my $pages = pdf_pagecount($filename_loc);
        $totalpages += $pages if defined $pages;

        $count++;
    }

    my $dur = time() - $t0;

    return $count, $dur, $totalpages;
}    # End get_pdfs Sub

# pdf_pagecount($doc)
#
# Opens the PDF at path $doc and returns its number of pages.
#
# If the file cannot be opened or parsed, a warning containing the path and
# the captured error is emitted and nothing is returned (undef in scalar
# context), so callers can tell a failure apart from a real page count.
sub pdf_pagecount {
    my $doc = shift;

    my $pages;

    # The trailing 1 makes the eval's own value a reliable success flag,
    # independent of what ends up in $@.
    my $ok = eval {
        my $pdf = PDF::API2->open($doc);
        $pages = $pdf->pages;
        1;
    };

    if ( !$ok ) {
        my $err = $@;
        warn "$doc | Error captured : $err\n";
        return;
    }

    return $pages;
}