#!/usr/bin/perl
#
# Count the pages of the PDF files named in pdfs.txt.
#
# Workflow:
#   1. get_pdfs()     - read the distinct file names from pdfs.txt
#   2. search_pdf()   - locate files with those names under the search dirs
#   3. process_data() - open every located *.pdf and add up the page counts
#
use strict;
use warnings;

use DBI;
use File::Find::Rule;
use PDF::API2;
use File::Basename;
use Data::Dumper;

my $t0 = time();

# Get PDFs from file:
my $pdfs = get_pdfs();
#print Dumper $pdfs;

# Search for these pdf files:
my $got_pdf = search_pdf($pdfs);
#print Dumper $got_pdf;

# Process these pdf files:
my $results = process_data($got_pdf);
#print Dumper $results;

# Display the total and the time taken. process_data() returns the summed
# page count (not the number of documents), so it is labelled as pages.
my $dur = time() - $t0;
print "$results pages counted in $dur seconds\n";

# get_pdfs()
#
# Reads pdfs.txt through DBD::CSV (columns: doc_name, file_name, acc_nbr)
# and collects the distinct values of the file_name column.
#
# Returns: hash ref  { file_name => 0, ... }
# Dies:    on connection or query failure (RaiseError is enabled).
sub get_pdfs {
    my $dbh = DBI->connect(
        "dbi:CSV:", undef, undef,
        {
            f_ext => ".txt/r",
            #f_dir => "",
            f_enc      => "utf-8",
            RaiseError => 1,
        }
    ) or die "Cannot connect: $DBI::errstr";

    $dbh->{csv_tables}{prod_pdf_files} = {
        f_file    => "pdfs.txt",    # list of file names to search
        col_names => [qw( doc_name file_name acc_nbr )],
    };

    # No "WHERE file_name LIKE '%.pdf'" filter here because the extension
    # could be .PDF or .pdf; process_data() filters case-insensitively.
    # The disabled clause is kept out of the SQL text so the statement does
    # not depend on the SQL engine stripping "--" comments.
    my $sth = $dbh->prepare("SELECT DISTINCT file_name FROM prod_pdf_files");
    $sth->execute();
    my $rows = $sth->fetchall_arrayref();
    $sth->finish;
    $dbh->disconnect;

    # Skip rows without a usable file name (undef or empty string).
    my @names = grep { defined && length } map { $_->[0] } @{$rows};

    my %pdfs;
    @pdfs{@names} = (0) x @names;
    #print Dumper \%pdfs;

    return \%pdfs;
}    # End get_pdfs Sub

# search_pdf($pdf_ref)
#
# $pdf_ref - hash ref whose keys are the file names to look for (the values
#            are not used).
#
# Searches /doca and /docb for plain files whose name matches any of the
# keys; the keys are handed to File::Find::Rule->name, unchanged.
#
# Returns: array ref of the paths found (empty when nothing matches or when
#          no names were supplied).
sub search_pdf {
    my $pdf_ref = shift;

    my @names = keys %{$pdf_ref};
    return [] unless @names;    # nothing to look for, skip the traversal

    my @search_dirs = ( '/doca', '/docb' );

    # Build a fresh rule with ONE name() clause holding every name. Clauses
    # added to a rule object are cumulative (ANDed), so calling name() once
    # per file on a shared rule would require a file to match all names at
    # once and find nothing after the first. Patterns inside a single
    # name() call are alternatives, and one traversal serves all names.
    my @found_pdf = File::Find::Rule->file->name(@names)->in(@search_dirs);

    return \@found_pdf;
}    # End search pdfs Sub

# process_data($pdfs_file)
#
# $pdfs_file - array ref of file paths.
#
# Opens every path ending in ".pdf" (case-insensitive) with PDF::API2,
# prints its name and page count, and adds the count to a running total.
# A file that cannot be opened or read is reported on STDOUT and skipped.
#
# Returns: the summed page count of all files read successfully.
sub process_data {
    my $pdfs_file  = shift;
    my $totalpages = 0;

    # To log results in a file.
    #my $page_count = 'count_pdf_pages.txt';
    #open my $fh, '>>', $page_count or die "Unable to create file: $!";
    #print $fh "filename,path,full_path,pages\n";

    foreach my $doc ( @{$pdfs_file} ) {
        next unless ( $doc =~ m/\.pdf$/i );

        # $path is only needed by the optional file log below.
        my ( $filename, $path ) = fileparse($doc);

        my $pdf;
        my $ok = eval {
            $pdf = PDF::API2->open($doc);
            my $pages = $pdf->pages;
            $totalpages += $pages;

            # log results into a file
            #print $fh "$filename,$path,$doc,$pages\n";
            print " File name: $filename - Number of pages: $pages\n";
            1;
        };
        my $err = $@;    # capture before anything else can overwrite $@
        print "$doc | Error captured : $err\n" unless $ok;

        # Release the parsed document so memory does not grow with every
        # file. Best effort only: a cleanup failure must not abort the run.
        # NOTE(review): end() is the long-standing name of this call; newer
        # PDF::API2 releases call it close() - confirm for the installed
        # version.
        eval { $pdf->end } if $pdf;
    }

    #close $fh;
    return $totalpages;
}