#!/usr/bin/perl
use strict;
use warnings;

use File::Find qw(find);
use PDF::API2;
use Time::Progress;
use List::MoreUtils qw( natatime );
use Time::Piece;
use File::Basename;
use DBI;

BEGIN {
    # Redirect warn()/die() output to a log file.
    use CGI::Carp qw(carpout);
    open( my $log_fh, '>>', 'log.txt' )
        or die "Unable to append to log: $!";
    carpout($log_fh);
}

$| = 1;    # Enable autoflush so the progress bar updates immediately

# Where to search: doca is about 1.6 TB, docb is about 3.1 TB.
my @search_dirs = ( "/doca", "/docb" );

my $stime        = Time::Piece->new;
my $started_time = $stime->hms;

my @pdf_files;    # PDFs found on disk whose names also appear in pdfs.txt
my @filenames;    # file names loaded from pdfs.txt (read once)

# Walk the search directories and collect matching PDF paths.
my $data = list_dirs( \@search_dirs );
unless (@$data) {
    print "\n No matching PDF files found.\n\n";
    exit;
}
my $count_data = scalar @$data;

# Set up the progress bar over the number of files to process.
my $p = Time::Progress->new;
$p->attr( min => 0, max => $count_data );

print "\n Starting Counting Process: \n\n";
my $total_pages = process_data($data);
print "\n\n Total Number of pages: $total_pages\n";
warn " Total Number of pages: $total_pages\n";

my $etime = Time::Piece->new;
my $end_t = $etime->hms;

# Elapsed wall-clock time in seconds (works across midnight, unlike
# re-parsing the hh:mm:ss strings).
my $done_time      = $etime->epoch - $stime->epoch;
my $converted_time = convert_time($done_time);

print " \n\n Started at: $started_time \n";
print " \n Ended at: $end_t \n";
print "\n Processing time: $converted_time \n\n\n";
warn " Started at: $started_time | Ended at: $end_t | Processing time: $converted_time\n";
exit;

# Open every collected PDF and add up its page count.
sub process_data {
    my $dirs       = shift;
    my $c          = 0;
    my $totalpages = 0;
    foreach my $doc ( @{$dirs} ) {
        $c++;
        print $p->report( " %45b %p\r", $c );
        my ( $filename, $path ) = fileparse($doc);
        eval {
            my $pdf   = PDF::API2->open($doc);
            my $pages = $pdf->pages;    # number of pages in the document
            $totalpages += $pages;
            # Log the per-file result.
            warn " $doc | $filename: Pages: $pages\n";
        };
        warn "$doc | Error captured : $@\n" if $@;
    }
    print $p->report( "\n Done %p elapsed: %L (%l sec)", $c );
    return $totalpages;
}

# Recurse through the search directories; process() decides which
# files end up in @pdf_files.
sub list_dirs {
    my ($dirs_ref) = @_;
    my @dirs = @{$dirs_ref};
    print "\n Searching in: @dirs \n\n";

    find( { wanted => \&process, follow => 0, no_chdir => 1 }, @dirs );

    print "\n\n Found PDF docs in:\n\n";
    foreach my $found_pdf_doc (@pdf_files) {
        print " $found_pdf_doc\n";
    }
    return \@pdf_files;
}

# File::Find callback: keep a file only if it is a PDF whose name
# appears in the list loaded from pdfs.txt.
sub process {
    return unless $File::Find::name =~ m/\.pdf$/i;

    # Load the list of wanted file names once; re-reading pdfs.txt for
    # every file found would be far too slow on ~4.7 TB of data.
    unless (@filenames) {
        my $dbh = DBI->connect( "dbi:CSV:", undef, undef, {
            f_ext => ".txt/r",
            f_enc => "utf-8",
        } );
        $dbh->{csv_tables}{prod_pdf_files} = {
            file      => "pdfs.txt",    # list of file names to search for
            col_names => [qw( doc_name file_name acc_nbr )],
        };
        my $sth = $dbh->prepare("SELECT DISTINCT file_name FROM prod_pdf_files");
        $sth->execute;
        while ( my $row = $sth->fetchrow_hashref ) {
            next unless $row->{file_name} =~ m/\.pdf$/i;
            push @filenames, $row->{file_name};
        }
    }

    # Compare the found path against the wanted names in batches of 50.
    my $addfile   = 0;
    my $pdfs_iter = natatime( 50, @filenames );
    while ( my @batch = $pdfs_iter->() ) {
        for my $test_file (@batch) {
            if ( index( $File::Find::name, $test_file ) > -1 ) {
                ++$addfile;
                last;
            }
        }
        last if $addfile;
    }
    push @pdf_files, $File::Find::name if $addfile;
}    # End process sub

# Format a duration in seconds as e.g. "1d 2h 3m 4s".
sub convert_time {
    my $time = shift;
    my $days = int( $time / 86400 );
    $time -= $days * 86400;
    my $hours = int( $time / 3600 );
    $time -= $hours * 3600;
    my $minutes = int( $time / 60 );
    my $seconds = $time % 60;

    $days    = $days    < 1 ? '' : $days . 'd ';
    $hours   = $hours   < 1 ? '' : $hours . 'h ';
    $minutes = $minutes < 1 ? '' : $minutes . 'm ';
    return $days . $hours . $minutes . $seconds . 's';
}