#!perl
use 5.020;
use warnings;
use feature 'signatures';
no warnings 'experimental::signatures';
use Getopt::Long;
use utf8;
use File::Basename 'dirname';
use File::Spec;
use Win32::OLE 'in';
use Win32::OLE::Const 'Microsoft Outlook';
use Win32::OLE::Variant;
use Scalar::Util 'blessed';
use Encode 'encode', 'decode';
use POSIX 'strftime';
#use PDFContents::Cache;

# Saves PDF attachments found in a set of Outlook folders into a target
# directory and reports how many new files were written.
#
# Options:
#   --quick                    only scan the primary folder
#   --target-directory|-t DIR  where PDFs are saved (default: <script dir>/INPUT)

# We output UTF-8
system('chcp 65001 >NUL:');
binmode STDOUT, ':encoding(UTF-8)';
$| = 1;

GetOptions(
    'quick'                => \my $quick_run,
    'target-directory|t=s' => \my $target_dir,
) or die "Usage: $0 [--quick] [--target-directory DIR]\n";

$target_dir ||= dirname($0) . "/INPUT";
$target_dir = File::Spec->rel2abs( $target_dir );

# Attach to a running Outlook if there is one, otherwise start our own
# instance (which gets 'Quit' called on it when the object is destroyed).
my $outlook = Win32::OLE->GetActiveObject('Outlook.Application')
           || Win32::OLE->new('Outlook.Application', 'Quit');
my $namespace = $outlook->GetNamespace("MAPI");

# Show a single-line status message, overwriting the previous one.
# The previous message is blanked with spaces first so that a shorter
# message does not leave remnants of a longer one on screen.
sub progress( $info ) {
    state $last_progress = '';
    print join "",
        " " x length($last_progress), "\r",
        $info, "\r";
    $last_progress = $info;
}

# Resolve a '!'-separated folder path below the "#MAGIC-MAILBOX" store and
# return the folder object.
# Dies if any path element cannot be found, after listing the folders that
# do exist at that level. (Previously this only warned and then failed
# later with an unrelated "method on undefined value" error.)
sub find_folder($path) {
    my $folder = $namespace->Folders->{"#MAGIC-MAILBOX"};
    for my $el (split /!/, $path) {
        # The OLE layer is handed Latin-1 encoded folder names here
        my $name = encode('Latin-1', $el);
        my $next_folder = $folder->Folders->{$name};
        if( ! $next_folder ) {
            for my $candidate ( in($folder->Folders) ) {
                say "<$candidate->{Name}>";
            };
            die "No folder found for '$el' in '$path'\n";
        };
        $folder = $next_folder;
    };
    return $folder;
}

# Read all PDFs we already rejected. A missing "rejected" directory simply
# means that nothing has been rejected yet.
our @blacklist;
if( opendir my $dh, "$target_dir/rejected" ) {
    @blacklist = grep { $_ ne '.' and $_ ne '..' } readdir $dh;
    closedir $dh;
}
# Hash lookup instead of scanning @blacklist for every attachment
my %is_rejected = map { $_ => 1 } @blacklist;

# Call $callback->($msg) for every item in $folder.
# Returns the sum of the callback return values (undefined results count
# as 0), or 0 if the folder has no item collection.
sub for_all_mails( $folder, $callback ) {
    my $list = $folder->Items;
    return 0 if ! $list;

    my $count = 0;
    my $msg = $list->GetFirst;
    while( $msg ) {
        $count += $callback->($msg) // 0;
        $msg = $list->GetNext;
    }
    return $count;
}

# Save all PDF attachments of $msg into $target_directory, skipping
# rejected file names and files that already exist with non-zero size.
# Returns the number of attachments that were newly written.
sub save_mail_attachments( $msg, $target_directory=$target_dir ) {
    my $new = 0;
    foreach my $atch (reverse in($msg->{Attachments})) {
        my $name = $atch->{FileName} // '';
        next unless $name =~ m/\.pdf\z/i;
        next if $is_rejected{ $name };

        my $target = File::Spec->catfile( $target_directory, $name );
        # Also re-save files that exist but are empty
        if( ! -f $target or ! -s $target ) {
            $atch->SaveAsFile($target);
            $new++;
        }
    }
    return $new;
}

# Save the PDF attachments of all mails in $folder.
# Returns the number of newly written files.
sub save_attachments( $folder ) {
    progress($folder->Name);
    return for_all_mails( $folder, \&save_mail_attachments );
}

# Call $callback->($folder) for $folder itself and then, recursively, for
# every subfolder, walking the subfolders from last to first.
# $visual carries a "parent > child" display path; it is passed down the
# recursion but not otherwise used here.
sub in_all_subfolders( $folder, $callback, $visual=$folder->Name ) {
    $callback->($folder);

    my $folders = $folder->Folders;
    my $subfolder = $folders->GetLast;
    while( $subfolder ) {
        in_all_subfolders( $subfolder, $callback, $visual . ' > ' . $subfolder->Name );
        $subfolder = $folders->GetPrevious;
    };
    return;
}

my $count = 0;

my $Folder = find_folder("Posteingang!incoming stuff");

# Find a folder named "from Somebody", but as a substring, since it might
# contain Umlauts or whatever
for my $f (in ($Folder->Folders)) {
    if( $f->Name =~ m!from Somebody$! ) {
        $Folder = $f;
        last;
    };
};

$count += save_attachments( $Folder );

if( ! $quick_run ) {
    in_all_subfolders( $Folder, sub( $this_folder ) {
        $count += save_attachments($this_folder);
    });
    $count += save_attachments( find_folder("Posteingang"));
    $count += save_attachments( find_folder("Posteingang!to-sort"));
    $count += save_attachments( find_folder("Posteingang!to-sort-later"));

    for my $folder (in(find_folder('Posteingang!in-progress')->Folders)) {
        $count += save_attachments( $folder );
    }

    for my $folder (reverse in(find_folder('Posteingang!by-ticket-number')->Folders)) {
        in_all_subfolders( $folder, sub( $this_folder ) {
            $count += save_attachments($this_folder);
        });
    }
}

# Walk one more folder tree and visit every PDF attachment in it. The
# actual processing (PDFContents::Cache) is currently disabled, so this
# pass only updates the progress display.
my $ts = strftime '%Y-%m-%dT%H:%M:%S', localtime;
in_all_subfolders( find_folder("Posteingang!some!deep!subfolder"), sub( $folder ) {
    my $foldername = $folder->{Name};
    # Per-folder mail counter for the progress display only; deliberately
    # separate from the global $count of saved PDFs.
    my $seen = 0;
    for_all_mails( $folder, sub( $msg ) {
        progress( "$foldername - $seen" );
        $seen++;
        for my $att (reverse in($msg->{Attachments})) {
            my $fn = $att->{FileName} // '';
            # Skip only this attachment; a non-PDF must not hide the
            # remaining attachments of the same mail.
            next unless $fn =~ /\.pdf\z/i;
            my $id = $msg->{EntryId};
            # process the PDF contents
            # PDFContents::Cache::add_mailinfo($foldername, $fn, $id, $ts);
        }
        return 1;
    });
});

progress("");
say "$count new PDFs found";