in reply to pull off PDF attachment
#!/usr/bin/perl -w
#
# Download the attachments of every message in a POP3 mailbox into a
# directory named after today's date, delete every attachment whose
# extension is not wanted (only "pdf" is wanted), and base64-encode each
# file that is kept.
#
# Known limitation (as reported with this post): as long as the code is
# running it keeps track of seen emails, but every time you start the
# code over it looks at the same email messages, because %msg_ids is
# only held in memory.
#
# Relies on commify() and base64_encode_file(), defined further down in
# this file.

use strict;
use warnings;
use MIME::Base64;
use Getopt::Long;

$|++;    # unbuffered output, so progress lines show up immediately.
my $VERSION = "1.0";
my %opts;

# make sure we have the modules we need, else die peacefully.
# both modules are only used through method calls, so a plain
# require (nothing imported) inside a block eval is enough.
eval { require Net::POP3;    1 } or die "[err] Net::POP3 not installed.\n";
eval { require MIME::Parser; 1 } or die "[err] MIME::Parser not installed.\n";

# define our command line flags (long and short versions).
GetOptions(\%opts,
    'server|s=s',      # the POP3 server to use.
    'username|u=s',    # the POP3 username to use.
    'password|p=s',    # the POP3 password to use.
    'begin|b=i',       # what msg number to start at.
);

# built-in defaults; they only apply when the flag was not given, so
# the command line always wins.
$opts{server}   = "localhost" unless defined $opts{server};
$opts{username} = "username"  unless defined $opts{username};
$opts{password} = "password"  unless defined $opts{password};

# at the very least, we need our login information.
die "[err] POP3 server missing, use --server or -s.\n" unless $opts{server};
die "[err] Username missing, use --username or -u.\n" unless $opts{username};
die "[err] Password missing, use --password or -p.\n" unless $opts{password};

# try an initial connection to the server.
print "-" x 76, "\n";    # merely a visual seperator.
my $conn = Net::POP3->new( $opts{server} )
    or die "[err] There was a problem connecting to the server.\n";
print "Connecting to POP3 server at $opts{server}.\n";

# and now the login information.
$conn->login( $opts{username}, $opts{password} )
    or die "[err] There was a problem logging in (.poplock? credentials?).\n";
print "Connected successfully as $opts{username}.\n";

# purdy stats about our mailbox.
my ($msg_total, $mbox_size) = $conn->popstat( );
die "[err] There was a problem reading the mailbox status.\n"
    unless defined $msg_total;

# numeric comparison, so both "0" and the zero-but-true "0E0" count
# as an empty mailbox.
if ($msg_total == 0) { print "No new emails are available.\n"; exit; }
print "You have $msg_total messages totalling ", commify($mbox_size), "k.\n";

# the list of valid file extensions, as a regex alternation
# (e.g. "pdf|ps"). we do extensions, not mime-types, because
# they're easier to understand from an end-user perspective
# (no research is required).
my $valid_exts = "pdf";
my %msg_ids;                           # used to keep track of seen emails.
my $first_msg = $opts{begin} || 1;     # user specified or 1.

# create a subdirectory based on today's date. plain %02d is used
# because a precision (as in "%02.0d") makes sprintf ignore the 0 flag
# and pad single digits with a space instead.
my ($d, $m, $y) = (localtime)[3,4,5];
my $savedir = sprintf "%04d-%02d-%02d", $y + 1900, $m + 1, $d;
print "Using directory '$savedir' for newly downloaded files.\n";
unless (-d $savedir) {
    mkdir($savedir, 0777)
        or die "[err] Could not create directory '$savedir': $!\n";
}

# begin looping through each msg.
print "-" x 76, "\n";    # merely a visual seperator.
foreach my $msg_num ($first_msg .. $msg_total) {

    # the size of the individual email.
    my $msg_size = $conn->list($msg_num);

    # get the header of the message
    # so we can check for duplicates.
    my $headers = $conn->top($msg_num);

    # print/store the good bits.
    my ($msg_subj, $msg_id);
    foreach my $header (@{ $headers || [] }) {

        # print subject line and size.
        if ($header =~ /^Subject: (.*)/) {
            $msg_subj = substr($1, 0, 50);    # trim subject down a bit.
            print "Msg $msg_num / ", commify($msg_size), "k / $msg_subj...\n";
        }

        if ($header =~ /^Date: (.*)/) {
            my $msg_date = substr($1, 0, 50);    # trim date down a bit.
            print "Date $msg_date\n";
        }

        # save Message-ID for duplicate comparison.
        elsif ($header =~ /^Message-ID: <(.*)>/i) {
            $msg_id = $1;
            $msg_ids{$msg_id}++;
        }

        # move on to the filtering.
        elsif ($msg_subj and $msg_id) { last; }
    }

    # if the message size is too small, then it
    # could be a reply or something of low quality.
    if (defined($msg_size) and $msg_size < 40) {
        print " Skipping - message size is smaller than our threshold.\n";
        next;
    }

    # check for matching Message-ID. If found,
    # skip this message. This will help eliminate
    # crossposting and duplicate downloads.
    if (defined($msg_id) and $msg_ids{$msg_id} >= 2) {
        print " Skipping - we've already seen this Message-ID.\n";
        next;
    }

    # get the message to feed to MIME::Parser.
    my $msg = $conn->get($msg_num);
    unless ($msg) {
        print " Skipping - the message could not be retrieved.\n";
        next;
    }

    # create a MIME::Parser object to
    # extract any attachments found within.
    my $parser = MIME::Parser->new;
    $parser->output_dir( $savedir );
    #my $enmsg = encode_base64($msg);
    my $entity = $parser->parse_data($msg);

    # extract our mime parts and go through each one.
    my @parts = $entity->parts;
    foreach my $part (@parts) {

        # determine the path to the file in question.
        my $path = ($part->bodyhandle) ? $part->bodyhandle->path : undef;

        # move on if it's not defined,
        # else figure out the extension. the capture is taken from
        # the match's return list so that a failed match can never
        # hand us a stale $1 from an earlier regex.
        next unless $path;
        my ($ext) = $path =~ /\w+\.([^.]+)$/;
        next unless $ext;

        # we continue only if our extension is correct. the extension
        # comes from the email, so it is the subject of the match and
        # is never interpolated into the pattern.
        unless ($ext =~ /\A(?:$valid_exts)\z/i) {

            # delete the blasted thing.
            print " Removingg unwanted filetype ($ext): $path\n";
            unlink $path or print " > Error removing file at $path: $!.";
            next;    # move on to the next attachment or message.
        }

        # a valid file type. yummy!
        print " Keeping valid file: $path.\n";
        my $encodedfile = base64_encode_file($path);    # not used any further yet.
        #print "$encodedfile";
    }
}

# clean up and close the connection.
$conn->quit;

# now, go through our savedir and remove all msg-*
# files, which are message bodies saved by MIME::Parser.
# the directory is addressed by name instead of chdir()ing into it,
# so a failure can never make us delete msg-* files somewhere else.
opendir(my $save_dh, $savedir)
    or die "[err] Could not open directory '$savedir': $!\n";
my @body_files = grep { /^msg-/ } readdir($save_dh);
closedir($save_dh);
foreach my $body_file (@body_files) {
    unlink "$savedir/$body_file"
        or warn "[warn] Could not remove $savedir/$body_file: $!\n";
}

# cookbook 2.17.
# commify($number)
#
# Return $number with a comma inserted between every group of three
# digits of its integer part (Perl Cookbook, recipe 2.17), e.g.
# 1234567 -> "1,234,567" and "1234.5678" -> "1,234.5678".
# An undefined argument yields the empty string.
sub commify {
    my ($number) = @_;
    return '' unless defined $number;

    # work on the reversed string so the groups of three are counted
    # from the right; the negative look-ahead keeps commas out of the
    # digits that follow a decimal point.
    my $text = reverse $number;
    $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
    return scalar reverse $text;
}

# Holds the reason for the most recent base64_encode_file() failure.
our $Error_Message;

# base64_encode_file($file)
#
# Read $file as raw bytes and return its contents base64-encoded, with
# a newline inserted after every complete 76-character line (a shorter
# last line gets no trailing newline).
#
# Returns the empty string if the file cannot be opened; the reason is
# then stored in $Error_Message.
sub base64_encode_file {
    my ($file) = @_;

    # loaded here so the sub works no matter what the rest of the
    # file has already pulled in.
    require MIME::Base64;

    my $fh;
    unless (open($fh, '<', $file)) {
        $Error_Message = "The file \"$file\" could not be opened ($!).";
        return '';
    }

    # attachments are binary: keep the I/O layer from translating
    # line endings.
    binmode $fh;
    my $raw = do { local $/; <$fh> };
    close $fh;
    $raw = '' unless defined $raw;

    # encode without line breaks first, then wrap at 76 characters.
    my $encoded = MIME::Base64::encode_base64($raw, '');
    $encoded =~ s/(.{76})/$1\n/g;

    return $encoded;
}
|
|---|