I actually came across some really good base code that I modified to do what I need. Where I am stuck now is I am trying to move a message after I have removed the attachment to a different folder so I am not constantly looking at the same emails. Here is what I have so far:
#!/usr/bin/perl -w
use MIME::Base64;
$|++;
my $VERSION = "1.0";
use Getopt::Long;
my %opts;
# make sure we have the modules we need, else die peacefully.
eval("use Net::POP3;"); die "[err] Net::POP3 not installed.\n" if $@;
eval("use MIME::Parser;"); die "[err] MIME::Parser not installed.\n" i
+f $@;
# define our command line flags (long and short versions).
GetOptions(\%opts, 'server|s=s', # the POP3 server to use.
'username|u=s', # the POP3 username to use.
'password|p=s', # the POP3 password to use.
'begin|b=i', # what msg number to start at.
);
$opts{server} = "localhost";
$opts{username} = "username";
$opts{password} = "password";
# at the very least, we need our login information.
die "[err] POP3 server missing, use --server or -s.\n" unless $opts{se
+rver};
die "[err] Username missing, use --username or -u.\n" unless $opts{use
+rname};
die "[err] Password missing, use --password or -p.\n" unless $opts{pas
+sword};
# try an initial connection to the server.
print "-" x 76, "\n"; # merely a visual seperator.
my $conn = Net::POP3->new( $opts{server} )
or die "[err] There was a problem connecting to the server.\n";
print "Connecting to POP3 server at $opts{server}.\n";
# and now the login information.
$conn->login( $opts{username}, $opts{password} )
or die "[err] There was a problem logging in (.poplock? credentials?
+).\n";
print "Connected successfully as $opts{username}.\n";
# purdy stats about our mailbox.
my ($msg_total, $mbox_size) = $conn->popstat( );
if ($msg_total eq 0) { print "No new emails are available.\n"; exit;
+}
if ($msg_total eq '0E0') { print "No new emails are available.\n"; ex
+it; }
print "You have $msg_total messages totalling ", commify($mbox_size),
+"k.\n";
# the list of valid file extensions. we do extensions, not
# mime-types, because they're easier to understand from
# an end-user perspective (no research is required).
my $valid_exts = "pdf";
my %msg_ids; # used to keep track of seen emails.
my $msg_num = $opts{begin} || 1; # user specified or 1.
# create a subdirectory based on today's date.
my ($d,$m,$y) = (localtime)[3,4,5]; $y += 1900; $m++;
$d = sprintf "%02.0d", $d; $m = sprintf "%02.0d", $m;
print "Using directory '$y-$m-$d' for newly downloaded files.\n";
my $savedir = "$y-$m-$d"; mkdir($savedir, 0777);
# begin looping through each msg.
print "-" x 76, "\n"; # merely a visual seperator.
while ($msg_num <= $msg_total) {
# the size of the individual email.
my $msg_size = $conn->list($msg_num);
# get the header of the message
# so we can check for duplicates.
my $headers = $conn->top($msg_num);
# print/store the good bits.
my ($msg_subj, $msg_id);
foreach my $header (@$headers) {
# print subject line and size.
if ($header =~ /^Subject: (.*)/) {
$msg_subj = substr($1, 0, 50); # trim subject down a bit.
print "Msg $msg_num / ",commify($msg_size),"k / $msg_subj.
+..\n";
}
if ($header =~ /^Date: (.*)/) {
my $msg_date = substr($1, 0, 50); # trim subject down a bi
+t.
print "Date $msg_date\n";
}
# save Message-ID for duplicate comparison.
elsif ($header =~ /^Message-ID: <(.*)>/i) {
$msg_id = $1; $msg_ids{$msg_id}++;
}
# move on to the filtering.
elsif ($msg_subj and $msg_id) { last; }
}
# if the message size is too small, then it
# could be a reply or something of low quality.
if (defined($msg_size) and $msg_size < 40) {
print " Skipping - message size is smaller than our threshold
+.\n";
$msg_num++; next;
}
# check for matching Message-ID. If found,
# skip this message. This will help eliminate
# crossposting and duplicate downloads.
if (defined($msg_id) and $msg_ids{$msg_id} >= 2) {
print " Skipping - we've already seen this Message-ID.\n";
$msg_num++; next;
}
# get the message to feed to MIME::Parser.
my $msg = $conn->get($msg_num);
# create a MIME::Parser object to
# extract any attachments found within.
my $parser = new MIME::Parser;
$parser->output_dir( $savedir );
#my $enmsg = encode_base64($msg);
my $entity = $parser->parse_data($msg);
# extract our mime parts and go through each one.
my @parts = $entity->parts;
foreach my $part (@parts) {
# determine the path to the file in question.
my $path = ($part->bodyhandle) ? $part->bodyhandle->path : und
+ef;
# move on if it's not defined,
# else figure out the extension.
next unless $path; $path =~ /\w+\.([^.]+)$/;
my $ext = $1; next unless $ext;
# we continue only if our extension is correct.
my $continue; $continue++ if $valid_exts =~ /$ext/i;
# delete the blasted thing.
unless ($valid_exts =~ /$ext/) {
print " Removingg unwanted filetype ($ext): $path\n";
unlink $path or print " > Error removing file at $path: $!.
+";
next; # move on to the next attachment or message.
}
# a valid file type. yummy!
print " Keeping valid file: $path.\n";
my $encodedfile = base64_encode_file($path);
#print "$encodedfile";
}
# increase our counter.
$msg_num++;
}
# clean up and close the connection.
$conn->quit;
# now, jump into our savedir and remove all msg-*
# files, which are message bodies saved by MIME::Parser.
chdir ($savedir); opendir(SAVE, "./") or die $!;
my @dir_files = grep !/^\.\.?$/, readdir(SAVE); closedir(SAVE);
foreach (@dir_files) { unlink if $_ =~ /^msg-/; }
# cookbook 2.17.
sub commify {
my $text = reverse $_[0];
$text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
return scalar reverse $text;
}
sub base64_encode_file {
# Initialize variables
local($file) = $_[0];
local($encoded, $line) = '';
local($len, $bytes, $pad) = 0;
# Open the file
if (open (DATA, "<$file")) {
# Process the data
while ($bytes = read(DATA, $line, 45)) {
$len += $bytes;
# uuencode the line and remove the first and last characte
+rs
$encoded .= substr(pack('u', $line), 1);
chop($encoded);
}
# Convert from uuencoded to base64
$encoded =~ tr| -_`|A-Za-z0-9+/A|;
$pad = (3 - ($len % 3)) % 3;
substr($encoded, -$pad, $pad) = '=' x $pad;
$encoded =~ s/(.{76})/$1\n/g;
}
else {
$Error_Message = "The file \"$file\" could not be opened ($!).
+";
}
# Return the result (null if the file couldn't be opened)
return($encoded);
}
As long as the code is running it keeps track of seen emails but every time you start the code over it looks at the same email messages. |