Anonymous Monk has asked for the wisdom of the Perl Monks concerning the following question:

I am trying to create an email parser that will download mail from a POP3 server, Parse the body of the email, and save any attachments to disk.

I have it all working like a champ except the attachments section. I've tried Mime::parser, and Mime::base64, but have not been able to decode and save an attachment yet. I've tried numerous ways of doing this, including the one offered here on Perl Monks at http://www.perlmonks.com/index.pl?node_id=195442 But so far no luck.

I don't know if this is do to my running on Win2K ( sorry, it's a work thing ) or just my own ignorance.

Any help is greatly appreciated.

Karoo

Replies are listed 'Best First'.
Re: Win2K MIME Decoding
by fokat (Deacon) on Feb 02, 2003 at 04:49 UTC

    The fragments of code below have been taken from one of my upcoming Perl articles which deals with something similar:

    47 my $mp = new MIME::Parser; 48 $mp->ignore_errors(1); 49 $mp->extract_uuencode(1); 50 51 eval { $e = $mp->parse($fh); }; 52 my $error = ($@ || $mp->last_error); 53 54 if ($error) 55 { 56 $mp->filer->purge; # Get rid of the temp files 57 die "Error parsing the message: $error\n"; 58 } decode_entities($e); 103 sub decode_entities 104 { 105 my $ent = shift; 106 107 if (my @parts = $ent->parts) 108 { 109 decode_entities($_) for @parts; 110 } 111 elsif (my $body = $ent->bodyhandle) 112 { 113 my $type = $ent->head->mime_type; 114 115 setup_decoder($ent->head); 116 117 if ($type eq 'text/plain') 118 { print d($body->as_string); } 119 elsif ($type eq 'text/html') 120 { $parser->parse($body->as_string); } 121 else 122 { print "[Unhandled part of type $type]"; } 123 } 124 }

    I'm sorry for not providing a direct link to the article, but I don't want to risk Google picking it up before publishing at TPJ :) You would need to rewrite the decode_entities() function to work with all the attachments. I haven't tested, but I would bet you can get to the attachment's payload using the ->as_string method.

    Best regards

    -lem, but some call me fokat

Re: Win2K MIME Decoding
by tachyon (Chancellor) on Feb 02, 2003 at 12:26 UTC

    Here are some snippets of a perl POP3 mail client that I have been running for some years. By no means the prettiest code I've ever written but it works fine....

    use Mail::POP3Client; use MIME::Base64; my $pop3 = open_pop3_connection(); sub open_pop3_connection { my $pop = new Mail::POP3Client( USER => $global{username}, PASSWORD => $global{password}, HOST => $POP_SERVER, AUTH_MODE => $POP_AUTH_MODE ); unless($pop->Message =~ m/^\+OK/){ ($pop->Message =~ m/ERR(.*)/) ? show_login("$CONNECTION_FAILED: $1") : show_login("$CONNECTION_FAILED:" . $pop->Message); return $pop; } sub get_mail_body { my ($uidl, $want_text, $raw_text ) = @_; my $head = $pop3->Head( uidl_to_msg_num($uidl) ); $head = unescape_mime($head); my $header = parse_mail_header($head); my $body = $pop3->Body(uidl_to_msg_num($uidl)); $body = unescape_mime($body); my $html = ''; if ($header->{content_type} =~ m!multipart/(?:mixed|related)!i) { my $parse = parse_mail_body( $body, $header->{boundary} ); for my $part (@$parse) { if ( $part->{content_type} =~ m!multipart/alternative!i ) +{ $html .= get_best_alternative( $part->{content}, $want +_text, $raw_text ); } elsif ( $part->{content_disposition} =~ m!attachment! or +$part->{filename} or $part->{name} ) { $html .= $raw_text ? "\n[$ATTACHMENT]\n" : get_attachm +ent_link($part, $uidl); } elsif ($part->{content_type} =~ m!text/html!i ) { $html .= $part->{content} unless $raw_text; } else { $part->{content} = decode_base64($part->{content}) if +$part->{encoding} =~ m/base64/i; $html .= get_text_plain( $part->{content}, $raw_text ) +; } } } elsif ( $header->{content_type} =~ m!multipart/alternative!i ) { my $parse = parse_mail_body( $body, $header->{boundary} ); $html = get_best_alternative( $parse, $want_text, $raw_text ); } elsif ( $header->{content_type} =~ m!text/html!i ) { $html = $body; } else { $html = get_text_plain( $body , $raw_text); } return $html } sub get_text_plain { my ($text, $raw_text) = @_; require Text::Wrap; $Text::Wrap::columns = 120; $text = Text::Wrap::wrap('','',$text); return $raw_text ? $text : '<pre>' . escapeHTML($text) . '</pre>'; } sub get_attachment_link { my ($part, $uidl) = @_; my $filename = $part->{filename} || $part->{name}; my $link = make_link( $filename, action => 'get_attachment', uidl => $uidl, filename => $filename, ); return "\n<br>$ATTACHMENT [$link]"; } sub get_best_alternative { my ($parse, $want_text, $raw_text) = @_; for my $options ( @$parse ) { $global{charset} = $options->{charset} if $options->{charset}; return $options->{content} if $options->{content_type} =~ m!text +/html!i ; return get_text_plain($options->{content}, $raw_text) if $want_t +ext and $options->{content_type} =~ m!text/plain!i; } } sub parse_mail_header { local $_ = shift; my %header; s/\r?\n\s+/ /g; # unwrap soft line break lines $header{return_path} = m/^Return-Path:\s*([^\r\n]*)/im ? $1 : ' +'; $header{reply_to} = m/^Reply-To:\s*([^\r\n]*)/im ? $1 : ''; $header{in_reply_to} = m/^In-Reply-To:\s*([^\r\n]*)/im ? $1 : ' +'; $header{references} = m/^References:\s*([^\r\n]*)/im ? $1 : '' +; $header{from} = m/^From:\s*([^\r\n]*)/im ? $1 : ''; $header{to} = m/^To:\s*([^\r\n]*)/im ? $1 : ''; $header{cc} = m/^Cc:\s*([^\r\n]*)/im ? $1 : ''; $header{subject} = m/^Subject:\s*([^\r\n]*)/im ? $1 : ''; $header{date} = m/^Date:\s*([^\r\n]*)/im ? $1 : ''; $header{message_id} = m/^Message-ID:\s*([^\r\n]*)/im ? $1 : '' +; $header{content_type} = m/^Content-Type:\s*([^;]*)/im ? $1 : ''; $header{boundary} = m/\s+boundary="([^"]*)"/i ? $1 : ''; $header{charset} = m/\s+charset="([^"]*)"/i ? $1 : ''; $header{reply_to} = $header{reply_to} || $header{from} || $head +er{return_path} || ''; $header{subject} ||= $NO_SUBJECT; $header{from} ||= $EMPTY_FROM; $header{charset} ||= $CHARSET || "iso-8859-1"; $header{subject} = decode_charset_encoding( $header{subject} ); # make sure all values are defined $header{$_} ||= '' for qw( return_path in_reply_to references to c +c date message_id content_type boundary); return \%header; } sub parse_mail_body { my ($body, $boundary ) = @_; my @parts; my $CRLF = qr/\r\n/; for (split/$CRLF--$boundary/, $body) { my %parse; next unless s/^Content-Type:\s*([^;\r\n]+)//mi; $parse{content_type} = $1; if ( $parse{content_type} =~ m!multipart/alternative!i ) { s/\s*boundary="([^"]*)"//m; $parse{content} = parse_mail_body($_, $1); # use a litt +le recursion to split parts } else { # split at CRLFCRLF sequence; first element is header, rem +aining part is body my ($head,$content) = $_ =~ /^(.+?)$CRLF$CRLF(.*)\z/os; $parse{content} = $content; ($_ = $head) =~ s/$CRLF\s+/ /og; # unfold header per RFC82 +2 $parse{content_length} = $1 if m/^Content-Length:\s*( +[^\r\n;]*)/mi; $parse{encoding} = $1 if m/^Content-Transfer-En +coding:\s*([^\r\n;]*)/mi; $parse{content_dispositon} = $1 if m/^Content-Disposition +:\s*(attachment|inline)/mi; $parse{content_location} = $1 if m/^Content-Location:\s +*([^\r\n;]*)/mi; $parse{content_id} = $1 if m/^Content-ID:\s*([^\r +\n;]*)/mi; $parse{filename} = $1 if m/\s*filename="([^"]*) +"/i; $parse{name} = $1 if m/\s*name="([^"]*)"/i; $parse{charset} = $1 if m/\s*charset="([^"]*)" +/i; } push @parts, \%parse; } return \@parts; } sub get_attachment { my ($uidl, $filename) = @_; my $head = $pop3->Head( uidl_to_msg_num($uidl) ); my $header = parse_mail_header($head); my $body = $pop3->Body( uidl_to_msg_num($uidl) ); $body = unescape_mime($body); my $parse = parse_mail_body( $body, $header->{boundary} ); for my $part (@$parse) { next unless $part->{filename} eq $filename or $part->{name} eq $ +filename; $part->{content} = decode_base64($part->{content}) if $part->{ +encoding} =~ m/base64/i; my $size = length $part->{content}; my $type = $part->{content_type} || 'application/octet-stream' +; binmode STDOUT; print STDOUT "Content-type: $type\n"; print STDOUT "Content-Length: $size\n"; print STDOUT "Content-Disposition: attachment; filename=$filen +ame\n\n"; print STDOUT $part->{content}; exit; } # error if we get here } sub uidl_to_msg_num { my $uidl = shift; return undef unless $uidl; my @uidls = $pop3->Uidl(); for my $i ( 1..$#uidls ) { return $i if $uidl eq $uidls[$i]; } return undef; # uidl not found } sub msg_num_to_uidl { my $msg_num = shift; my (undef, $uidl) = split ' ', $pop3->Uidl($msg_num); return $uidl; } sub unescape_mime { my ( $todecode ) = @_; return '' unless defined $todecode; $todecode =~ s/[ \t]+?(\r?\n)/$1/g; # rule #3 (trailing space mus +t be deleted) $todecode =~ s/=\r?\n//g; # rule #5 (soft line breaks) $todecode =~ s/=([0-9a-fA-F]{2})/ chr hex $1 /ge; return $todecode; }

    cheers

    tachyon

    s&&rsenoyhcatreve&&&s&n.+t&"$'$`$\"$\&"&ee&&y&srve&&d&&print