I use MIME::Parser. Be aware, however, that MIME documents are often multipart and commonly nested recursively, so you generally need to understand the structure of your document to extract the bits you want. The following code may get you started:
use strict;
use warnings;
use MIME::Parser;
# Sample message: a multipart/alternative mail holding one text/plain part.
# The heredoc is single-quoted, so nothing inside it is interpolated.
# Layout matters here: a blank line separates each header block from its
# body, a folded header continues on a line that starts with whitespace, and
# the boundary parameter must match the "--boundary" delimiter lines exactly.
my $email = <<'EMAIL';
Return-path: <grandfather@some.where.com>
Received: from localhost.localdomain (0.0.0.1) by some.where.com (Mercury/32 v4.61) with ESMTP ID MG016DE1;
    24 Nov 2009 17:42:04 +1300
MIME-Version: 1.0
Content-Transfer-Encoding: binary
Content-Type: multipart/alternative; boundary="_----------=_125903766648720"
X-Mailer: MIME::Lite 3.027 (F2.77; T1.28; A2.04; B3.07; Q3.07)
Date: Tue, 24 Nov 2009 17:41:06 +1300
Subject: Sample
To: grandfather@some.where.com
From: some.one@some.where.com

This is a multi-part message in MIME format.
--_----------=_125903766648720
Content-Disposition: inline
Content-Transfer-Encoding: 8bit
Content-Type: text/plain

Hello World.
--_----------=_125903766648720--
EMAIL

# Parse the sample and show the extracted body text.
my %fields = ParseMail($email);
print $fields{body};
# parseParts(@parts)
#
# Walks a list of MIME entity objects and returns the text of the message
# body as a string.
#
#   * A text/plain part wins outright: its body is returned immediately.
#   * A text/html part is reduced to plain text with HTML::TreeBuilder and
#     remembered as a fallback.
#   * A multipart container is searched recursively; its result replaces the
#     fallback only when it actually produced some text.
#   * Any other content type is ignored.
#
# Each element of @parts must provide effective_type(), parts() and
# stringify_body(). Returns '' when no textual part is found.
sub parseParts {
    my @parts     = @_;
    my $savedText = '';

    for my $part (@parts) {
        my $type = $part->effective_type();

        if (index($type, 'multipart') >= 0) {
            # Recurse into the container, but do not let a container with no
            # text (e.g. only attachments) wipe out text already collected.
            my $nested = parseParts($part->parts());
            $savedText = $nested if defined $nested && length $nested;
        } elsif ($type eq 'text/plain') {
            return $part->stringify_body();
        } elsif ($type eq 'text/html') {
            # Loaded on demand: the module is only needed for HTML parts and
            # is not pulled in anywhere else in this script.
            require HTML::TreeBuilder;
            my $tree = HTML::TreeBuilder->new_from_content(
                $part->stringify_body());
            $savedText = $tree->as_text();
            # Explicitly release the tree once its text has been extracted.
            $tree->delete();
        }
    }

    return $savedText;
}
# ParseMail($emailStr)
#
# Parses a complete email message held in a string and returns a flat
# key/value list (assign it to a hash) with these keys:
#
#   subject - Subject header, or '' when the header lookup yields nothing
#   from    - From header, exactly as returned by the header object
#   ccList  - Cc header, exactly as returned by the header object
#   date    - Date header, exactly as returned by the header object
#   body    - message text: taken from parseParts() for a message with
#             parts, otherwise the entity's own body; '' if neither exists
#
# Only subject is defaulted; from, ccList and date are passed through
# unchanged, so callers should be prepared for them to be undefined.
sub ParseMail {
    my ($emailStr) = @_;

    my $parser = MIME::Parser->new();

    # Ask the parser to keep its temporary and output data in core (memory)
    # rather than writing files.
    $parser->tmp_to_core(1);
    $parser->output_to_core(1);

    my $entity = $parser->parse_data($emailStr);
    my $head   = $entity->head();
    my @parts  = $entity->parts();

    my %fields;
    $fields{subject} = $head->get('subject') || '';
    $fields{from}    = $head->get('from');
    $fields{ccList}  = $head->get('Cc');
    $fields{date}    = $head->get('Date');

    if (@parts) {
        $fields{body} = parseParts(@parts);
    } else {
        # An entity with no parts may still lack a body handle (for example
        # a multipart message whose boundary was never found), so check
        # before calling a method on it.
        my $body = $entity->bodyhandle();
        $fields{body} = defined $body ? $body->as_string() : '';
    }

    return %fields;
}
Prints:
Hello World.
True laziness is hard work
|