#!/usr/bin/perl
#===========================================================
# Program EM.pl
#
# Reads one e-mail message from a file, parses it with Email::MIME and
# prints the text it finds:
#   * text/plain parts are printed as-is;
#   * text/html parts are reduced to their text tokens with
#     HTML::TokeParser and then printed;
#   * image/jpeg parts are recognised but not processed;
#   * anything else prints "NO MATCH" plus a debug dump of the part.
#
# Usage: perl EM.pl [message-file]     (default: Andrew.msg)
#===========================================================
use strict;
use warnings;

use Email::MIME;
use HTML::TokeParser;
use Data::Dumper;

# A test message file from MS Outlook; may be overridden on the command line.
my $msgfile = @ARGV ? $ARGV[0] : "Andrew.msg";

# NOTE(review): the sample run recorded at the bottom of this file ends in
# "NO MATCH", and the dumped body contains residue that looks like
# "__properties_version1.0".  That suggests Andrew.msg is a binary Outlook
# .msg container rather than RFC 822/MIME text, in which case Email::MIME
# cannot split it into parts -- TODO confirm, and convert the file to
# MIME text first if so.

# Three-argument open with a lexical handle; ':raw' keeps the bytes exactly
# as stored so the parser sees the original line endings.
open(my $msg_fh, '<:raw', $msgfile) or die "Can't open $msgfile: $!\n";

# Slurp the whole file: with $/ localised to undef a single read returns
# everything up to end-of-file.
my $message = do { local $/; <$msg_fh> };
close($msg_fh);

die "Message file $msgfile is empty\n"
    unless defined $message && length $message;

# $message is the full text of the entire email message.
my $parsed = Email::MIME->new($message)
    or die "Could not parse email message\n";

process_part($parsed);

# process_part($part)
#
# Handles one Email::MIME part.  A part that has sub-parts is treated as a
# container and each sub-part is processed recursively, so text nested
# inside e.g. multipart/alternative is still reached.  A leaf part is
# dispatched on its content type.  Returns nothing; output goes to STDOUT.
sub process_part {
    my ($part) = @_;

    my @subparts = $part->subparts;
    if (@subparts) {
        process_part($_) for @subparts;
        return;
    }

    # content_type may be undef when the header is missing; use '' so such
    # a part falls through to the "NO MATCH" branch without a warning.
    my $content_type = $part->content_type;
    $content_type = '' unless defined $content_type;

    if ($content_type =~ m{text/plain}i) {
        # Plain text part: print the decoded body.
        print $part->body;
    }
    elsif ($content_type =~ m{image/jpeg}i) {
        # JPEG part available in $part->body; nothing is done with it yet.
    }
    elsif ($content_type =~ m{text/html}i) {
        # HTML part: strip the markup and print the remaining text.
        print html_to_text($part->body);
    }
    else {
        print "NO MATCH\n";
        dump_unmatched_part($part);
    }
    return;
}

# html_to_text($html)
#
# Returns the concatenation of all text ('T') tokens found in $html.
# Returns the empty string when the document contains no text tokens.
# Dies if HTML::TokeParser cannot be constructed.
sub html_to_text {
    my ($html) = @_;

    my $parser = HTML::TokeParser->new(\$html)
        or die "Cannot read message text for parsing and cleaning\n";

    my $plain_text = '';
    while (my $token = $parser->get_token) {
        $plain_text .= $token->[1] if $token->[0] eq 'T';    # text
    }
    return $plain_text;
}

# dump_unmatched_part($part)
#
# Test output for a part whose content type was not recognised: prints each
# field of the part's underlying hash with all non-word characters removed.
# The values are cleaned in a copy so the part object itself is left intact.
sub dump_unmatched_part {
    my ($part) = @_;

    my @cleaned;
    for my $field (sort keys %{$part}) {
        my $value = defined $part->{$field} ? "$part->{$field}" : '';
        $value =~ s/\W+//g;    # Zap non-word
        push @cleaned, $field, $value;
    }
    print Data::Dumper->Dump(\@cleaned);    # for test output
    return;
}

#===========================================================
# Output recorded from a run of the original version of this script:
#
# C:\Perl\Test\MIME>perl -w EM.pl
# NO MATCH
# $VAR1 = 'body';
# $VAR2 = 'PPPBYahooGroupsLinksBBRPULLITovisityourgrouponthewebgotoBRAhrefhttpgroupsyahoocomgrouphaiku
#          kaiIIIhttpgroupsyahoocomgrouphaikukaiIIIABRnbspLITounsubscribefromthisgroupsendanemailtoBRAhref
#          mailt
#          stg10_5FF70102DFA__properties_version100X99q___Nd_Ad0';
# $VAR3 = 'head';
# $VAR4 = 'HASH0x1625814';
# $VAR5 = 'mycrlf';
# $VAR6 = '';
# $VAR7 = 'header_names';
# $VAR8 = 'HASH0x1c60278';
# $VAR9 = 'order';
# $VAR10 = 'ARRAY0x1af5550';
# $VAR11 = 'parts';
# $VAR12 = 'ARRAY0x16259ac';
# $VAR13 = 'ct';
# $VAR14 = 'HASH0x1aa33e4';
#
# C:\Perl\Test\MIME>
#===========================================================