#!/usr/local/bin/perl use MIME::Parser; use MIME::Entity; use MIME::Body; my (@body, $i, $subentity); my $parser = new MIME::Parser; $parser->output_to_core(1); my $entity = $parser->parse_open("C:\\Documents and Settings\\user\\My Documents\\email.txt"); %request = (); my $header = $entity->head; $request{SUBJECT} = $header->get('Subject'); chomp $request{SUBJECT}; $request{TO} = $header->get('To'); chomp $request{TO}; $request{FROM} = $header->get('From'); chomp $request{FROM}; $request{CC} = $header->get('CC'); chomp $request{CC}; if ($entity->parts > 0){ for ($i=0; $i<$entity->parts; $i++){ $subentity = $entity->parts($i); if (($subentity->mime_type =~ m/text\/html/i) || ($subentity->mime_type =~ m/text\/plain/i)){ $body = join "", @{$subentity->body}; next; } #Multipart/Alternative elsif ($subentity->mime_type =~ m/multipart\/alternative/i){ $body = join "", @{$subentity->body}; @body = split /------=_NextPart_\S*\n/, $body; $body = $body[1]; $body =~ s/^Content-Type.*Content-Transfer-Encoding.*?\n+//is; next; } } } else { print("No body text."); } $body = &untag($body); #remove extra space $body =~ tr/ //s; $body =~ s/\n+ +\n//g; $body =~ s/\n\n{2,}//g; print($body); sub untag { local $_ = $_[0] || $_; # ALGORITHM: # find < , # comment , # or comment , # or one of the start tags which require correspond # end tag plus all to end tag # or if \s or =" # then skip to next " # else [^>] # > s{ < # open tag (?: # open group (A) (!--) | # comment (1) or (\?) | # another comment (2) or (?i: # open group (B) for /i ( TITLE | # one of start tags SCRIPT | # for which APPLET | # must be skipped OBJECT | # all content STYLE # to correspond ) # end tag (3) ) | # close group (B), or ([!/A-Za-z]) # one of these chars, remember in (4) ) # close group (A) (?(4) # if previous case is (4) (?: # open group (C) (?! # and next is not : (D) [\s=] # \s or "=" ["`'] # with open quotes ) # close (D) [^>] | # and not close tag or [\s=] # \s or "=" with `[^`]*` | # something in quotes ` or [\s=] # \s or "=" with '[^']*' | # something in quotes ' or [\s=] # \s or "=" with "[^"]*" # something in quotes " )* # repeat (C) 0 or more times | # else (if previous case is not (4)) .*? # minimum of any chars ) # end if previous char is (4) (?(1) # if comment (1) (?<=--) # wait for "--" ) # end if comment (1) (?(2) # if another comment (2) (?<=\?) # wait for "?" ) # end if another comment (2) (?(3) # if one of tags-containers (3) ]*)? # skip junk to ">" ) # end if (3) > # tag closed }{}gsx; # STRIP THIS TAG return $_ ? $_ : ""; }