Beefy Boxes and Bandwidth Generously Provided by pair Networks
Perl-Sensitive Sunglasses
 
PerlMonks  

comment on

( [id://3333]=superdoc: print w/replies, xml ) Need Help??
#!/usr/bin/perl -w use Time::Local; use strict; my $mbox = shift || "/home/lukas/mbox"; my $outputdir = shift || "/home/lukas/tmp/test"; my %monthmap = ( Jan => 1, Feb => 2, Mar => 3, Apr => 4, May => 5, Jun => 6, Jul => 7, Aug => 8, Sep => 9, Oct => 10, Nov => 11, Dec => 12 ); my %mails; &read_mbox(); &print_html(); &generate_index(); #### # Generate a HTML file for each email. sub print_html { my $count = 1; for (sort { $mails{$a}->{date} <=> $mails{$b}->{date} } keys %mail +s) { # warn "WARNING: overwriting $outputdir/mail$count.html\n" #if (-f "$outputdir/mail$count.htm +l"); open HTML, "> $outputdir/mail$count.html" or die "Couldn't open $outputdir/mail$count.html: +$!\n"; my $html = qq(<html> <head> <title>$mails{$_}->{subject}</title> </head> <body bgcolor="#ffffff"> <center> <h1>$mails{$_}->{subject}</h1> </center> <p align="center"><small>[ <a href="index.html#$count">thread< +/a> | <a href="date.html#$count">date</a> | <a href="subject.html#$co +unt">subject</a> | <a href="author.html#$count">author</a> ]</small>< +/p> <hr> ); my $date = scalar localtime($mails{$_}->{date}); my $from = &html_escape($mails{$_}->{from}); my $body = &html_escape($mails{$_}->{body}); $html .= qq( <b>From:</b> $from<br> <b>Subject:</b> $mails{$_}->{subject}<br> <b>Date:</b> $date<br> <hr> $body <hr> <p align="center"><small>[ <a href="index.html#$count">thread< +/a> | <a href="date.html#$count">date</a> | <a href="subject.html#$co +unt">subject</a> | <a href="author.html#$count">author</a> ]</small>< +/p> </body> </html> ); print HTML $html; close HTML; $mails{$_}->{html} = "mail$count.html"; $count++; } } #### # Generate the index files. sub generate_index { my $countmsg = scalar keys %mails; # Sorted by thread. open INDEX, "> $outputdir/index.html" or die "Couldn't open $outpu +tdir/index.html: $!\n"; my $html = qq(<html> <head> <title>Mailbox</title> </head> <body bgcolor="#ffffff"> <center><h1>Mailbox</h1></center> <p align="center"> <b>$countmsg messages.</b><br> Ordered by thread.<br> <small>Order by [ <a href="date.html">date</a> | <a href="subj +ect.html">subject</a> | <a href="author.html">author</a> ].</small> </p> <hr> <ul> ); for (sort { $mails{$a}->{date} <=> $mails{$b}->{date} } keys %mail +s) { next if $mails{$_}->{seen_thread}; my $date = scalar localtime($mails{$_}->{date}); my $from = &strip_email($mails{$_}->{from}); my $anchor = $mails{$_}->{'html'}; $anchor =~ s/mail(\d+)\.html/$1/; $html .= qq| <li><b><a href="$mails{$_}->{'html'}">$mails{$_}->{'subjec +t'}</a></b> <i><small><a name="$anchor">$from ($date)</a></small></i> +</li> |; $mails{$_}->{seen_thread}++; $html .= &check_replies($_); } $html .= qq( </ul> <hr> </body> </html> ); print INDEX $html; close INDEX; # Sorted by date. open DATE, "> $outputdir/date.html" or die "Couldn't open $outputdir/date.html: $! +\n"; $html = qq(<html> <head> <title>Mailbox</title> </head> <body bgcolor="#ffffff"> <center><h1>Mailbox</h1></center> <p align="center"> <b>$countmsg messages.</b><br> Ordered by date.<br> <small>Order by [ <a href="index.html">thread</a> | <a href="s +ubject.html">subject</a> | <a href="author.html">author</a> ].</small +> </p> <hr> <ul> ); for (sort { $mails{$a}->{date} <=> $mails{$b}->{date} } keys %mail +s) { my $date = scalar localtime($mails{$_}->{date}); my $from = &strip_email($mails{$_}->{from}); my $anchor = $mails{$_}->{'html'}; $anchor =~ s/mail(\d+)\.html/$1/; $html .= qq| <li><b><a href="$mails{$_}->{'html'}">$mails{$_}->{'subjec +t'}</a></b> <i><small><a name="$anchor">$from ($date)</a></small></i> +</li> |; } $html .= qq( </ul> <hr> </body> </html> ); print DATE $html; close DATE; # Sorted by subject. open SUBJECT, "> $outputdir/subject.html" or die "Couldn't open $outputdir/subject.html: $!\ +n"; $html = qq(<html> <head> <title>Mailbox</title> </head> <body bgcolor="#ffffff"> <center><h1>Mailbox</h1></center> <p align="center"> <b>$countmsg messages.</b><br> Ordered by subject.<br> <small>Order by [ <a href="index.html">thread</a> | <a href="d +ate.html">date</a> | <a href="author.html">author</a> ].</small> </p> <hr> <ul> ); foreach my $mail (sort { lc $mails{$a}->{clean_subject} cmp lc $ma +ils{$b}->{clean_subject} } keys %mails) { next if $mails{$mail}->{seen_subject}; $html .= qq( <li><b>$mails{$mail}->{clean_subject}</b></li> <ul> ); foreach (keys %mails) { if ($mails{$_}->{clean_subject} eq $mails{$mail}->{clean_s +ubject}) { my $date = scalar localtime($mails{$_}->{date}); my $from = &strip_email($mails{$_}->{from}); my $anchor = $mails{$_}->{'html'}; $anchor =~ s/mail(\d+)\.html/$1/; $html .= qq| <li><a href="$mails{$_}->{html}">$from</a> <small><i>< +a name="$anchor">($date)</a></i></small></a></li> |; $mails{$_}->{seen_subject}++; } } $html .= "</ul>\n"; } $html .= qq( </ul> <hr> </body> </html> ); print SUBJECT $html; close SUBJECT; # Sorted by author open AUTHOR, "> $outputdir/author.html" or die "Couldn't open $outputdir/author.html: $!\n +"; $html = qq(<html> <head> <title>Mailbox</title> </head> <body bgcolor="#ffffff"> <center><h1>Mailbox</h1></center> <p align="center"> <b>$countmsg messages.</b><br> Ordered by author.<br> <small>Order by [ <a href="index.html">thread</a> | <a href="d +ate.html">date</a> | <a href="subject.html">subject</a> ].</small> </p> <hr> <ul> ); foreach my $mail (sort { lc $mails{$a}->{from} cmp lc $mails{$b}-> +{from} } keys %mails) { next if $mails{$mail}->{seen_author}; my $from = &html_escape($mails{$mail}->{from}); $html .= qq( <li><b>$from</b></li> <ul> ); foreach (keys %mails) { if ($mails{$mail}->{from} eq $mails{$_}->{from}) { my $date = scalar localtime($mails{$_}->{date}); my $anchor = $mails{$_}->{'html'}; $anchor =~ s/mail(\d+)\.html/$1/; $html .= qq| <li><a href="$mails{$_}->{html}">$mails{$_}->{subject} +</a> <small><i><a name="$anchor">($date)</a></i></small></a></li> |; $mails{$_}->{seen_author}++; } } $html .= "</ul>\n"; } print AUTHOR $html; close AUTHOR; $html .= qq( </ul> <hr> </body> </html> ); } #### # Recursive subroutine the check for replies. sub check_replies { my $id = shift; my $html; $html = "<ul>\n"; foreach (sort { $mails{$a}->{date} <=> $mails{$b}->{date} } keys % +mails) { next if $_ eq $id; next unless $mails{$_}->{refs}; next if $mails{$_}->{seen_thread}; if ($mails{$_}->{refs} eq $id) { my $date = scalar localtime($mails{$_}->{date}); my $from = &strip_email($mails{$_}->{from}); my $anchor = $mails{$_}->{'html'}; $anchor =~ s/mail(\d+)\.html/$1/; $html .= qq| <a name="$anchor"></a> <li><b><a href="$mails{$_}->{html}">$mails{$_}->{subje +ct}</a></b> <i><small><a name="anchor">$from ($date)</a></small></i>< +/li> |; $mails{$_}->{seen_thread}++; $html .= &check_replies($_); } } $html .= "</ul>\n"; return $html eq "<ul>\n</ul>\n" ? '' : $html; } #### # Beautify the output, create links of appropriate tags. sub html_escape { my $thing = shift; $thing =~ s/</&lt;/g; $thing =~ s/>/&gt;/g; $thing =~ s/"/&quot;/g; $thing =~ s/\n/<br>/g; $thing =~ s!\b([-\w+.]+\@[-\w+.]+)\b!<a href="mailto:$1">$1</a>!g; $thing =~ s!\b(https?://[-\w?&/.+]+)\b!<a href="$1"></a>!g; return $thing; } #### # Strip the email address sub strip_email { my $original = shift; my ($email) = $original =~ m/\b<[-\w.]+\@[-\w.]+>\b/; $original =~ s/<.*>//; return $original ? $original : $email; } #### # Read in the mailbox file and generate the data structure. sub read_mbox { # This will be our message container. my $current; # This indicates, that the last line was blank, initially set to t +rue, # so we can parse the first mail correctly. my $blank = 1; open MBOX, $mbox or die "Couldn't open mailbox $mbox: $!\n"; while (<MBOX>) { # There was a blank line before, and this line looks like the +beginning # of a new mail, so we need to take some action. if ($blank && /^From .*\d{4}$/) { # Save the message that we've parsed before (if there was +one). $mails{$current->{message_id}} = $current if scalar keys %{$current}; # Create a new container for this message. $current = {}; # Set the blank line to zero. $blank = 0; # We're still in the header part, so we save some. } elsif (!$blank && /^From: (.*)/i) { my $from = $1; $from =~ s/"//g; $current->{from} = $from; } elsif (!$blank && /^Subject: (.*)/i) { $current->{subject} = $1; my $clean_subject = $1; $clean_subject =~ s/Re: (.*)/$1/i; $current->{clean_subject} = $clean_subject; } elsif (!$blank && /^Message-Id: (.*)/i) { $current->{message_id} = $1; } elsif (!$blank && /^Date: (.*)/) { $current->{date} = parsedate($1); warn "Could parse date: $!\n" unless $current->{date}; } elsif (!$blank && /^(?:References|In-Reply-To): (<.+>)/i) { $current->{refs} = @{ [ split(/ /, $1) ] }[-1]; # There was a blank line before, but it wasn't catched by the +if- # statement above, so it must be the message body. } elsif ($blank) { $current->{body} .= $_; } # Aha, we have a blank line. This could've been the end of the + header. $blank = 1 if /^$/; } close MBOX; } sub parsedate { my $date = shift; # print $date, "\n"; my ($wday, $mday, $mon, $year, $time, $hrs, $min, $sec); if ($date =~ /^\d\d?\s/) { ($mday, $mon, $year, $time) = split(/ /, $date); } elsif ($date =~ /^\w{3},\s/) { ($wday, $mday, $mon, $year, $time) = split(/,?\s+/, $date); } ($hrs, $min, $sec) = split(/:/, $time); $mon = $monthmap{$mon}; $mon--; $year -= 1900; return timelocal($sec, $min, $hrs, $mday, $mon, $year); }

In reply to PMail by le

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Are you posting in the right place? Check out Where do I post X? to know for sure.
  • Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
    <code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
  • Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
  • Want more info? How to link or How to display code and escape characters are good places to start.
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others avoiding work at the Monastery: (8)
As of 2024-04-23 10:02 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found