Category: E-Mail Programs
Author/Contact Info josh@grenekatz.org
Description:

Creates a white-list of people that are allowed to send you e-mail because they are in your inbox or you have already talked to them via e-mail. This program culls your address book, sent mail and anything else you give to it for e-mail addresses.

Alter your procmailrc to filter for viruses, the whitelist and then your spam folder.

# SAMPLE ~/.procmailrc
MAILDIR=$HOME/.Mail
PMDIR=$HOME/.procmail

# Known-good traffic because it comes from a mailing list
INCLUDERC=$PMDIR/rc.mailinglists

# Filter out virus traffic - see
# http://www.panix.com/~dman/vdoms/parve/pub/procmail/virussnag.rc
INCLUDERC=$PMDIR/rc.virus

# Filter out known spam
INCLUDERC=$PMDIR/rc.spam

# Include the whitelist so that everything else that hasn't already be
+en accounted for
# can either be moved to your Inbox or ...
INCLUDERC=$PMDIR/rc.green

# Send everything else to a spam inbox
:0:
spam/ archive/spam/

DEFAULT=$MAILDIR/spam

Alter your crontab so that this script is run once a day so it keeps your whitelist up to date with your e-mail.

# SAMPLE crontab entry
0 0 * * * $HOME/.procmail/write_greenlist

Executable script

#!/home/josh/perl5.8.3/bin/perl
use strict;
use warnings;
use File::Find 'find';
use File::Slurp 'read_file';
use Email::Find 'find_emails';
use vars qw( $WHITELIST $PROCMAIL_ACTION $ALIASES $HEADERS_TO_SEARCH
            @MAIL_TO_CULL_FOR_ADDRESSES $LOCAL_ADDRESS $EXCLUDE );
#use Data::Dumper;
#sub test ($) { print Dumper ${$_[0]};print "\n"; $_[0] }

# The whitelist will be written to this file. You must include this fi
+le in
# your ~/.procmailrc file somewhat like this:
#
# INCLUDERC=$HOME/.procmail/rc.green
$WHITELIST = "$ENV{HOME}/.procmail/rc.green";

# This is the procmail action that will be taken for any e-mails
# that are matched by this rule.
$PROCMAIL_ACTION  = "Inbox/ archive/Inbox/";

# glob() will pull in any .aliases files for your address book
$ALIASES = "$ENV{'HOME'}/.aliases*";

# File::Find will search everything here for addresses to put into the
# white-list.
@MAIL_TO_CULL_FOR_ADDRESSES = ( map( "$ENV{'HOME'}/.Mail/$_",
                                 "Sent/",
                                 map( "archive/$_/",
                                     qw(Uptown
                                        MTM
                                        greens
                                        morris
                                        mrf) ) ) );

# This is a regular expression to decide what the local address is. Wh
+en
# mail is searched for likely addresses, only things that do not match
# this pattern will be included.
$LOCAL_ADDRESS = qr/josh.{1,3}(?:lavendergreens|mngreens|grenekatz|gre
+entechnologist)\.org|jjore\@cpan\.org/i;

# Examine only lines that match this signature. This prevents garbage 
+like
# message-id headers from getting in.
$HEADERS_TO_SEARCH = qr/^(?:To|From|Cc|Bcc): /;

$EXCLUDE = qr/confirm|subscribe|request|acceptsub.+yahoogroups\.com/;

exit main( @ARGV );

sub main
{
    open my $out, ">", $WHITELIST
       or die "Can't open $WHITELIST for writing: $!";
    
    print( $out
          ":0:\n"
          . join( "",
                 ( map "* 1^0 $_\n",
                   get_addr() ) )
          . $PROCMAIL_ACTION 
          . "\n" );
    
    return 0;
}

sub get_addr
{
    map unpack( "N/a*", $_ ),
    sort { $b cmp $a }
    map pack( "N/a*", $_ ),
    unique( grep /\w[^\@]*\@.+\w/,
           map +(lc() =~ /(\S+)/g)[0],
           do { map /\<(.+?[^.>])\>/g,
               grep !/^#/,
               map read_file( $_ ),
               glob $ALIASES },
           do {
              my @sent_to;
              find( sub {
                  return 1 unless -f and -r;
                  
                  # use & to override the prototype the author of
                  # find_emails stupidly used. :-(
                  &find_emails( \ join( "",
                                     grep /$HEADERS_TO_SEARCH/ && !/$E
+XCLUDE/,
                                     split /^/,
                                     read_file( $_ ) ),
                              sub {
                                  local $_ = $_[1];
                                  return if /$LOCAL_ADDRESS/;
                                  my $lhs = (/^([^\@]+)@/)[0];
                                  my $num = () = $lhs =~ /(\d)/g;
                                  return if ( $num / length( $lhs ) ) 
+> .5;
                                  push @sent_to, $_;
                              } );
              }, @MAIL_TO_CULL_FOR_ADDRESSES );
              @sent_to; } );
}

sub unique
{
    my %c;
    return grep !$c{$_}++, @_;
}