in reply to using Parallel::TaskManager and FileHandles.
#!/usr/bin/perl -w
#
# validemail - sort a list of email addresses into "good" and "bad" files.
#
# Usage: validemail <file containing email addresses>
#
# The input file holds one address per line. Each address is checked in its
# own child process and appended to one of two files in the current directory:
#   goodmails - syntactically valid addresses whose domain has an MX record
#   badmails  - everything else (custom-rejected, malformed, or no MX record)
#
# WARNING: existing goodmails/badmails files are overwritten.

use warnings;
use strict;
use Email::Valid::Loose;
use Net::DNS;
use Parallel::ForkManager;
use Fcntl qw/:flock :seek/;

# Maximum number of concurrent child processes. Increase at your own risk;
# if your box takes a performance hit, decrease this number.
my $max_children = 20;

# Output file names.
my $bad_file  = 'badmails';
my $good_file = 'goodmails';

# Custom words that make an address invalid to you.
my @custom = ( qr/postmaster/i, qr/webmaster/i );

my $addrfile = $ARGV[0];
die "usage: $0 <file containing email addresses>\n"
    unless defined $addrfile;

my $pm       = Parallel::ForkManager->new($max_children);
my $resolver = Net::DNS::Resolver->new();

# Read the whole address list before forking anything.
open my $emails, '<', $addrfile
    or die "Cannot open $addrfile: $!\n";
my @adds;
while ( my $line = <$emails> ) {
    $line =~ s/\015//;      # strip the carriage return of DOS line endings
    chomp $line;
    next if $line eq '';    # a blank line is not an address; don't fork for it
    push @adds, $line;
}
close $emails;

# Create/truncate both output files once, in the parent. The children do NOT
# inherit an open handle: each one opens the file itself in writeaddr().
for my $path ( $bad_file, $good_file ) {
    open my $fh, '>', $path or die "Cannot create $path: $!\n";
    close $fh or die "Cannot close $path: $!\n";
}

for my $add (@adds) {
    $pm->start and next;    # parent: go on to the next address

    # Child process from here on; finish() exits the child.
    writeaddr( is_deliverable($add) ? $good_file : $bad_file, $add );
    $pm->finish;
}
$pm->wait_all_children;

# is_deliverable($address)
#
# Returns 1 when $address passes all three checks, 0 otherwise:
#   1. it matches none of the @custom reject patterns,
#   2. Email::Valid::Loose accepts it,
#   3. a DNS lookup finds at least one MX record for its domain.
sub is_deliverable {
    my ($address) = @_;

    for my $pattern (@custom) {
        return 0 if $address =~ $pattern;
    }

    # address() returns the (possibly normalised) address, or undef if invalid.
    my $valid = Email::Valid::Loose->address($address);
    return 0 unless defined $valid;

    my ($host) = $valid =~ m/\@(.*)$/;
    return 0 unless defined $host && length $host;

    my @mx = mx( $resolver, $host );
    return @mx ? 1 : 0;
}

# writeaddr($path, $address)
#
# Appends "$address\n" to the file $path under an exclusive lock. Dies on any
# I/O failure.
#
# The file is opened here, per call, rather than once in the parent: where
# Perl's flock maps to flock(2), a lock belongs to the open file description,
# and handles inherited across fork share one description, so children
# locking an inherited handle would never exclude each other. A fresh open
# gives each child its own description, so LOCK_EX really serialises writers.
sub writeaddr {
    my ( $path, $address ) = @_;

    open my $fh, '>>', $path or die "Cannot open $path for append: $!\n";
    flock $fh, LOCK_EX       or die "Flock failed: $!\n";
    seek $fh, 0, SEEK_END    or die "Seek failed: $!\n";
    print {$fh} "$address\n" or die "Write to $path failed: $!\n";

    # close flushes the buffered line and then releases the lock.
    close $fh or die "Cannot close $path: $!\n";
    return;
}
Comments welcome.
Neil Watson
watson-wilson.ca
|
|---|