in reply to extract email addresses

# Scan every "*.<filetype>" file in a directory and print each distinct
# email address found, together with the elapsed scan time in seconds.
#
# Usage: <script> filetype [directory]
#   filetype  - file extension to scan, without the leading dot (required)
#   directory - directory to search; when omitted the glob is relative to
#               the current working directory
use strict;
use warnings;

my $type = shift || die "usage: filetype [directory]\n";
my $dir  = shift || "";

# The directory and the wildcard must be joined by a path separator;
# add one unless the caller already supplied a trailing / or \.
$dir .= '/' if length $dir && $dir !~ m{[\\/]\z};

# Address pattern. The surrounding \W* only soaks up punctuation around the
# address; the capture group holds the address itself. The local part may
# contain dot-separated pieces, and every domain label may contain hyphens.
# [\w-]+ is used instead of a nested (?:\w+|-)+ to avoid needless
# backtracking on long runs of word characters.
my $mail = qr{
    \W*
    (
        \.* [\w-]+ (?: \. [\w-]+ )* \.*
        \@
        \.* [\w-]+ (?: \. [\w-]+ )+
    )
    \W*
}x;

my @files = glob("${dir}*.$type");

my %seen;            # address => 1, used purely for de-duplication
my $s_time = time;

for my $file (@files) {
    open my $fh, '<', $file or die "can't open $file: $!\n";

    # Read line by line rather than slurping the whole file into memory.
    while ( my $line = <$fh> ) {

        # Iterate with /g so that every address on the line is recorded,
        # not just the first one.
        while ( $line =~ /$mail/g ) {
            $seen{$1} = 1;
        }
    }

    close $fh;
}

my $e_time = time;

if ( keys %seen ) {
    print "Total time: ", $e_time - $s_time, "\n";
    print "Total email addresses: ", scalar keys %seen, "\n\n";

    # Sorted so that repeated runs produce the same listing order.
    print "$_\n" for sort keys %seen;
}
else {
    print "No email address found\n";
}


Replies are listed 'Best First'.
Re^2: extract email addresses
by johnajb (Novice) on Feb 19, 2005 at 01:13 UTC
    All email addresses will be formatted like so:
    <smtp:"emailaddress">
    An address could be something.something.something@something.something.something.com,
    but it will always be in the angle brackets with smtp: in it.
      use strict;
      use warnings;

      # Extract every address wrapped in an <smtp:...> marker. The address
      # may optionally be enclosed in double quotes inside the marker.
      my $text = '<smtp:something.something.something@something.something.something.com>';

      # Negated character classes (instead of a greedy .*) keep each match
      # inside its own marker, so several markers in one string yield
      # several separate addresses rather than one merged capture.
      my @addresses = $text =~ m{<smtp:"?([^"<>\s]+\@[^"<>\s]+)"?>}g;

      print "$_\n" foreach @addresses;
      But I still prefer my Email::Find solution; then you will be safe! <edit>misspelling fixed</edit>
      It does not matter - this regular expression takes care of it:
      use strict;
      use warnings;

      # Demonstrates that one pattern extracts the address from both the
      # quoted and the unquoted <smtp:...> forms. The surrounding \W* soaks
      # up the punctuation around the address; the capture group is the
      # address itself. The local part may contain dot-separated pieces and
      # hyphens; every domain label may contain hyphens.
      my $mail_reg = qr{
          \W*
          (
              \.* [\w-]+ (?: \. [\w-]+ )*
              \@
              \.* [\w-]+ (?: \. [\w-]+ )+
          )
          \W*
      }x;

      my $mail_1 = '<smtp:"email.address@something.something.something.com">';
      my $mail_2 = '<smtp:emailaddress@something.something.something.com>';

      # $/ is the input record separator (a newline by default), used here
      # as the line terminator for the output.
      $mail_1 =~ m#$mail_reg# and print $1, $/;
      $mail_2 =~ m#$mail_reg# and print $1, $/;

      # which outputs:
      # email.address@something.something.something.com
      # emailaddress@something.something.something.com


        A reply falls below the community's threshold of quality. You may see it by logging in.