Well I adjusted the code a bit with a check to upload every 10 records, instead of just building a huge array first, which I was doing. Now it runs fine, and on a small test of 123 messages it worked fine. I will continue to scal up to see how it performs. Gradually increasing the number of messages added to the local array before uploading to the db. I really want to keep the DB calls to a minimum, but if this is what it takes to make it run I'm not worried about it.
Thanks for all your insight. And of course if you read this and say, "Hey you could have just done this..." Feel free to pick me apart.
Ketema
#Modules
use strict;
use Net::POP3;
use MIME::Parser;
use Shell;
use DBI;
use HTTP::Date;
use MessageData; #Hand written data structure object to hold Message
+data
#use XML::Parser; #A perl module for parsing XML documents
#use XML::Stream; #Creates and XML Stream connection and parses ret
+urn data
#use XML::Writer; #thinking about implementing XML in the future to
+ avoid character escape problems.
#use IO::File;
#use Mail::IMAPClient; In Future for moving messages around in the sto
+re
#MIME Tools Settings
MIME::Tools->debugging(0);
MIME::Tools->quiet(0);
my $host = "somepopserver";
my $port = "110";
my $user = "someuser";
my $pass = "123456";
my $dbUser = "user";
my $dbPass = "pass";
my $knetUser = "user";
my $knetPass = "pass";
my $pop_server = Net::POP3->new($host); #Use POP3 Protocol
#my $imap_server = Mail::IMAPClient->new(Server => $host,User => $user
+, Password => $pass) or die "IMAP connect Failed $@\n"; #use IMAP pro
+tocol
my $mime_parser = new MIME::Parser;
$mime_parser->output_to_core(1); # keep in memory
$mime_parser->decode_headers(1);#Variables
my $fdnnt40 = DBI->connect('DBI:Sybase:server=fdnnt40', $dbUser, $dbPa
+sswd);
my $tema1 = DBI->connect('DBI:Sybase:server=tema-1',$knetUser,$knetPas
+s);
#Win32 Only
#my $DSN = "Provider=SQLOLEDB.1;Integrated Security=SSPI;Persist Secur
+ity Info=False;Initial Catalog=ketemanetDB;Data Source=tema-1";
#my $localConn = Win32::OLE->new("ADODB.Connection"); #connection to l
+ocal SQL Server
#$localConn->Open($DSN);
$pop_server->login($user, $pass) or die "Login Failed @{[$pop_server->
+message]}\n";
my $messages = $pop_server->list();
my $msgData = new MessageData(); # make new message data object
my @msgArray = [];
my $indexCounter = 0;
#this subroutine goes through a message until
# all its body parts are dissected and printed
sub getAttachments {
my $numAttachments = $_[0]->parts;
print "This message has $numAttachments attachments\n";
for (my $i = 0; $i < $numAttachments; $i++) {
my $attachment = $_[0]->parts($i);
#print the attachment preamble
my $lineCounter = 0;
my $att_preamble = $attachment->preamble;
foreach (@$att_preamble) {
$msgData->{"attachmentdata"} = $msgData->{"attachm
+entdata"}."$$att_preamble[$lineCounter++]\n";
}
$lineCounter = 0;
#print the attachment header
my $att_header = $attachment->head;
$msgData->{"attachmentdata"} = $msgData->{"attachmentd
+ata"}.$att_header->as_string;
#print the body
my $att_body = $attachment->bodyhandle;
if (defined($att_body)) {
$msgData->{"attachmentdata"} = $msgData->{"attachm
+entdata"}.$att_body->as_string;
#print $msgData->{"attachmentdata"}."\n";
$msgData->{"attachmentdata"} =~ s/'/''/mg;
#$msgData->{"attachmentdata"} =~ s/&/$ordValue/mg;
}
if ($attachment->parts() > 0) {
getAttachments($attachment)
}
}
print "Exiting getAttachments Subroutine\n";
}
sub upLoad {
foreach my $record (@msgArray) {
#my $xmldoc = "<Mail>
#<msgid>$record->{"msgid"}</msgid>
#<subject>$record->{"subject"}</subject>
#<sourceip>$record->{"sourceip"}</sourceip>
#<from>$record->{"from"}</from>
#<to>$record->{"to"}</to>
#<allheaders>$record->{"allheaders"}</allheaders>
#<preamble>$record->{"preamble"}</preamble>
#<body>$record->{"body"}</body>
#<receiveddate>$record->{"receiveddate"}</receiveddate
+>
#<attachmentdata>$record->{"attachmentdata"}</attachme
+ntdata>
#</Mail>";
#$xmlRecord = $xmlRecord.$xmldoc;
my $executeSQL = "INSERT INTO FDNMail (msgid,subject,sourceip,
+sourcednsname,[from],[to],allheaders,preamble,body,receiveddate,attac
+hmentdata,
custnum,[X-AOLIP]) VALUES ('".$record->{"msgid"}."','"
+.$record->{"subject"}."','".$record->{"sourceip"}."','".$record->{"so
+urcednsname"}.
"','".$record->{"from"}."','".$record->{"to"}."','".$r
+ecord->{"allheaders"}."','".$record->{"preamble"}."','".$record->{"bo
+dy"}.
"','".$record->{"receiveddate"}."','".$record->{"attac
+hmentdata"}."','".$record->{"custnum"}."','".$record->{"xaolip"}."')"
+;
my $action = $tema1->prepare($executeSQL);
eval($action->execute);
if ($@) {
print "$_\n";
}
}
}
#Loop through Each Message, Parse it, Store it, and Figure out who sen
+t it
if( $messages ) {
#get the key ids of messages in the hash
my $numMessages = keys %$messages;
print "there are $numMessages total messages:\n";
while(my ($msgid, $size) = each %$messages) {
my $fh = $pop_server->getfh($msgid);
my $mime_entity = $mime_parser->parse($fh);
#my $ordValue = ord("&"); Need this when dealing with XML
#print the preamble
my $lineCounter = 0;
my $preamble = $mime_entity->preamble;
foreach (@$preamble) {
$msgData->{"preamble"} = $msgData->{"preamble"} . "$$pream
+ble[$lineCounter++]";
$msgData->{"preamble"} =~ s/'/''/mg;
#$msgData->{"preamble"} =~ s/&/$ordValue/mg;
#$msgData->{"preamble"} =~ s/</</mg;
#$msgData->{"preamble"} =~ s/>/>/mg;
}
$lineCounter = 0;
#print the header
my $header = $mime_entity->head;
$msgData->{"allheaders"} = $header->as_string;
$msgData->{"allheaders"} =~ s/'/''/mg;
#$msgData->{"allheaders"} =~ s/&/$ordValue/mg;
#$msgData->{"allheaders"} =~ s/</</mg;
#$msgData->{"allheaders"} =~ s/>/>/mg;
$msgData->{"msgid"} = $header->get('Message-Id');
$msgData->{"msgid"} =~ s/'/''/mg;
#$msgData->{"msgid"} =~ s/&/$ordValue/mg;
#$msgData->{"msgid"} =~ s/</</mg;
#$msgData->{"msgid"} =~ s/>/>/mg;
$msgData->{"subject"} = $header->get('Subject');
$msgData->{"subject"} =~ s/'/''/mg;
chop($msgData->{"subject"});
#$msgData->{"subject"} =~ s/&/$ordValue/mg;
#$msgData->{"subject"} =~ s/</</mg;
#$msgData->{"subject"} =~ s/>/>/mg;
$msgData->{"from"} = $header->get('From');
$msgData->{"from"} =~ s/'/''/mg;
#$msgData->{"from"} =~ s/&/$ordValue/mg;
#$msgData->{"from"} =~ s/</</mg;
#$msgData->{"from"} =~ s/>/>/mg;
$msgData->{"to"} = $header->get('To');
$msgData->{"to"} =~ s/'/''/mg;
#$msgData->{"to"} =~ s/&/$ordValue/mg;
#$msgData->{"to"} =~ s/</</mg;
#$msgData->{"to"} =~ s/>/>/mg;
$msgData->{"receiveddate"} = HTTP::Date::time2iso(str2time($he
+ader->get('Date')));
#print the body
my $body = $mime_entity->bodyhandle;
if (defined($body)) {
$msgData->{"body"} = $body->as_string;
$msgData->{"body"} =~ s/'/''/mg;
#$msgData->{"body"} =~ s/&/$ordValue/mg;
#keep this check here also incase the format of the aol me
+ssages changes
if ($msgData->{"subject"} eq "Client TOS Notification" &&
+$msgData->{"body"} =~ /X-AOL-IP: [0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.
+[0-9]{1,3}/m){
$msgData->{"xaolip"} = $&;
}
}
#get any attachments and print those
if ($mime_entity->parts() > 0) {
getAttachments($mime_entity);
}
#after we have all the attachmetn data, many of the messages f
+rom AOL appear inthe body, but actually have the offender IP in the
#attachment data, find it, and put it in its own column
if ($msgData->{"subject"} eq "Client TOS Notification" && $msg
+Data->{"attachmentdata"} =~ /X-AOL-IP: [0-9]{1,3}\.[0-9]{1,3}\.[0-9]{
+1,3}\.[0-9]{1,3}/m){
my $aolLine = $&;
$aolLine =~ /[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3
+}/m;
$msgData->{"xaolip"} = $&;
}
#figure out the source IP of the message sender
#first we will take a look at where the message actually came
+from, then we attempt to determine what FDN customer the message is t
+alking about
#messages with an X-AOL-IP field will just be looked up, other
+s we have to search the body of the message for an FDN IP. We wil ta
+ke teh first match until
#better rules are formulated to determine whom the message is
+taking about. I can implement rules for MyNetwatchman in addition to
+ the aol reports.
my $firstSender = $header->get('Received', $header->count('Rec
+eived') - 1);
if($firstSender =~ /([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]
+{1,3})/m) {
$msgData->{"sourceip"} = $1;
}
elsif ($firstSender =~ /from.*by [\w\-\.]*[\s\S]/m) { #reading
+ the first Received Header line
my $sender = $&;
my @fromBy = split(/ /,$sender);
my $firstHop = hostx( substr($fromBy[1],0,length($fromBy[1
+]) - 2) );
my $secondHop = hostx( $fromBy[2]);
if ($firstHop =~ /([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-
+9]{1,3})/m) {
$firstHop = $&;
my @octets = split(/\./, "$&");
my $custNum = "";
if ($octets[0] == '216' && $octets[1] == '199') {
my $commandString = "SELECT netaddr1,netaddr2,neta
+ddr3,netaddr4,assignedto,rqst_no
FROM ( SELECT *
FROM dbo.IPMAP
WHERE (netaddr3 = '$octets[2]')
)as temptable
WHERE netaddr4 = '$octets[3]'";
my $commandString2 = "SELECT top 1 netaddr1,netadd
+r2,netaddr3,netaddr4,assignedto,rqst_no
FROM ( SELECT *
FROM dbo.IPMAP
WHERE (netaddr3 = '$octets[2]')
)as temptable
WHERE netaddr4 <= '$octets[3]'
ORDER BY netaddr4 desc";
my $action = $fdnnt40->prepare($commandString);
$action->execute;
my @rows = $action->fetchrow_array;
die "Open Failed: $@" if $@;
if (@rows == 0) {
$action = $fdnnt40->prepare($commandString2);
$action->execute;
@rows = $action->fetchrow_array;
if (@rows == 0) {
$custNum = "NOT A CUSTOMER IP";
}
else {
my $fieldnum = 0;
$custNum = $rows[4];
}
}
else {
$custNum = $rows[4];
}
}
$msgData->{"sourceip"} = $&;
$msgData->{"sourcednsname"} = substr($fromBy[1],0,leng
+th($fromBy[1]) - 2);
$msgData->{"custnum"} = $custNum;
}
elsif ($secondHop =~ /([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\
+.[0-9]{1,3})/m) {
$secondHop = $&;
my @octets = split(/\./, "$&");
my $custNum = "";
if ($octets[0] == '216' && $octets[1] == '199') {
my $commandString = "SELECT netaddr1,netaddr2,neta
+ddr3,netaddr4,assignedto,rqst_no
FROM ( SELECT *
FROM dbo.IPMAP
WHERE (netaddr3 = '$octets[2]')
)as temptable
WHERE netaddr4 = '$octets[3]'";
my $commandString2 = "SELECT top 1 netaddr1,netadd
+r2,netaddr3,netaddr4,assignedto,rqst_no
FROM ( SELECT *
FROM dbo.IPMAP
WHERE (netaddr3 = '$octets[2]')
)as temptable
WHERE netaddr4 <= '$octets[3]'
ORDER BY netaddr4 desc";
my $action = $fdnnt40->prepare($commandString);
$action->execute;
my @rows = $action->fetchrow_array;
die "Open Failed: $@" if $@;
if (@rows == 0) {
$action = $fdnnt40->prepare($commandString2);
$action->execute;
@rows = $action->fetchrow_array;
if (@rows == 0) {
$custNum = "Invalid customer IP";
}
else {
my $fieldnum = 0;
$custNum = $rows[4];
}
}
else {
$custNum = $rows[4];
}
}
$msgData->{"sourceip"} = $&;
$msgData->{"sourcednsname"} = $fromBy[2];
$msgData->{"custnum"} = $custNum;
}
}
else {
$msgData->{"sourceip"} = "fraudsuspect";
$msgData->{"custnum"} = "Invalid customer IP";
}
if ($msgData->{"xaolip"} ne "" && $msgData->{"custnum"} eq "In
+valid customer IP") {
my @octets = split(/\./, $msgData->{"xaolip"});
my $custNum = "";
if ($octets[0] == '216' && $octets[1] == '199') {
my $commandString = "SELECT netaddr1,netaddr2,netaddr3
+,netaddr4,assignedto,rqst_no
FROM ( SELECT *
FROM dbo.IPMAP
WHERE (netaddr3 = '$octets[2]')
)as temptable
WHERE netaddr4 = '$octets[3]'";
my $commandString2 = "SELECT top 1 netaddr1,netaddr2,n
+etaddr3,netaddr4,assignedto,rqst_no
FROM ( SELECT *
FROM dbo.IPMAP
WHERE (netaddr3 = '$octets[2]')
)as temptable
WHERE netaddr4 <= '$octets[3]'
ORDER BY netaddr4 desc";
my $action = $fdnnt40->prepare($commandString);
$action->execute;
my @rows = $action->fetchrow_array;
die "Open Failed: $@" if $@;
if (@rows == 0) {
$action = $fdnnt40->prepare($commandString2);
$action->execute;
@rows = $action->fetchrow_array;
if (@rows == 0) {
$custNum = "Invalid customer IP";
}
else {
my $fieldnum = 0;
$custNum = $rows[4];
}
}
else {
$custNum = $rows[4];
}
}
$msgData->{"custnum"} = $custNum;
}
#add this hash to array of items to be uploaded to database, s
+o we don't have multiple calls to the DB
$msgArray[$indexCounter++] = $msgData;
#added to control amount of messages in local array.
+Explicitly destroying array and recreating it.
undef($msgData);
if ($indexCounter >= 10) {
upLoad();
splice(@msgArray,0,scalar(@msgArray));
print "Uploaded $indexCounter messages to database\n";
$indexCounter = 0;
}
print $indexCounter."\n";
}
}
$pop_server->quit();
#now we should have an array of MessageData Items. let's build our si
+ngle Insert statement and make one call to the database to upload
#add this message as a xml record
#will perform this later. For now Insert Each Record individually as
+text data.
#my $xmlRecord = "'<ROOT>";
#$xmlRecord = $xmlRecord."</ROOT>'";
#win32only
#eval ($localConn->Execute("addMail($xmlRecord)"));
# if (Win32::OLE->LastError()) {
# open DEBUG, ">C:\\Documents and Settings\\Administrator\\My D
+ocuments\\Perl Programs\\popmailbox\\debug.txt";
# print DEBUG "------------------------------------------------
+---------------------------------------\n";
# print DEBUG Win32::OLE->LastError()."\n";
# print DEBUG "------------------------------------------------
+---------------------------------------\n";
# print DEBUG "-----------------END OF SQL STATEMENT-----------
+----------------\n\n";
# print DEBUG "$xmlRecord\n";
# }
#close DEBUG;
print "Message Data uploaded to database";
1
| [reply] [d/l] |
I don't have time right now to look through all your code. I'll spend some time on it later tonight, but for now a quick glance reveals what I consider a major problem. You have many SQL insert and update statements that use outside data, but you don't use placeholders. I haven't looked carefully enough to know for sure if you're vulnerable, but this is a prime candidate for SQL injection attacks.
Here's an example of one of your SQL inserts:
my $executeSQL = "INSERT INTO FDNMail (msgid,subject,sourceip,
+sourcednsname,[from],[to],allheaders,preamble,body,receiveddate,attac
+hmentdata,
custnum,[X-AOLIP]) VALUES ('".$record->{"msgid"}."','"
+.$record->{"subject"}."','".$record->{"sourceip"}."','".$record->{"so
+urcednsname"}. "','".$record->{"from"}."','".$record->
+{"to"}."','".$record->{"allheaders"}."','".$record->{"preamble"}."','
+".$record->{"body"}. "','".$record->{"receiveddate"}."
+','".$record->{"attachmentdata"}."','".$record->{"custnum"}."','".$re
+cord->{"xaolip"}."')";
my $action = $tema1->prepare($executeSQL);
Besides being generally hard to read and somewhat messy, if any of the values from $record contain tainted data, you have a huge vulnerability. I would write that like this:
my %fields = (
msgid => $record->{msgid},
subject => $record->{subject},
sourceip => $record->{sourceip},
sourcednsname => $record->{sourcednsname},
'[from]' => $record->{from},
'[to]' => $record->{to},
allheaders => $record->{allheaders},
preamble => $record->{preamble},
body => $record->{body},
receiveddate => $record->{receiveddate},
attachmentdata => $record->{attachmentdata},
custnum => $record->{custnum},
'[X-AOLIP]' => $record->{xaolip},
);
my $sql = "INSERT"
. " INTO FDNMail (" . join(",", keys %fields) . ")"
. " VALUES (" . join(",", ('?') x keys %fields) . ")";
my $fdnmail_sth = $temal->prepare($sql);
$fdnmail_sth->execute(values %fields);
This has several advantages.
- It's easier to maintain. The whole thing is controlled by the %fields hash, so new fields can be added, or fields can be removed, very easily. There won't be problems counting commas to make sure everything lines up.
- It's safer. Using placeholders eliminates the possiblity of SQL injection attacks, because proper escaping and quoting is done automatically.
- It's generic. The actual insertion code could be abstracted to a subroutine that takes in the DB handle and a hash of fields, making it easy to get these advantages all througout your code. Also, since the quoting and escaping is done by the DB driver, this is portable to any database that DBI supports.
| [reply] [d/l] [select] |