# procXml($inFile)
#
# Parse one XML report file and return the RDF triples generated from it.
#
# The file is slurped with read_file(), converted to a nested hash with
# XML::Hash, and every cyveillance/inspected_url/URL element is visited.
# For each Binary below a URL (a list or a single element) that carries a
# 'Class' child, the URL-level and binary-level fields are normalised and
# appended to the triples string.
#
# Parameters:
#   $inFile - path of the XML file to process.
#
# Returns:
#   The accumulated triples string (undef when nothing was appended), or an
#   empty return when the XML could not be parsed.
#
# Side effects:
#   Prints PROCESSING / BAD XML / FINISHED progress lines to the selected
#   output handle, records names in %similar and increments $recordCount
#   (both declared outside this sub).
#
# NOTE(review): read_file() and strptime() are resolved from the enclosing
# file; presumably File::Slurp and Date::Parse - confirm against the imports.
# NOTE(review): the triple templates below contain no IRIs as seen in this
# revision; they are kept byte-for-byte - confirm nothing was stripped.
sub procXml {
    my ($inFile) = @_;
    my $triples;
    my (%countries, %avDetails, %avFiles);

    my $fsize = (-s $inFile) // 0;
    my $fmb   = $fsize / 1048576;
    print "PROCESSING: $inFile (" . sprintf("%.4f", $fmb) . " MB)\n";

    # read_file() opens the file itself; no separate handle is needed.
    my $xmlString     = read_file($inFile);
    my $xml_converter = XML::Hash->new();
    my $xml_hash;
    eval { $xml_hash = $xml_converter->fromXMLStringtoHash($xmlString); };
    if ($@) {
        print "BAD XML: $inFile\n";
        return;
    }
    undef $xmlString;    # release the raw document early

    my @serverPath = ('Server_Properties');
    my @ispPath    = ('Server_Properties', 'ISP_Data');

    my $urlNode = $xml_hash->{'cyveillance'}->{'inspected_url'}->{'URL'};
    foreach my $outer (_procXml_as_list($urlNode)) {
        next unless ref $outer eq 'HASH';

        my $domainName         = _procXml_text($outer, 'Domain_Name');
        my $exploitType        = _procXml_text($outer, 'Exploit_Type');
        my $inspectedTime      = _procXml_timestamp(_procXml_text($outer, 'InspectedTime'));
        my $ip                 = _procXml_clean(_procXml_text($outer, 'IP'));
        my $exploitDescription = _procXml_clean(_procXml_text($outer, 'Exploit_Description'));
        my $hostName           = _procXml_clean(_procXml_text($outer, 'Host_Name'));
        my $referenceUrl       = _procXml_clean($outer->{'reference_url'});

        # 'Binary' is a list when repeated and a single hash otherwise; both
        # shapes go through the same code path.
        foreach my $binary (_procXml_as_list($outer->{'Binary'})) {
            next unless ref $binary eq 'HASH';
            next unless defined $binary->{'Class'};

            my $fileName = _procXml_clean(_procXml_text($binary, 'File_Name'),   1);
            my $fileURL  = _procXml_clean(_procXml_text($binary, 'Binary_Path'), 1);
            my $pestName = _procXml_clean(_procXml_text($binary, 'Pest_Name'),   1);
            $pestName = $1 if defined $pestName && $pestName =~ m/Found potentially unwanted program (.*)\./;
            my $md5      = _procXml_clean(_procXml_text($binary, 'Hash', 'MD5'), 1);
            my $fileSize = _procXml_digits(_procXml_text($binary, 'File_Size'), undef);

            my $server_domainName = _procXml_clean(_procXml_text($binary, @serverPath, 'Domain_Name'));
            my $server_hostName   = _procXml_clean(_procXml_text($binary, @serverPath, 'Host_Name'));
            my $server_ip         = _procXml_clean(_procXml_text($binary, @serverPath, 'IP'));
            my $server_ISP        = _procXml_clean(_procXml_text($binary, @ispPath, 'ISP'));
            my $server_zipCode    = _procXml_clean(_procXml_text($binary, @ispPath, 'Zip_Code'));
            my $server_city       = _procXml_clean(_procXml_text($binary, @ispPath, 'City'));
            my $server_region     = _procXml_clean(_procXml_text($binary, @ispPath, 'Region'));
            my $server_country    = _procXml_clean(_procXml_text($binary, @ispPath, 'Country'));
            $server_country =~ s/\s/_/g if defined $server_country;

            # Counts fall back to '1' when missing or not a plain integer.
            my $server_numBinaries    = _procXml_digits(_procXml_text($binary, @ispPath, 'Number_Hosted_Binaries'), '1');
            my $server_numSitesHosted = _procXml_digits(_procXml_text($binary, @ispPath, 'Number_Hosted_Sites'),    '1');

            # Both spellings of the element name are accepted.
            my $webServer = _procXml_clean(_procXml_text($binary, @ispPath, 'Web_Server_Info'))
                         // _procXml_clean(_procXml_text($binary, @ispPath, 'Web_Server_info'))
                         // 'unknown';

            my %classes      = _procXml_flags($binary->{'Class'});
            my %avDetections = _procXml_detections($binary->{'Anti-Virus'});
            my %threatTypes  = _procXml_flags($binary->{'Type'});

            $triples .= qq| .\n| if defined $domainName;
            $triples .= qq| .\n| if defined $exploitType;
            $triples .= qq| "$exploitDescription" .\n| if defined $exploitDescription;
            $triples .= qq| "$inspectedTime"^^ .\n| if defined $inspectedTime;
            $triples .= qq| .\n| if defined $ip;
            $triples .= qq| .\n| if defined $hostName;
            $triples .= qq| .\n| if defined $referenceUrl;
            $triples .= qq| .\n| if defined $fileName;
            $triples .= qq| .\n| if defined $fileURL;
            $triples .= qq| .\n| if defined $md5;
            $triples .= qq| "$fileSize"^^ .\n| if defined $fileSize;
            $triples .= qq| .\n| if defined $pestName;
            $triples .= qq| "$webServer" .\n| if defined $webServer;
            $triples .= qq| .\n| if defined $server_domainName;
            $triples .= qq| .\n| if defined $server_hostName;
            $triples .= qq| .\n| if defined $server_ip;
            $triples .= qq| "$server_numSitesHosted"^^ .\n| if defined $server_numSitesHosted;
            $triples .= qq| "$server_numBinaries"^^ .\n| if defined $server_numBinaries;
            $triples .= qq| "$server_ISP" .\n| if defined $server_ISP;
            $triples .= qq| "$server_zipCode" .\n| if defined $server_zipCode;
            $triples .= qq| .\n| if defined $server_city;
            $triples .= qq| .\n| if defined $server_region;
            $triples .= qq| .\n| if defined $server_country;

            # Location links are emitted once per file. The guards test
            # region/city/zip but the lookup is keyed by country, so an
            # undefined country is looked up under '' (never populated).
            # NOTE(review): the city and zip guards do not require a country;
            # confirm whether that is intended.
            my $countryKey = $server_country // '';
            $triples .= qq| .\n|
                if defined $server_country && defined $server_region
                && !exists $countries{$countryKey}->{'regions'}->{$server_region};
            $triples .= qq| .\n|
                if defined $server_region && defined $server_city
                && !exists $countries{$countryKey}->{'cities'}->{$server_city};
            $triples .= qq| .\n|
                if defined $server_city && defined $server_zipCode
                && !exists $countries{$countryKey}->{'zipcodes'}->{$server_zipCode};
            if (defined $server_country) {
                $countries{$server_country}->{'regions'}->{$server_region}   = 1 if defined $server_region;
                $countries{$server_country}->{'cities'}->{$server_city}      = 1 if defined $server_city;
                $countries{$server_country}->{'zipcodes'}->{$server_zipCode} = 1 if defined $server_zipCode;
            }

            # NOTE(review): %avFiles stores file names, yet the stored value
            # is compared with the pest name; possibly meant $fileName.
            # Left unchanged - confirm.
            $triples .= qq| .\n|
                if (defined $fileName && defined $pestName)
                && (!exists $avFiles{$pestName} || $avFiles{$pestName} ne $pestName);
            $avFiles{$pestName} = $fileName if defined $fileName && defined $pestName;

            foreach my $vendor (keys %avDetections) {
                my $sig = $avDetections{$vendor}->{'Signature_Version'};
                my $eng = $avDetections{$vendor}->{'Engine_Version'};
                my $tn  = $avDetections{$vendor}->{'Threat_Name'};
                $tn =~ s/\s/_/g if defined $tn;

                $triples .= qq| .\n|;
                $triples .= qq| "$eng" .\n| if defined $eng;
                $triples .= qq| "$sig" .\n| if defined $sig;
                $triples .= qq| .\n| if defined $tn;
                next unless defined $fileName;

                $triples .= qq| .\n|
                    unless exists $avDetails{$fileName}->{'avDetection'}->{$vendor};
                $triples .= qq| .\n|
                    if defined $tn && !exists $avDetails{$fileName}->{'avThreatName'}->{$tn};
                $avDetails{$fileName}->{'avDetection'}->{$vendor} = 1;
                if (defined $tn) {
                    $avDetails{$fileName}->{'avThreatName'}->{$tn} = 1;
                    $avFiles{$tn} = $fileName;
                }
            }

            foreach my $type (keys %threatTypes) {
                $triples .= qq| .\n|;
                next unless defined $fileName;
                $triples .= qq| .\n|
                    unless exists $avDetails{$fileName}->{'avThreatType'}->{$type};
                $avDetails{$fileName}->{'avThreatType'}->{$type} = 1;
            }

            foreach my $class (keys %classes) {
                $triples .= qq| .\n|;
                next unless defined $fileName;
                $triples .= qq| .\n|
                    unless exists $avDetails{$fileName}->{'avThreatClass'}->{$class};
                $avDetails{$fileName}->{'avThreatClass'}->{$class} = 1;
            }

            $similar{$domainName}        = 'domain'    if defined $domainName;
            $similar{$hostName}          = 'host'      if defined $hostName;
            $similar{$fileName}          = 'file'      if defined $fileName;
            $similar{$pestName}          = 'pest_name' if defined $pestName;
            $similar{$server_domainName} = 'domain'    if defined $server_domainName;
            $similar{$server_hostName}   = 'host'      if defined $server_hostName;
            $recordCount++;
        }
    }

    undef $xml_converter;
    print "FINISHED: $inFile\n";
    return ($triples);
}

# Expand a parsed node that may be an array of elements, a single element
# hash, or absent, into a (possibly empty) list.
sub _procXml_as_list {
    my ($node) = @_;
    return @{$node} if ref $node eq 'ARRAY';
    return ($node)  if ref $node eq 'HASH';
    return;
}

# Follow @path below $node and return the 'text' value found there.
# Returns an explicit undef (not an empty list) because the result is used
# directly inside argument lists.
sub _procXml_text {
    my ($node, @path) = @_;
    foreach my $key (@path, 'text') {
        return undef unless ref $node eq 'HASH';
        $node = $node->{$key};
    }
    return $node;
}

# Return $value, or undef when it is undefined, empty, '-' or 'unknown'
# (and, when $alsoUnidentified is true, 'Unidentified Threat').
sub _procXml_clean {
    my ($value, $alsoUnidentified) = @_;
    return undef if !defined $value || $value eq '';
    return undef if $value =~ m/^-$|^unknown$/i;
    return undef if $alsoUnidentified && $value =~ m/^Unidentified Threat$/i;
    return $value;
}

# Return $value when it consists only of digits, otherwise $default.
sub _procXml_digits {
    my ($value, $default) = @_;
    return $value if defined $value && $value =~ m/^[0-9]+$/;
    return $default;
}

# Convert a date string to "YYYY-MM-DDThh:mm:ss" using strptime(); returns
# undef when no year, month and day could be extracted.
sub _procXml_timestamp {
    my ($raw) = @_;
    return undef unless defined $raw;
    my ($ss, $mm, $hh, $day, $month, $year) = strptime($raw);
    return undef unless defined $year && defined $month && defined $day;

    $year += 1900;
    $month = sprintf("%02d", $month + 1);
    $hh = '00' unless defined $hh;
    $mm = '00' unless defined $mm;
    $ss = '01' unless defined $ss;

    # Pad lone digits only, so already-padded or fractional values pass
    # through untouched.
    ($day, $hh, $mm, $ss) = map { m/\A[0-9]\z/ ? "0$_" : $_ } ($day, $hh, $mm, $ss);
    return "$year-$month-$day" . "T" . "$hh:$mm:$ss";
}

# Return (name => 1, ...) for every child of $node whose 'text' equals 1.
sub _procXml_flags {
    my ($node) = @_;
    my %set;
    return %set unless ref $node eq 'HASH';
    foreach my $name (keys %{$node}) {
        my $child = $node->{$name};
        next unless ref $child eq 'HASH' && defined $child->{'text'};
        $set{$name} = 1 if $child->{'text'} == 1;
    }
    return %set;
}

# Collect the non-empty Signature_Version / Engine_Version / Threat_Name
# values of every child of $node, keyed by child name.
sub _procXml_detections {
    my ($node) = @_;
    my %found;
    return %found unless ref $node eq 'HASH';
    foreach my $vendor (keys %{$node}) {
        my $entry = $node->{$vendor};
        next unless ref $entry eq 'HASH';
        foreach my $field ('Signature_Version', 'Engine_Version', 'Threat_Name') {
            my $value = $entry->{$field};
            $found{$vendor}->{$field} = $value if defined $value && $value ne '';
        }
    }
    return %found;
}