#!/usr/bin/perl
#
# Summarise MTA message-trace logs by sending host.
#
# Every file in $base whose name matches /mta2.*\.log$/ is scanned for lines
# containing "MsgTrace".  Those lines are grouped by their msgid= field, and
# each msgid that has exactly one "received" line is handed to process_it(),
# which tallies what happened to the message against the host it came from.
#
# Once all files are read, hosts with more than 499 tallied outcomes are
# listed (smallest first); hosts with more than 999 also get a breakdown by
# outcome type.
#
use strict;
use warnings;

my $base = '/usr/local/stats/data';

# Characters stripped from the sending-host field: < > [ ]
my $CHARS = '(<|>|\[|\])';

$| = 1;    # unbuffered STDOUT so progress lines appear immediately

# Maps the trailing word of a trace line's first colon-separated field to
# the outcome bucket it is counted under.
my %states = (
    'bounced'   => 'bounce',
    'deferred'  => 'queued',
    'directory' => 'queued',
    'delivered' => 'del_loc',
    'dropped'   => 'spam',
    'internet'  => 'del_rem',
    'Handler'   => 'err',
    'forwarded' => 'forward',
);

my $no_id       = 0;    # MsgTrace lines with no usable msgid
my $bad_windows = 0;    # msgids that had two or more "received" lines
my %by_ips;             # host => total outcomes tallied
my %by_type;            # host => { outcome bucket => count }

opendir(my $dir, $base) or die "Cant access $base\nReason: $!\n";
my @logs = grep(/mta2.*\.log$/, readdir($dir));
closedir($dir);

foreach my $file (@logs) {
    my $real  = "$base/$file";
    my $start = time;
    print "Processing: $real\n";

    open(my $in, '<', $real) or die "Cant read $real\nReason: $!\n";

    # msgid => [ trace lines ].  Scoped to this file so msgids that were
    # skipped in one file are not examined (and counted) again in the next.
    my %lines_for;

    while (my $line = <$in>) {
        next if $line !~ /MsgTrace/;
        chomp $line;

        #
        # Grab the msgid field and strip the surrounding angle brackets.
        # $id is fresh for every line: a line with no msgid must count as
        # "no id" rather than inherit the id of the line before it.
        #
        my $id = '';
        $id = $1 if $line =~ /msgid=([^:]+):/;
        $id =~ s/(^<|>$)//g;

        if (!$id || $id =~ /^\s*$/) {
            $no_id++;
            next;
        }
        push @{ $lines_for{$id} }, $line;
    }
    close($in);

    print "Finished sorting by msgid in: " . (time - $start) . " seconds\n";

    foreach my $id (keys %lines_for) {
        my @trace   = @{ $lines_for{$id} };
        my $r_count = grep(/received/, @trace);

        if (!$r_count) {
            #
            # These are internal messages about queueing and such.  We do
            # not keep metrics on them.
            #
            next;
        }
        elsif ($r_count >= 2) {
            #
            # Several messages shared this ID.  There is no handling for
            # that case yet, so they are only counted.
            #
            $bad_windows++;
            next;
        }

        process_it(@trace);
    }    # END foreach id

    print "Processed: $real in: " . (time - $start) . " seconds\n";
}

print "Totals for all data files.\n";

# NOTE(review): the body of the original here-document was lost from the
# source; only its trailing "msg_ids" and the EOF terminator survived.  The
# text below is a reconstruction that reports the two counters which are
# otherwise never printed -- confirm the wording against the intended report.
print <<"EOF";
MsgTrace lines with no msgid     : $no_id
Msgids with multiple receive rows: $bad_windows

             IP : msg_ids
EOF

foreach my $ip ( sort { $by_ips{$a} <=> $by_ips{$b} } keys %by_ips ) {
    next if $by_ips{$ip} <= 499;

    if ($by_ips{$ip} <= 999) {
        printf("%15s : %s\n", $ip, $by_ips{$ip});
    }
    else {
        printf("\n%15s : %s\n", $ip, $by_ips{$ip});
        my %count_for = %{ $by_type{$ip} || {} };
        for my $type (keys %count_for) {
            printf("%15s : %s\n", $type, $count_for{$type});
        }
    }
}

#
# Print_Line([$char])
#
# Prints an 80-character rule followed by a newline.  The rule is built from
# $char, or from '*' when $char is missing or false.
#
sub Print_Line {
    my ($char) = @_;
    $char = '*' unless $char;

    my $rule = ($char x 80) . "\n";
    print $rule;
}

#
# process_it(@trace_lines)
#
# Tallies the outcome of one message.  @trace_lines holds every MsgTrace
# line recorded for a single msgid.
#
# The sending host is taken from the fourth colon-separated field of the
# "received from internet:" line, with "fromhost=" and any < > [ ] removed.
# If there is no such line the tallies go under the empty host name.
#
# Every other line is classified by the last word of its first
# colon-separated field (looked up in %states).  Each classified line counts
# once, except 'del_rem' lines, which count once per '@' in their last
# field, i.e. once per outside recipient.
#
# Updates %by_type and %by_ips; returns the host's new running total.
#
sub process_it {
    my @in = @_;

    my ($rec_line) = grep(/received from internet:/, @in);
    my $from = '';
    if (defined $rec_line) {
        my $host = ( split(/:/, $rec_line) )[3];
        $from = $host if defined $host;
    }
    $from =~ s/(fromhost=|$CHARS)//g;

    #
    # Some data sets hold a single Error-Handler line; others hold an
    # Error-Handler line plus the real breakdown (deferred, dropped,
    # bounced, ...).  A lone line is kept as it is; otherwise the
    # Error-Handler lines are dropped in favour of the breakdown.
    #
    my @data = grep(!/received from internet:/, @in);
    if (@data > 1 && grep(/Error-Handler/, @data)) {
        @data = grep(!/Error-Handler/, @data);
    }

    my $total = 0;
    foreach my $line (@data) {
        my @field = split(/:/, $line);

        # Lines that cannot be classified are skipped, so they never touch
        # %by_type under a stale or empty bucket name.
        next unless defined $field[0];
        next unless $field[0] =~ /( |-)([a-zA-Z]+)$/;
        my $state = $2;

        my $type = $states{$state};
        if (!defined $type) {
            warn "NO type: $field[0]\n";
            $type = '';    # unknown states are still counted, unnamed
        }

        my $hits;
        if ($type !~ /del_rem/) {
            $hits = 1;
        }
        else {
            #
            # Count how many people we sent to outside of our mail system:
            # one per '@' in the last field.  tr/// only counts here, it
            # does not modify the string.
            #
            $hits = ( $field[-1] =~ tr/@// );
        }

        $by_type{$from}{$type} += $hits;
        $total += $hits;
    }

    return $by_ips{$from} += $total;
}