#!/usr/bin/perl # # The Whizbang Directory Server Access Log Parser # # We often have to use multiple pipelines to do things like: # # - Find and print all operation entries (including the result line) that # have a certain characteristic, such as a result code or etime. # # - Find and print all connections that have certain characteristics, # such as a specific source IP or BIND DN. # # Using pipelines for this works fine functionally, but can result # in many passes through the access log. When the access log is # large, this can take a very long time. This script attempts to # perform such operations with only one pass through the log. # # The caveat is that the script must store the contents of an # operation or connection in a data structure until the script # determines whether to print the contents or not. So, in a worst # case scenario, if connection output has been requested, and the # script is unable to determine whether or not to print any of the # connections in the log until the very end, the perl process may # have the entire log stored in memory. # # Use Standard Getopts use Getopt::Std; use Time::Local; # # Minimize output buffering $| = 1; # Month abbreviations my %mon_abbr = ( Jan => '0', Feb => '1', Mar => '2', Apr => '3', May => '4', Jun => '5', Jul => '6', Aug => '7', Sep => '8', Oct => '9', Nov => '10', Dec => '11' ); my @abbr_mon = qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec ); # Usage $usage = "$0 [-O ] [-S [DD/Mmm/yyyy:]hh:mm:ss] [-E [DD/Mmm/yyyy:]hh:mm:ss] [-vidh] expression [file] -O Output type - one of connection, operation, sort, stat, or echo - defaults to operation -v negative match (unused for sort, stat or echo) -i case-insensitive match (unused for sort, stat or echo) -I interval for stat reporting, default is 60 -S Start datetime for log processing -E End datetime for log processing -d debug script logging -h display this help expression - Expression to match (unused for sort, stat and echo) file - Access log to parse - if missing, we assume STDIN Notes: -O sort produces a sorted, delimited output of connections, only after EOF on the input access log. This means that all log input is stored in memory at the time printing starts. It also means that there is no rolling output such as that produced by -O connection or -O operation. So whereas tail -f piped to -O connection or -O operation produces output, tail -f access piped to -O sort will never produce output. -E and -S datetime arguments can be abbreviated by omitting the date portion. In this case, the first line of the log will be used to generate the datetime constraints. This means if you have a log that starts as 23:50 and you want to grab a time range from the following morning, you will need to specify the datetime in full on the command line. -O stat produces the following statistics: date_end - date at the end of the statistics gathering interval time_end - timestamp at the end of the statistics gathering interval conn_opn - number of connections opened during the statistics gathering interval conn_cls - number of connections closed during the statistics gathering interval tot_ops - number of operations initiated during the statistics gathering interval tot_res - number of results sent during the statistics gathering interval tot_bind - number of BIND operations initiated during the statistics gathering interval tot_srch - number of SRCH operations initiated during the statistics gathering interval tot_add - number of ADD operations initiated during the statistics gathering interval tot_mod - number of MOD operations initiated during the statistics gathering interval tot_del - number of DEL operations initiated during the statistics gathering interval etime_av - average etime of results sent during the statistics gathering interval etime_mx - maximum etime of results sent during the statistics gathering interval un_idx_s - number of unindexed search results completed during the statistics gathering interval nentries - number of entries sent during the statistics gathering interval Examples: 1) Print all operations from the large log file \"access.foo\" that have the expression \" BIND \" in them and also have the expression \" err=32 \" in them. Typically, this means all BIND operations that failed with err=32. cat access.foo \| $0 \" BIND \" \| $0 \" err=32 \" Note that while we are making a second pass through the output of the first iteration, an equivalent pipeline of grep, awk, and/or xargs would require many passes through the original, large log file. 2) Find all operations from the log file \"access.foo\" that have the expression \" ADD \", \" MOD \", or \" DEL \" in them, with an LDAP result code other than 0 or 50. $0 \" ADD | MOD | DEL \" access.foo \| $0 -v \" err=0 \| err=50 \" 3) Find all operations from the log file \"access.foo\" that have the expression \" etime=\" in them, where is not equal to zero. Typically, this returns all operations with etime of 1 second or greater. $0 \" etime=[^0]\" access.foo Note that this form can be significantly more efficient in how much memory it uses compared to the -v form. 4) Watch a live access log for wrong password logins. tail -f access \| $0 \" err=49 \" 5) Grab log output between two datetimes and echo it $0 -O echo -S 15/Nov/2008:19:00:00 -E 15/Nov/2008:19:59:59 access.foo 6) Examine workload statistics in one-hour increments $0 -O stat -I 3600 access.foo "; # # Process Options getopts('O:I:S:E:vidh'); # # Print help if ($opt_h) { print $usage; exit(0); } # Determine output type if ($opt_O) { if ($opt_O =~ /^connection$|^conn$|^c$/i) { $output_format = 'connection'; } elsif ($opt_O =~ /^operation$|^op$|^o$/i) { $output_format = 'operation'; } elsif ($opt_O =~ /^sort$|^so$/i) { $output_format = 'sort'; } elsif ($opt_O =~ /^statistics$|^statistic$|^stats$|^stat$|^st$/i) { $output_format = 'stat'; $stat_interval = defined($opt_I) ? $opt_I : '60'; } elsif ($opt_O =~ /^e|^echo$/i) { $output_format = 'echo'; } else { print "Unknown output type: $opt_O\n$usage"; exit(2); } } else { $output_format = 'operation'; } verbose("Output format is $output_format\n"); # # Parse date range arguments if ($opt_S =~ /^(\d+)\/([A-Za-z]+)\/(\d+)\:(\d+):(\d+):(\d+)$/) { $lower_dy = $1; $lower_mo = $2; $lower_yr = $3; $lower_hour = $4; $lower_min = $5; $lower_sec = $6; } elsif ($opt_S =~ /^(\d+):(\d+):(\d+)$/) { $lower_hour = $1; $lower_min = $2; $lower_sec = $3; } else { if ($opt_S) { print "Invalid date format.\n$usage"; exit(2); } } if ($opt_E =~ /^(\d+)\/([A-Za-z]+)\/(\d+)\:(\d+):(\d+):(\d+)$/) { $upper_dy = $1; $upper_mo = $2; $upper_yr = $3; $upper_hour = $4; $upper_min = $5; $upper_sec = $6; } elsif ($opt_E =~ /^(\d+):(\d+):(\d+)$/) { $upper_hour = $1; $upper_min = $2; $upper_sec = $3; } else { if ($opt_E) { print "Invalid date format.\n$usage"; exit(2); } } # Date range calculations my $lower_epoch; my $upper_epoch; # If we have start and/or end dates specified including YYYY MM DD, calculate lower and/or upper epoch times if ($opt_S) { if ($lower_yr) { $lower_epoch = timelocal($lower_sec,$lower_min,$lower_hour,$lower_dy,$mon_abbr{$lower_mo},$lower_yr - '1900'); } } if ($opt_E) { if ($upper_yr) { $upper_epoch = timelocal($upper_sec,$upper_min,$upper_hour,$upper_dy,$mon_abbr{$upper_mo},$upper_yr - '1900'); } } # Process arguments if ($output_format =~ /^connection$|^operation$/) { # Process argv0 as expression unless ($ARGV[0]) { die "$output_format output requires a filter expression\n$usage"; } $filt_expr = $ARGV[0]; if ($ARGV[1]) { $in_file = $ARGV[1]; verbose("Processing file via argv1: $in_file\n"); open(ACCESS_LOG,"$in_file") or die "Failed to open $in_file\: $!"; } else { verbose("Processing file via STDIN\n"); open(ACCESS_LOG,"-") or die "Failed to get STDIN\: $!"; } } elsif ($output_format =~ /^sort$|^echo$|^stat$/) { # Process argv0 as file if ($ARGV[0]) { $in_file = $ARGV[0]; verbose("Processing file via argv0: $in_file\n"); open(ACCESS_LOG,"$in_file") or die "Failed to open $in_file\: $!"; } else { verbose("Processing file via STDIN\n"); open(ACCESS_LOG,"-") or die "Failed to get STDIN\: $!"; } } # Do operation output type processing if ($output_format eq 'operation') { my @skipping; my @printing; my %pending; while () { my $log_line = $_; verbose("Processing line: $log_line"); # Use a regular expression to parse the log line unless ($log_line =~ /^\[(\d+)\/([A-Za-z]+)\/(\d+)\:(\d+):(\d+):(\d+) ([+|-]\d+)\] conn\=([-]*\d+) op\=([-]*\d+) msgId\=([-]*\d+) \- (.+)$/) { # We didn't match the regex verbose("Line failed to parse:\n$log_line"); next; } # We captured date, time, connection, and operation data my $day = $1; my $month = $2; my $year = $3; my $hour = $4; my $minute = $5; my $second = $6; my $tz_offset = $7; my $conn_number = $8; my $op_number = $9; my $msgid_number = $10; my $log_payload = $11; # # Date-time constraints # If we don't have a date yet, take it from the log if ($opt_S) { unless ($lower_epoch) { verbose("Lower bound not present. Taking from log.\n"); $lower_epoch = timelocal($lower_sec,$lower_min,$lower_hour,$day,$mon_abbr{$month},$year - '1900'); } } if ($opt_E) { unless ($upper_epoch) { verbose("Upper bound not present. Taking from log.\n"); $upper_epoch = timelocal($upper_sec,$upper_min,$upper_hour,$day,$mon_abbr{$month},$year - '1900'); } } # Calculate the current log time in epoch $current_epoch = timelocal($second,$minute,$hour,$day,$mon_abbr{$month},$year - '1900'); # Is the current line in the correct range if ((($opt_S) && ($current_epoch < $lower_epoch)) || (($opt_E) && ($current_epoch > $upper_epoch))) { next; } # # Here we decide what to do with the operation. We have three data structures, # @skipping, @printing and %pending. Elements of @skipping, @printing and keys # to %pending are $conn_number:$op_number:$msgid_number. # # In the case of a standard match, when a line is associated # with an operation in @printing, we simply print it. When a line that is not # already associated with an operation being printed matches our printing # criteria, we add the operation to @printing and check if it's in %pending. # If the newly matched operation was in %pending, we print out its previous lines # and remove it from %pending. # # In the case of a negative match, when a line is associated with an operation # in @skipping, we do not print it and go to the next line. If a line fails # to match negatively, its operation key goes into @skipping, because unless # every line matches negatively, we will not print the operation. If the line # has an entry in %pending, that entry is removed. If a line matches negatively # and its operation is not in @skipping, it is either added to %pending as a new # or appended to its operation's entry if it already exists. Once the final line # of the operation is parsed, if all lines matched negatively, the operation is # printed and purged from %pending. If we reach the end of the log or a server # restart, pending operations are printed. # # Compute the operation key my $operation_key; if (($op_number eq '-1') || ($msgid_number eq '-1')) { # Treat all internal operations as a single operation $operation_key = $conn_number . ':' . 'INTERNAL'; } else { $operation_key = $conn_number . ':' . $op_number . ':' . $msgid_number; } # # If this is a server restart, blow away old operation data if (($operation_key eq '0:INTERNAL') && ($log_payload =~ /connection from/)) { verbose("Server restart detected. Initializing operation state.\n"); # If we are doing negative matching, print any pending operations if ($opt_v) { foreach $pending_key (keys(%pending)) { print $pending{$pending_key}; } } undef @skipping; undef @printing; undef %pending; } # # Set up a flag to say if this line is a result undef my $this_is_a_result; # Check if the line is a result if (($log_payload =~ /^RESULT |^closed\.$|^UNBIND$|^ABANDON /)) { $this_is_a_result = 'true'; verbose("This line is a result.\n"); } # Set up a flag to say if this operation is already on the skip list undef my $skip_this; # Set up a flag to say if this operation is already on the print list undef my $this_is_printing; # Set up a flag to say if this operation is already on the pending list undef my $this_is_pending; # # Check if the operation is already in @skipping if (@skipping) { for $i (0..$#skipping) { verbose("skip list slot $i - $skipping[$i]\n"); if ($skipping[$i] eq $operation_key) { $skip_this = 'true'; verbose("This operation is already marked for skipping.\n"); # If this is a result, remove the operation from @skipping if ($this_is_a_result) { splice(@skipping,$i,1); verbose("The operation has been removed from the skipping list.\n"); } last; } } } # # Check if the operation is already in @printing if (@printing) { for $i (0..$#printing) { verbose("print list slot $i - $printing[$i]\n"); if ($printing[$i] eq $operation_key) { $this_is_printing = 'true'; verbose("This operation is already marked for printing.\n"); # If this is a result, remove the operation from @printing if ($this_is_a_result) { splice(@printing,$i,1); verbose("The operation has been removed from the printing list.\n"); } last; } } } # # If the operation is already on the print list, print the line and stop processing it if ($this_is_printing) { print $log_line; next; } # If the operation is already on the skip list, skip the line and stop processing it if ($skip_this) { next; } # # Check if there is a pending entry for this operation if (%pending) { if (exists($pending{$operation_key})) { $this_is_pending = 'true'; } } # # Check if the operation matches our criteria. At this point, all this means is a # pattern match against the current log line. # Set up a flag to say whether the line matched undef my $this_is_a_match; if ($opt_v) { # Process as a negative match if ($opt_i) { # Process case-insensitive if ($log_line !~ /$filt_expr/i) { verbose("Line matched.\n"); $this_is_a_match = 'true'; } } else { # Process case-sensitive if ($log_line !~ /$filt_expr/) { verbose("Line matched.\n"); $this_is_a_match = 'true'; } } } else { # Process as a positive match if ($opt_i) { # Process case-insensitive if ($log_line =~ /$filt_expr/i) { verbose("Line matched.\n"); $this_is_a_match = 'true'; } } else { # Process case-sensitive if ($log_line =~ /$filt_expr/) { verbose("Line matched.\n"); $this_is_a_match = 'true'; } } } if ($this_is_a_match) { # This line matches our criteria # # The -v behavior is different. We expect the operation to be returned # if every line in the operation matches the expression negatively, i.e., # only if the expression is missing from every line in the operation. if ($opt_v) { # Negative matching behavior - this line matched the expression # negatively. # In this case, we never put anything on @printing, because # we never print before we have collected all lines associated # with the operation. Once we have all lines we can print. # # If this is a result, check for a pending entry and print the operation if (($this_is_a_result) && ($this_is_pending)) { print $pending{$operation_key}; print $log_line; delete $pending{$operation_key}; } # If this is a result with no pending entry, just print the line. elsif ($this_is_a_result) { print $log_line; } # If this is a not a result, add it to %pending or append to its # existing key. else { if ($this_is_pending) { $pending{$operation_key} .= $log_line; } else { $pending{$operation_key} = $log_line; } } } else { # Standard matching behavior # Push the operation key onto @printing unless this is a result unless ($this_is_a_result) { verbose("Pushing $operation_key onto the printing list.\n"); push(@printing,$operation_key); } # # If there is a pending entry for this operation print it and remove it if ($this_is_pending) { print $pending{$operation_key}; delete $pending{$operation_key}; } # # Print this line print $log_line; } } else { # This line does not match our criteria # if ($opt_v) { # Negative non-matching behavior # Push the operation key onto @skipping unless this is a result unless ($this_is_a_result) { verbose("Pushing $operation_key onto the skipping list.\n"); push(@skipping,$operation_key); } # If this a pending operation, remove from the pending hash if ($this_is_pending) { delete $pending{$operation_key}; } } else { # Positive non-matching behavior # If this is a result on a pending operation, remove from the pending hash if (($this_is_a_result) && ($this_is_pending)) { delete $pending{$operation_key}; } # If this is not a result, but is pending, append to the pending scalar elsif ($this_is_pending) { $pending{$operation_key} .= $log_line; } # If this is not a result, and is not pending, create the pending entry else { $pending{$operation_key} = $log_line; } } } } # If we are doing negative matching, print any pending operations if ($opt_v) { foreach $pending_key (keys(%pending)) { print $pending{$pending_key}; } } } # Do connection output type processing if ($output_format eq 'connection') { my @skipping; my @printing; my %pending; while () { my $log_line = $_; verbose("Processing line: $log_line"); # Use a regular expression to parse the log line unless ($log_line =~ /^\[(\d+)\/([A-Za-z]+)\/(\d+)\:(\d+):(\d+):(\d+) ([+|-]\d+)\] conn\=([-]*\d+) op\=([-]*\d+) msgId\=([-]*\d+) \- (.+)$/) { # We didn't match the regex verbose("Line failed to parse:\n$log_line"); next; } # We captured date, time, connection, and operation data my $day = $1; my $month = $2; my $year = $3; my $hour = $4; my $minute = $5; my $second = $6; my $tz_offset = $7; my $conn_number = $8; my $op_number = $9; my $msgid_number = $10; my $log_payload = $11; # # Date-time constraints # If we don't have a date yet, take it from the log if ($opt_S) { unless ($lower_epoch) { verbose("Lower bound not present. Taking from log.\n"); $lower_epoch = timelocal($lower_sec,$lower_min,$lower_hour,$day,$mon_abbr{$month},$year - '1900'); } } if ($opt_E) { unless ($upper_epoch) { verbose("Upper bound not present. Taking from log.\n"); $upper_epoch = timelocal($upper_sec,$upper_min,$upper_hour,$day,$mon_abbr{$month},$year - '1900'); } } # Calculate the current log time in epoch $current_epoch = timelocal($second,$minute,$hour,$day,$mon_abbr{$month},$year - '1900'); # Is the current line in the correct range if ((($opt_S) && ($current_epoch < $lower_epoch)) || (($opt_E) && ($current_epoch > $upper_epoch))) { next; } # # Here we decide what to do with the connection. We have three data structures, # @skipping, @printing and %pending. Elements of @skipping, @printing and keys # to %pending are $conn_number. # # In the case of a standard match, when a line is associated # with a connection in @printing, we simply print it. When a line that is not # already associated with a connection being printed matches our printing # criteria, we add the connection to @printing and check if it's in %pending. # If the newly matched connection was in %pending, we print out its previous lines # and remove it from %pending. # # In the case of a negative match, when a line is associated with a connection # in @skipping, we do not print it and go to the next line. If a line fails # to match negatively, its connection key goes into @skipping, because unless # every line matches negatively, we will not print the connection. If the line # has an entry in %pending, that entry is removed. If a line matches negatively # and its connection is not in @skipping, it is either added to %pending as new # or appended to its connection's entry if it already exists. Once the final line # of the connection is parsed, if all lines matched negatively, the connection is # printed and purged from %pending. If we reach the end of the log or a server # restart, pending connections are printed. # # Compute the connection key my $connection_key; $connection_key = $conn_number; # # If this is a server restart, blow away old operation data if (($connection_key eq '0') && ($log_payload =~ /connection from/)) { verbose("Server restart detected. Initializing connection state.\n"); # If we are doing negative matching, print any pending connections if ($opt_v) { foreach $pending_key (keys(%pending)) { print $pending{$pending_key}; } } undef @skipping; undef @printing; undef %pending; } # # Set up a flag to say if this line is a close undef my $this_is_a_close; # Check if the line is a close if (($op_number eq '-1') && ($msgid_number eq '-1') && ($log_payload eq 'closed.')) { $this_is_a_close = 'true'; verbose("This line is a connection close.\n"); } # Set up a flag to say if this connection is already on the skip list undef my $skip_this; # Set up a flag to say if this connection is already on the print list undef my $this_is_printing; # Set up a flag to say if this connection is already on the pending list undef my $this_is_pending; # # Check if the connection is already in @skipping if (@skipping) { for $i (0..$#skipping) { verbose("print list slot $i - $skipping[$i]\n"); if ($skipping[$i] eq $connection_key) { $skip_this = 'true'; verbose("This connection is already marked for skipping.\n"); last; } } } # # Check if the connection is already in @printing if (@printing) { for $i (0..$#printing) { verbose("print list slot $i - $printing[$i]\n"); if ($printing[$i] eq $connection_key) { $this_is_printing = 'true'; verbose("This connection is already marked for printing.\n"); # If this is a close, remove the connection from @printing if ($this_is_a_close) { splice(@printing,$i,1); verbose("The connection has been removed from the printing list.\n"); } last; } } } # # If the connection is already on the print list, print the line and stop processing it if ($this_is_printing) { print $log_line; next; } # If the connection is already on the skip list, skip the line and stop processing it if ($skip_this) { next; } # # Check if there is a pending entry for this connection if (%pending) { if (exists($pending{$connection_key})) { $this_is_pending = 'true'; } } # # Check if the connection matches our criteria. At this point, all this means is a # pattern match against the current log line. # Set up a flag to say whether the line matched undef my $this_is_a_match; if ($opt_v) { # Process as a negative match if ($opt_i) { # Process case-insensitive if ($log_line !~ /$filt_expr/i) { verbose("Line matched.\n"); $this_is_a_match = 'true'; } } else { # Process case-sensitive if ($log_line !~ /$filt_expr/) { verbose("Line matched.\n"); $this_is_a_match = 'true'; } } } else { # Process as a positive match if ($opt_i) { # Process case-insensitive if ($log_line =~ /$filt_expr/i) { verbose("Line matched.\n"); $this_is_a_match = 'true'; } } else { # Process case-sensitive if ($log_line =~ /$filt_expr/) { verbose("Line matched.\n"); $this_is_a_match = 'true'; } } } if ($this_is_a_match) { # This line matches our criteria # # The -v behavior is different. We expect the connection to be returned # if every line in the connection matches the expression negatively, i.e., # only if the expression is missing from every line in the connection. if ($opt_v) { # Negative matching behavior - this line matched the expression # negatively. # In this case, we never put anything on @printing, because # we never print before we have collected all lines associated # with the connection. Once we have all lines we can print. # # If this is a close, check for a pending entry and print the connection if (($this_is_a_close) && ($this_is_pending)) { print $pending{$connection_key}; print $log_line; delete $pending{$connection_key}; } # If this is a close with no pending entry, just print the line. elsif ($this_is_a_close) { print $log_line; } # If this is a not a close, add it to %pending or append to its # existing key. else { if ($this_is_pending) { $pending{$connection_key} .= $log_line; } else { $pending{$connection_key} = $log_line; } } } else { # Standard matching behavior # Push the connection key onto @printing unless this is a close unless ($this_is_a_close) { verbose("Pushing $connection_key onto the printing list.\n"); push(@printing,$connection_key); } # # If there is a pending entry for this connection print it and remove it if ($this_is_pending) { print $pending{$connection_key}; delete $pending{$connection_key}; } # # Print this line print $log_line; } } else { # This line does not match our criteria # if ($opt_v) { # Negative non-matching behavior # Push the connection key onto @skipping unless this is a close unless ($this_is_a_close) { verbose("Pushing $connection_key onto the skipping list.\n"); push(@skipping,$connection_key); } # If this a pending connection, remove from the pending hash if ($this_is_pending) { delete $pending{$connection_key}; } } else { # Positive non-matching behavior # If this is a close on a pending connection, remove from the pending hash if (($this_is_a_close) && ($this_is_pending)) { delete $pending{$connection_key}; } # If this is not a close, but is pending, append to the pending scalar elsif ($this_is_pending) { $pending{$connection_key} .= $log_line; } # If this is not a close, and is not pending, create the pending entry else { $pending{$connection_key} = $log_line; } } } } # If we are doing negative matching, print any pending connections if ($opt_v) { foreach $pending_key (keys(%pending)) { print $pending{$pending_key}; } } } # Do sort output type processing if ($output_format eq 'sort') { my %connections; while () { my $log_line = $_; verbose("Processing line: $log_line"); # Use a regular expression to parse the log line unless ($log_line =~ /^\[(\d+)\/([A-Za-z]+)\/(\d+)\:(\d+):(\d+):(\d+) ([+|-]\d+)\] conn\=([-]*\d+) op\=([-]*\d+) msgId\=([-]*\d+) \- (.+)$/) { # We didn't match the regex verbose("Line failed to parse:\n$log_line"); next; } # We captured date, time, connection, and operation data my $day = $1; my $month = $2; my $year = $3; my $hour = $4; my $minute = $5; my $second = $6; my $tz_offset = $7; my $conn_number = $8; my $op_number = $9; my $msgid_number = $10; my $log_payload = $11; # # Date-time constraints # If we don't have a date yet, take it from the log if ($opt_S) { unless ($lower_epoch) { verbose("Lower bound not present. Taking from log.\n"); $lower_epoch = timelocal($lower_sec,$lower_min,$lower_hour,$day,$mon_abbr{$month},$year - '1900'); } } if ($opt_E) { unless ($upper_epoch) { verbose("Upper bound not present. Taking from log.\n"); $upper_epoch = timelocal($upper_sec,$upper_min,$upper_hour,$day,$mon_abbr{$month},$year - '1900'); } } # Calculate the current log time in epoch $current_epoch = timelocal($second,$minute,$hour,$day,$mon_abbr{$month},$year - '1900'); # Is the current line in the correct range if ((($opt_S) && ($current_epoch < $lower_epoch)) || (($opt_E) && ($current_epoch > $upper_epoch))) { next; } # # Here we decide what to do with the log entry. We have a data structure, # %connections, which is a hash. $conn_number is the key to %connections. # # Compute the connection key $connection_key = $conn_number; # # Set up a flag to say if this line is a connection open undef my $this_is_an_open; # # Set up a flag to say if this line is a result undef my $this_is_a_result; # # Set up a flag to say if this connection is closing undef my $this_is_closing; # # Set up a flag to say if this line is a close undef my $this_is_a_close; # # Check if the line is a result if (($log_payload =~ /connection from/) && ($op_number eq '-1') && ($msgid_number eq '-1')) { $this_is_an_open = 'true'; verbose("This line is a connection open.\n"); $operation_key = 'OPEN'; } # Check if the connection is closing elsif (($msgid_number eq '-1') && ($log_payload =~ /closing/)) { $this_is_closing = 'true'; verbose("This connection is closing.\n"); $operation_key = 'CLOSING'; } # Check if the line is a close elsif (($op_number eq '-1') && ($msgid_number eq '-1') && ($log_payload eq 'closed.')) { $this_is_a_close = 'true'; verbose("This line is a connection close.\n"); $operation_key = 'CLOSED'; } # Check if the line is an internal operation elsif (($op_number eq '-1') || ($msgid_number eq '-1')) { verbose("This line is an internal operation.\n"); $operation_key = 'INTERNAL'; } # Normal operation key else { $operation_key = $op_number . ':' . $msgid_number; } # Check if the line is a result if (($log_payload =~ /^RESULT /)) { $this_is_a_result = 'true'; verbose("This line is a result.\n"); } # Check for server restart if (($this_is_an_open) && ((exists($connections{$connection_key})) || ($connection_key eq '0'))) { # Server restart detected verbose("Server restart detected.\n"); my @conn_nums = sort { $a <=> $b } (keys %connections); foreach $conn_num (@conn_nums) { print "********************************************************************************\n"; print $connections{$conn_num}; delete $connections{$conn_num}; } undef %connections; } # Aggregate by connection $connections{$connection_key} .= $log_line; verbose("Adding the line to connection $connection_key\n"); } my @conn_nums = sort { $a <=> $b } (keys %connections); foreach $conn_num (@conn_nums) { print "******************************************************************************************\n"; print $connections{$conn_num}; } } # Do stat output type processing # ## Date Computations ## Convert from standard date to epoch ## $epoch_date= timelocal($l_s,$l_mi,$l_h,$l_d,$mon_abbr{$l_mo},$l_y - '1900'); ## Convert from epoch to standard date ## ($p_s,$p_mi,$p_h,$p_d,$p_mo,$p_y) = localtime($epoch_date); ## $p_mo = $abbr_mon[$p_mo]; ## $p_y += 1900; # if ($output_format eq 'stat') { # Set up stat interval $STAT_INTERVAL = defined($opt_I) ? $opt_I : '60'; # Set up variables my $start_epoch; my $end_epoch; my $current_epoch; my $open_count = 0; my $close_count = 0; my $op_count = 0; my $result_count = 0; my $bind_count =0; my $srch_count = 0; my $add_count = 0; my $mod_count = 0; my $del_count = 0; my $etime_max = 0; my $etime_tot = 0; my $etime_avg; my $unindexed_count = 0; my $entries_returned = 0; my @rpt_header = qw(lg_date_end time_end conn_opn conn_cls tot_ops tot_res tot_bind tot_srch tot_add tot_mod tot_del etime_av etime_mx un_idx_s nentries); printf("%-12s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",@rpt_header); while () { my $log_line = $_; verbose("Processing line: $log_line"); # Use a regular expression to parse the log line unless ($log_line =~ /^\[(\d+)\/([A-Za-z]+)\/(\d+)\:(\d+):(\d+):(\d+) ([+|-]\d+)\] conn\=([-]*\d+) op\=([-]*\d+) msgId\=([-]*\d+) \- (.+)$/) { # We didn't match the regex verbose("Line failed to parse:\n$log_line"); next; } # We captured date, time, connection, and operation data my $l_d = $1; my $l_mo = $2; my $l_y = $3; my $l_h = $4; my $l_mi = $5; my $l_s = $6; my $tz_offset = $7; my $conn_number = $8; my $op_number = $9; my $msgid_number = $10; my $log_payload = $11; # # Here we decide what to do with the log entry. # # Calculate the current log time in epoch $current_epoch = timelocal($l_s,$l_mi,$l_h,$l_d,$mon_abbr{$l_mo},$l_y - '1900'); # Date-time constraints # If we don't have a date yet, take it from the log if ($opt_S) { unless ($lower_epoch) { verbose("Lower bound not present. Taking from log.\n"); $lower_epoch = timelocal($lower_sec,$lower_min,$lower_hour,$l_d,$mon_abbr{$l_mo},$l_y - '1900'); } } if ($opt_E) { unless ($upper_epoch) { verbose("Upper bound not present. Taking from log.\n"); $upper_epoch = timelocal($upper_sec,$upper_min,$upper_hour,$l_d,$mon_abbr{$l_mo},$l_y - '1900'); } } # Is the current line in the correct range if ((($opt_S) && ($current_epoch < $lower_epoch)) || (($opt_E) && ($current_epoch > $upper_epoch))) { next; } # If this the first line of the log get the start time in epoch unless ($start_epoch) { $start_epoch = $current_epoch; verbose("Starting stats interval at $start_epoch seconds from epoch\n"); $end_epoch = $start_epoch + $STAT_INTERVAL - '1'; ($end_s,$end_mi,$end_h,$end_d,$end_mo,$end_y) = localtime($end_epoch); verbose("Current stats interval will end at the end of the second $end_epoch seconds from epoch - $end_d\/$end_mo\/$end_y\:$end_h\:$end_mi\:$end_s\n"); } # If the date is after the ending timestamp print stats and reset time markers and stats counters if ($current_epoch > $end_epoch) { if ($result_count > 0) { $etime_avg = $etime_tot / $result_count; } else { $etime_avg = '0'; } printf("%02d\/%3s\/%4d %02d\:%02d\:%02d %8d %8d %8d %8d %8s %8s %8s %8s %8s %8.3f %8.1f %8d %8s\n", $end_d,$abbr_mon[$end_mo],$end_y + '1900',$end_h,$end_mi,$end_s,$open_count,$close_count,$op_count,$result_count,$bind_count,$srch_count,$add_count,$mod_count,$del_count,$etime_avg,$etime_max,$unindexed_count,$entries_returned); $start_epoch = $end_epoch + '1'; verbose("Starting stats interval at $start_epoch seconds from epoch\n"); $end_epoch = $start_epoch + $STAT_INTERVAL - '1'; ($end_s,$end_mi,$end_h,$end_d,$end_mo,$end_y) = localtime($end_epoch); verbose("Current stats interval will end at the end of the second $end_epoch seconds from epoch - $end_d\/$end_mo\/$end_y\:$end_h\:$end_mi\:$end_s\n"); $open_count = 0; $close_count = 0; $op_count = 0; $result_count = 0; $bind_count = 0; $srch_count = 0; $add_count = 0; $mod_count = 0; $del_count = 0; $etime_max = 0; $etime_avg = 0; $etime_tot = 0; $unindexed_count = 0; $entries_returned = 0; } # Parse the log payload and increment counters if (($log_payload =~ /^RESULT (.+) nentries=(\d+) etime=(.+?) (.+)$/) || ($log_payload =~ /^RESULT (.+) nentries=(\d+) etime=(.+?)$/)) { $result_count++; $entries_returned += $2; $etime_tot += $3; if ($3 > $etime_max) { $etime_max = $3; } if ($4 =~ /notes\=U/) { $unindexed_count++; } } elsif ($log_payload =~ /^closed\.$/) { $close_count++; } elsif ($log_payload =~ /connection from /) { $open_count++; } elsif ($log_payload =~ /^ADD /) { $add_count++; $op_count++; } elsif ($log_payload =~ /^MOD /) { $mod_count++; $op_count++; } elsif ($log_payload =~ /^DEL /) { $del_count++; $op_count++; } elsif ($log_payload =~ /^SRCH /) { $srch_count++; $op_count++; } elsif ($log_payload =~ /^BIND /) { $bind_count++; $op_count++; } elsif ($log_payload =~ /^EXT /) { $op_count++; } else { unless ($log_payload =~ /^UNBIND$|^ABANDON |closing/) { verbose("Unknown log entry: $log_payload"); } } } # Print last set of stats if ($result_count > 0) { $etime_avg = $etime_tot / $result_count; } else { $etime_avg = '0'; } printf("%02d\/%3s\/%4d %02d\:%02d\:%02d %8d %8d %8d %8d %8s %8s %8s %8s %8s %8.3f %8.1f %8s %8s\n", $end_d,$abbr_mon[$end_mo],$end_y + '1900',$end_h,$end_mi,$end_s,$open_count,$close_count,$op_count,$result_count,$bind_count,$srch_count,$add_count,$mod_count,$del_count,$etime_avg,$etime_max,$unindexed_count,$entries_returned); } # Do echo output type processing if ($output_format eq 'echo') { while () { my $log_line = $_; verbose("Processing line: $log_line"); # Use a regular expression to parse the log line unless ($log_line =~ /^\[(\d+)\/([A-Za-z]+)\/(\d+)\:(\d+):(\d+):(\d+) ([+|-]\d+)\] conn\=([-]*\d+) op\=([-]*\d+) msgId\=([-]*\d+) \- (.+)$/) { # We didn't match the regex verbose("Line failed to parse:\n$log_line"); next; } # We captured date, time, connection, and operation data my $l_d = $1; my $l_mo = $2; my $l_y = $3; my $l_h = $4; my $l_mi = $5; my $l_s = $6; my $tz_offset = $7; my $conn_number = $8; my $op_number = $9; my $msgid_number = $10; my $log_payload = $11; # Date-time constraints # If we don't have a date yet, take it from the log if ($opt_S) { unless ($lower_epoch) { verbose("Lower bound not present. Taking from log.\n"); $lower_epoch = timelocal($lower_sec,$lower_min,$lower_hour,$l_d,$mon_abbr{$l_mo},$l_y - '1900'); } } if ($opt_E) { unless ($upper_epoch) { verbose("Upper bound not present. Taking from log.\n"); $upper_epoch = timelocal($upper_sec,$upper_min,$upper_hour,$l_d,$mon_abbr{$l_mo},$l_y - '1900'); } } # Calculate the current log time in epoch $current_epoch = timelocal($l_s,$l_mi,$l_h,$l_d,$mon_abbr{$l_mo},$l_y - '1900'); # Is the current line in the correct range if ((($opt_S) && ($current_epoch < $lower_epoch)) || (($opt_E) && ($current_epoch > $upper_epoch))) { next; } # Here we decide what to do with the log entry. print $log_line; } } # Close Access Log if ($in_file) { verbose("Closing file $in_file\n"); close(ACCESS_LOG) or die "Failed to close $in_file\: $!"; } # A subroutine for verbose logging sub verbose { my $verbose_msg = $_[0]; if ($opt_d) { print STDERR $verbose_msg; } }