127.0.0.1 - - [15/Jun/2003:13:05:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:05:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:06:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:14:08:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:10:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:15:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:18:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:20:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:25:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:35:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:50:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:14:04:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:14:04:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:14:10:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:14:20:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
####
# Excerpt of the loop that groups log hits into periods. $period and the
# statements elided by "..." are not defined within this fragment.
my ($start , $old , %count);
# NOTE(review): the loop condition is empty as shown -- presumably a
# read from the log filehandle belongs between the parentheses; confirm.
while ( ) {
# Take fields 3 and 6 of the space-separated line held in $_; move on to
# the next iteration when the slice yields nothing.
my ($time , $file) = (split / /)[3,6] or next;
...
# Start a new bucket when $time lies a whole number of periods after
# $start; otherwise keep filing hits under the current bucket ($old).
$old = (0 == ($time - $start) % $period) ? $time : $old;
# Record one hit under the current bucket, keyed by the exact hit time.
push @{ $count{$old}->{$time} }, 1;
}
####
#!/usr/local/bin/perl -w
use strict;
=head1 Black Box Model
Given the data...
127.0.0.1 - - [15/Jun/2003:13:05:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:05:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:06:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:14:08:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:10:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:15:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:18:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:20:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:25:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:35:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:50:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:14:04:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:14:04:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:14:10:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:14:20:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
...with 15 minute interval (period size of I<15>, unit of I<minute>), output
is...
Time Total pages Avg pages Min pages Max pages
-------------------------------------------------------------------
15/Jun/2003:13:05:00 6 1.2 1 2
15/Jun/2003:13:20:00 2 1.0 1 1
15/Jun/2003:13:35:00 1 1.0 1 1
15/Jun/2003:13:50:00 3 1.3 1 2
15/Jun/2003:14:20:00 1 1.0 1 1
=cut
use Time::CTime;
use Time::ParseDate;
# Time conversion factors used by period_in_seconds().
use constant SEC_PER_MINUTE => 60;
use constant MIN_PER_HOUR => 60;
use constant SEC_PER_HOUR => SEC_PER_MINUTE * MIN_PER_HOUR;

# Report settings: width of one period, the unit that width is expressed
# in, and the access log to read.
my $period_size = 15;
my $unit        = 'minute';
my $log         = 'access_log_modified';

# Filter callback handed to collect_count(): returns 1 for a requested
# file ending in .js, .css or .gif (such hits are skipped), 0 otherwise.
my $filter = sub {
    return 1 if $_[0] =~ m/ [.] (?: js | css | gif ) $/x;
    return 0;
};

# Convert the period to seconds, gather the hits, then print the table.
my $period = period_in_seconds($period_size , $unit);
my $count  = collect_count($log , $period , $filter);
show_stat($count);
=head1 C<show_stat($hash_ref)>
Given a hash reference keyed by period start time (in seconds) whose
values are hash references of array references, prints each period start
in human readable form followed by the basic statistics for that period.
=cut
# Print the statistics table: one header line, one rule of dashes, then
# one row per period in ascending order of period start time.
#
# Takes a hash reference keyed by period start (epoch seconds); each
# value is handed to basic_stat(), whose four results (size, average,
# minimum, maximum) fill the remaining columns of the row.
sub show_stat {
    my %parsed = %{ +shift };
    my @keys = sort { $a <=> $b } keys %parsed;

    # One set of column widths and one gap drive the header, the rule and
    # the data rows, so the three can no longer drift out of alignment.
    my ($w_time , $w_total , $w_avg , $w_min , $w_max) = (20 , 11 , 10 , 9 , 9);
    my $gap = q{  };

    my $header_fmt = join( $gap
        , "%-${w_time}s" , "%${w_total}s" , "%${w_avg}s"
        , "%${w_min}s"   , "%${w_max}s" ) . "\n";
    my $row_fmt = join( $gap
        , "%${w_time}s" , "%${w_total}d" , "%${w_avg}.1f"
        , "%${w_min}d"  , "%${w_max}d" ) . "\n";
    my $rule = '-' x ( $w_time + $w_total + $w_avg + $w_min + $w_max
                       + 4 * length $gap );

    printf $header_fmt
        , 'Time' , 'Total pages' , 'Avg pages' , 'Min pages' , 'Max pages';
    print $rule , "\n";

    foreach my $k (@keys) {
        printf $row_fmt
            , strftime( "%d/%b/%Y:%H:%M:%S" , localtime $k)
            , @{ basic_stat( $parsed{$k} ) };
    }
}
=head1 C<$hash_of_array_ref = collect_count($file_name , $period , $code_ref)>
Given a file name and time period (in seconds), returns a hash reference
keyed by period start time (in seconds). Each value is a hash reference
keyed by hit time (in seconds), holding an array reference with one
element per hit recorded at that time.
Optional third parameter, a code reference (that takes file name and
returns true), will be used to filter out the unwanted files if given.
=cut
# Read the access log $log and group its hits into periods of $period
# seconds.
#
# Parameters:
#   $log    - name of the log file to read
#   $period - period length in seconds
#   $filter - optional code reference; called with the requested file and
#             expected to return true when the hit should be skipped
#
# Returns a hash reference: period start (seconds) => hit time (seconds)
# => array reference holding one element per hit at that time.
#
# Dies when the log cannot be opened or closed.
sub collect_count {
    my ($log , $period , $filter) = @_;

    # Lexical handle: closed automatically if we die part-way through and
    # cannot collide with a LOGFILE handle used elsewhere.
    open(my $fh , '<' , $log) || die "Cannot read from $log: $!\n";

    my ($start , $old , %count);
    $filter = sub { 0; } unless $filter;

    # Read into a named lexical so the caller's $_ is left untouched.
    LINE:
    while ( my $line = <$fh> ) {
        # Field 3 is the bracketed timestamp, field 6 the requested file.
        my ($time , $file) = (split / / , $line)[3,6];
        next LINE unless defined $time && defined $file;

        next LINE if $filter->($file);
        next LINE if $time !~ m/ \[ (.+?) \] /x;

        $time = parsedate($1);
        # Skip timestamps that could not be parsed.
        next LINE unless defined $time;

        # The first accepted hit anchors the period grid.
        $start = $time unless defined $start;

        # A hit that lands exactly on the grid opens a new period; every
        # other hit is filed under the period currently open.
        $old = $time if 0 == ($time - $start) % $period;
        push @{ $count{$old}->{$time} }, 1;
    }

    close($fh) || die "Could not close $log: $!\n";
    return \%count;
}
=head1 C<$array_ref = basic_stat($hash_of_array_ref)>
Given a hash reference of array references, returns an array reference
composed of size, average, minimum, maximum based on the sizes of each
array reference passed.
It may return C<undef> values if passed hash is empty.
=cut
# Summarise the sizes of the arrays held in a hash of array references.
#
# Returns an array reference [ count, average, minimum, maximum ], where
# count is the number of arrays and the other three describe their
# sizes. For an empty hash the last three entries are undef.
sub basic_stat {
    my ($collection) = @_;

    my @lengths = map { scalar @{ $_ } } values %{$collection};
    my $n = @lengths;

    # Nothing to average: report the zero count and leave the rest undef.
    return [ $n , undef , undef , undef ] if !$n;

    # Single pass collecting the total and both extremes.
    my ($total , $lowest , $highest) = (0 , $lengths[0] , $lengths[0]);
    for my $len (@lengths) {
        $total += $len;
        $lowest  = $len if $len < $lowest;
        $highest = $len if $len > $highest;
    }

    return [ $n , $total / $n , $lowest , $highest ];
}
=head1 C<$period = period_in_seconds($period_size , $unit)>
Given period size and time unit, basically matching...
C<hour>, C<hr> or C<min> (case-insensitive; any other unit is taken as seconds)
...returns the period in seconds.
If period size is not I<true>, returns 1.
=cut
# Convert a period size and a time unit into a number of seconds.
#
# Parameters:
#   $size - period size; its absolute value is used
#   $unit - unit name; one starting with "hour" or "hr" means hours, one
#           starting with "min" means minutes (both case-insensitive),
#           anything else, including undef, means seconds
#
# Returns 1 when $size is false, otherwise abs($size) times the number
# of seconds in the unit.
sub period_in_seconds {
    my ($size , $unit) = @_;
    return 1 unless $size;
    $size = abs($size);

    # A missing unit falls through to seconds without an undef warning.
    $unit = '' unless defined $unit;

    # The alternatives are grouped so the anchor applies to both; left
    # ungrouped, "hr" would match anywhere inside the unit name.
    my $multiplier =
        $unit =~ m/\A \s* (?: hour | hr ) /ix
            ? SEC_PER_HOUR
      : $unit =~ m/\A \s* min /ix
            ? SEC_PER_MINUTE
      : 1;

    return $size * $multiplier;
}
__DATA__
127.0.0.1 - - [15/Jun/2003:13:05:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:05:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:06:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:14:08:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:10:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:15:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:18:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:20:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:25:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:35:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:13:50:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:14:04:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:14:04:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:14:10:00] -0100 "GET /xxxx HTTP/1.1" 200 34906
127.0.0.1 - - [15/Jun/2003:14:20:00] -0100 "GET /xxxx HTTP/1.1" 200 34906