#!/usr/bin/perl -w
package Report;
use strict;
use Compress::Zlib;
# ----------------------------------------------------------------------
# File-scoped configuration shared by the subs below.
# ----------------------------------------------------------------------

# Location of the raw logs. Not referenced by any sub visible in this file.
my $log_dir   = "/path/to/logs";

# Directory that run() scans for gzip-compressed cache files.
my $cache_dir = "/path/to/log_cache";

# Directories (relative to the current working directory) that hold the
# filter rule files read by _load_filter() and the map files read by
# load_map().
my $flt_dir = 'flt';
my $map_dir = 'map';

# Known server names. Not referenced by any sub visible in this file.
my @servers = ("server1", "server2", "server3", "server4", "server5", "server6");

# Report tag => file-name suffix of the cache files belonging to that tag.
# _check_tags() uses the keys to validate tags; run() uses the values to
# select cache files.
my %cache_map = (
    'all'      => '-total.cache.gz',
    'www'      => '-www.cache.gz',
    'channel1' => '-channel1.cache.gz',
    'channel2' => '-channel2.cache.gz',
    'channel3' => '-channel3.cache.gz',
);
# Constructor.
#
# May be invoked on a class name or on an existing object (in which case
# the new object is blessed into that object's class).
#
# Initial state:
#   TAG      - undef; _check_tags() substitutes the default tag later
#   FLT      - empty list of compiled filter rules
#   FLT_FILE - 'standard'
#   DAILY, VERBOSE, HITS, LINES - 0
#   START, END - yesterday's date as YYYY-MM-DD (local time)
#
# Returns the blessed object.
sub new
{
    my $proto = shift;
    my $class = ref($proto) || $proto;

    # "Yesterday" is taken as exactly 24 hours before now, in local time.
    # NOTE(review): around a DST change this can land on the wrong
    # calendar day when called close to midnight - confirm acceptable.
    my @date = localtime(time - (24 * 60 * 60));
    my $yesterday = sprintf("%d-%02d-%02d", $date[5] + 1900, $date[4] + 1, $date[3]);

    my $self = {
        TAG      => undef,
        # An array ref (rather than undef) so that reading the rule list
        # before any filter has been loaded does not die under strict refs.
        FLT      => [],
        FLT_FILE => 'standard',
        DAILY    => 0,
        VERBOSE  => 0,
        HITS     => 0,
        LINES    => 0,
        START    => $yesterday,
        END      => $yesterday,
    };

    return bless($self, $class);
}
# Validate the object's tag list against %cache_map.
#
# When no tags have been chosen (TAG is undef or an empty list) the
# default tag 'all' is used. Dies with "Unrecognized tag: ..." on the
# first tag that is not a key of %cache_map.
sub _check_tags
{
    my $self = shift;

    # A defined-but-empty list must get the default too, otherwise
    # clearing the tags would leave nothing selected.
    if (!defined($self->{TAG}) || !@{$self->{TAG}}) { push @{$self->{TAG}}, ('all') }

    foreach my $tag (@{$self->{TAG}})
    {
        if (!defined($tag) || !exists($cache_map{$tag}))
        {
            my $shown = defined($tag) ? $tag : '';
            die "Unrecognized tag: $shown\n";
        }
    }
}
# Validate the object's START and END dates.
#
# Both must look like <digits>-<digits>-<digits> and START must not be
# later than END. On success both values are rewritten in zero-padded
# YYYY-MM-DD form: the range check here and the per-file check in run()
# are plain string comparisons, which are only correct for padded dates.
#
# Dies with "Invalid date or date range: ..." otherwise.
sub _check_dates
{
    my $self = shift;

    my %padded;
    foreach my $edge (qw(START END))
    {
        my $raw = $self->{$edge};
        if (defined($raw) && $raw =~ /^(\d+)-(\d+)-(\d+)$/)
        {
            $padded{$edge} = sprintf("%04d-%02d-%02d", $1, $2, $3);
        }
    }

    if (!defined($padded{START}) || !defined($padded{END}) || $padded{START} gt $padded{END})
    {
        # Report the values as supplied, not the normalised ones.
        my ($start, $end) = map { defined($_) ? $_ : '' } @{$self}{qw(START END)};
        die "Invalid date or date range: ($start) -> ($end)";
    }

    @{$self}{qw(START END)} = @padded{qw(START END)};
}
# Read the filter file "$flt_dir/<FLT_FILE>" and append its rules to
# $self->{FLT}.
#
# Lines starting with '#' are skipped. A rule line contains
#     accept|reject <one whitespace> =|! <one whitespace> <regex>
# and is stored as [ compiled regex, flag, action ]. Lines that do not
# contain a rule are ignored.
#
# Dies if the filter file cannot be opened (previously this went
# unnoticed and left the object without any rules).
sub _load_filter
{
    my $self = shift;

    my $path = "$flt_dir/$self->{FLT_FILE}";
    open(my $fh, '<', $path)
        or die "Couldn't open filter file $path: $!\n";

    # Guarantee an array ref even when the file holds no rules.
    $self->{FLT} ||= [];

    while (my $line = <$fh>)
    {
        next if $line =~ /^#/;

        my ($action, $flag, $pattern) = $line =~ /(accept|reject)\s(=|!)\s(.*)/;
        next unless defined($action);   # the three captures succeed or fail together

        push @{$self->{FLT}}, [ qr/$pattern/, $flag, $action ];
    }

    close($fh);
}
# Configure the object from a command-line style argument list, then
# validate tags and dates and load the filter file.
#
# Recognised options:
#   -t, --tag     TAG    add a report tag (may be repeated)
#   -s, --start   DATE   first day of the range
#   -e, --end     DATE   last day of the range
#   -d, --date    DATE   use DATE as both first and last day
#   -f, --filter  NAME   filter file name
#   -a, --daily          set the DAILY flag
#   -v, --verbose        increase verbosity (may be repeated)
#
# Anything else is ignored. Dies when an option that needs a value is
# given none (a false value such as '0' counts as missing), or when the
# resulting tags / dates are invalid.
sub init
{
    my $self = shift;

    while (defined(my $opt = shift))
    {
        if ($opt eq '-t' || $opt eq '--tag')
        {
            # Fetch the value first: "push ..., shift or die" can never
            # die because push itself always returns a true count.
            my $tag = shift or die "Missing tag!\n";
            push @{$self->{TAG}}, $tag;
        }
        elsif ($opt eq '-s' || $opt eq '--start')   { $self->{START} = shift or die "Missing start date!\n" }
        elsif ($opt eq '-e' || $opt eq '--end')     { $self->{END} = shift or die "Missing end date!\n" }
        elsif ($opt eq '-d' || $opt eq '--date')    { $self->{START} = $self->{END} = shift or die "Missing date!\n" }
        elsif ($opt eq '-f' || $opt eq '--filter')  { $self->{FLT_FILE} = shift or die "Missing filter!" }
        elsif ($opt eq '-a' || $opt eq '--daily')   { $self->{DAILY} = 1 }
        elsif ($opt eq '-v' || $opt eq '--verbose') { $self->{VERBOSE}++ }
    }

    _check_tags($self);
    _check_dates($self);
    _load_filter($self);
}
# Return the list of selected report tags (empty if none have been set yet).
sub get_tags { my $self = shift; return @{ $self->{TAG} || [] } }
# Accessor: first day of the reporting range, as stored in START.
sub get_start_date
{
    my ($self) = @_;
    return $self->{START};
}
# Accessor: last day of the reporting range, as stored in END.
sub get_end_date
{
    my ($self) = @_;
    return $self->{END};
}
# Return information about the active filter.
#
# In list context: the loaded rules, each a [ regex, flag, action ]
# array ref (empty list when none are loaded).
# In scalar context: the filter file name.
sub get_filter
{
    my $self = shift;
    # The rules are kept under FLT; there is no FILTER slot on the object.
    return wantarray ? @{ $self->{FLT} || [] } : $self->{FLT_FILE};
}
# Replace the object's tag list with the given tags and validate it.
#
# Dies (via _check_tags) if any tag is unknown. The (@) prototype is
# kept only so the declaration is unchanged; Perl ignores prototypes on
# method calls.
sub set_tags(@)
{
    my $self = shift;
    @{$self->{TAG}} = @_;
    # Pass the object along - calling the checker with no argument
    # validates nothing.
    _check_tags($self);
}
# Set one or both ends of the reporting range and validate the result.
#
#   $type eq 'START' - set the first day
#   $type eq 'END'   - set the last day
#   anything else    - set both to $val
#
# Dies (via _check_dates) if the resulting range is invalid.
sub set_date
{
    my ($self, $type, $val) = @_;

    if    ($type eq 'START') { $self->{START} = $val }
    elsif ($type eq 'END')   { $self->{END} = $val }
    else                     { $self->{START} = $self->{END} = $val }

    # The checker needs the object; without it every call would die.
    _check_dates($self);
}
# Switch to another filter file: discard the currently loaded rules and
# load the ones from "$filter".
sub set_filter
{
    my ($self, $filter) = @_;

    $self->{FLT_FILE} = $filter;

    # Reset to an empty list rather than deleting the slot, so that a
    # filter file without any rules still leaves a usable array ref.
    $self->{FLT} = [];

    _load_filter($self);
}
# Register a callback under a hook name.
#
# The callback is stored on the object under the key "HOOK_<name>";
# run() invokes the one registered as 'PROC'.
sub hook
{
    my ($self, $name, $callback) = @_;
    my $slot = "HOOK_" . $name;
    $self->{$slot} = $callback;
}
# Load a pattern map from "$map_dir/<map_file>".
#
# Lines starting with '#' are skipped. Every other line that contains at
# least one tab is split at its last run of tabs into a regex (left) and
# a value (right).
#
# Returns a list of [ compiled regex, value ] array refs, in file order.
# Dies if the map file cannot be opened (previously this went unnoticed
# and produced an empty map).
sub load_map
{
    my ($self, $map_file) = @_;

    my $path = "$map_dir/$map_file";
    open(my $fh, '<', $path)
        or die "Couldn't open map file $path: $!\n";

    my @re_map;
    while (my $line = <$fh>)
    {
        next if $line =~ /^#/;

        my ($pattern, $val) = $line =~ /(.*)\t+(.*)/;
        next unless defined($pattern);   # both captures succeed or fail together

        push @re_map, [ qr/$pattern/, $val ];
    }
    close($fh);

    return @re_map;
}
# Format a number with thousands separators: 1234567 -> "1,234,567".
#
# Only the leading integer part is grouped; an optional sign in front
# and anything after the integer digits (e.g. a decimal fraction) are
# kept as they are. Values that do not start with an (optionally signed)
# run of digits, and undef, are returned unchanged.
sub commanum {
    my ($self, $num) = @_;

    return $num unless defined($num);

    my ($sign, $int, $rest) = $num =~ /^([-+]?)(\d+)(.*)$/s
        or return $num;

    # Insert one comma per pass, working from the right, until no group
    # of more than three leading digits remains.
    1 while $int =~ s/^(\d+)(\d{3})/$1,$2/;

    return $sign . $int . $rest;
}
# Scan the cache directory and feed every accepted cache line to the
# 'PROC' hook.
#
# A cache file is processed when its name ends in the suffix of one of
# the selected tags, contains "YYYY-MM-DD-<word>", and that date lies
# within START..END (inclusive, compared as strings). Each file is read
# as gzip; a data line is expected to contain "<digits><TAB><non-space>"
# (views, url).
#
# For each data line the filter rules are tried in order and the first
# rule that applies decides: flag '=' applies when the url matches the
# rule's regex, flag '!' when it does not. If that rule's action is
# 'accept' the PROC hook is called as
#     hook($year, $month, $day, $tag_word, $url, $views)
# Lines to which no rule applies are dropped. $self->{LINES} counts
# every line read, accepted or not.
#
# Dies if the cache directory or a selected cache file cannot be opened,
# or if a line is accepted while no PROC hook is registered.
sub run
{
    my $self = shift;

    # Tolerate an object on which no filter has been loaded.
    my $rules = $self->{FLT} || [];

    if ($self->{VERBOSE}) { print STDERR "Using dates: $self->{START} -> $self->{END}\n" }
    if ($self->{VERBOSE} > 1)
    {
        print STDERR "Filters:\n";
        print STDERR "$$_[2]\t$$_[1]\t$$_[0]\n" foreach @$rules;
    }

    opendir(my $dh, $cache_dir)
        or die "Couldn't open directory: $!\n";
    my @filelist = sort(readdir $dh);
    closedir $dh;

    # The suffixes contain '.', so escape them before building the regex.
    my $tag_list = join('|', map { quotemeta($_) } @cache_map{ @{ $self->{TAG} || [] } });
    my $file_pat = qr/(?:$tag_list)$/;

    # Every entry is tested against the pattern; '.' and '..' simply fail
    # it, so there is no need to assume they are the first two entries
    # after sorting (which is not guaranteed).
    foreach my $file (@filelist)
    {
        next unless $file =~ $file_pat;

        my ($year, $month, $day, $cur_tag) = $file =~ /(\d{4})-(\d{2})-(\d{2})-(\w+)/
            or next;    # right suffix but no date stamp in the name

        my $date_stamp = "$year-$month-$day";
        next if $date_stamp lt $self->{START} || $date_stamp gt $self->{END};

        if ($self->{VERBOSE}) { print STDERR "grabbing $cache_dir/$file\n" }

        my $gz = gzopen("$cache_dir/$file", "rb")
            or die "Couldn't open $cache_dir/$file: $gzerrno\n";

        my $line;
        while ($gz->gzreadline($line) > 0)
        {
            # Lines that do not have the views/url shape are counted but
            # never matched against the rules.
            if (my ($views, $url) = $line =~ /([0-9]+)\t(\S+)/)
            {
                foreach my $rule (@$rules)
                {
                    my ($regex, $flag, $action) = @$rule;
                    my $hit = ($url =~ $regex) ? 1 : 0;

                    next unless ($flag eq '=' && $hit) || ($flag eq '!' && !$hit);

                    if ($action eq 'accept')
                    {
                        my $proc = $self->{HOOK_PROC};
                        die "No PROC hook registered\n" unless defined($proc);
                        $proc->($year, $month, $day, $cur_tag, $url, $views);
                    }
                    last;   # first applicable rule wins
                }
            }

            $self->{LINES}++;
            if ($self->{VERBOSE} > 2 && $self->{LINES} % 10000 == 0) { print STDERR "$self->{LINES} lines processed\n"; }
        }
        $gz->gzclose();
    }
}
1;
# In reply to Report.pm
# by vxp