#!/usr/bin/perl -w
package Report;

# Report - walk the gzip-compressed per-day cache files in $cache_dir,
# run every data line through an ordered accept/reject filter list and
# hand each accepted line to a caller-supplied callback.
#
# Typical use, as far as this file shows:
#
#   my $report = Report->new;
#   $report->hook('PROC', \&handle_line);
#   $report->init(@ARGV);
#   $report->run;

use strict;
use warnings;

use Compress::Zlib;

my $log_dir   = "/path/to/logs";
my $cache_dir = "/path/to/log_cache";
my $flt_dir   = 'flt';
my $map_dir   = 'map';
my @servers   = ("server1", "server2", "server3", "server4", "server5", "server6");

# Tag name => file-name suffix of the cache files belonging to that tag.
my %cache_map = (
    'all'      => '-total.cache.gz',
    'www'      => '-www.cache.gz',
    'channel1' => '-channel1.cache.gz',
    'channel2' => '-channel2.cache.gz',
    'channel3' => '-channel3.cache.gz',
);

# new()
# Construct a report object whose date range defaults to yesterday
# (local time), with no tags selected and the 'standard' filter file.
# HITS and DAILY are initialised here; nothing else in this file reads
# or updates HITS, and DAILY is only ever set by init().
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;

    my @date      = localtime(time - (24 * 60 * 60));
    my $yesterday = sprintf("%d-%02d-%02d", $date[5] + 1900, $date[4] + 1, $date[3]);

    my $self = {
        # Array refs rather than undef so later dereferences are safe
        # under "use strict" even when nothing has been loaded yet.
        TAG      => [],
        FLT      => [],
        FLT_FILE => 'standard',
        DAILY    => 0,
        VERBOSE  => 0,
        HITS     => 0,
        LINES    => 0,
        START    => $yesterday,
        END      => $yesterday,
    };

    return bless($self, $class);
}

# _check_tags($self)
# Default the tag list to ('all') when it is empty, then die with
# "Unrecognized tag: ..." if any tag is not a key of %cache_map.
sub _check_tags {
    my $self = shift;

    my $tags = $self->{TAG} ||= [];
    push @{$tags}, 'all' unless @{$tags};

    for my $tag (@{$tags}) {
        my $name = defined $tag ? $tag : '';
        die "Unrecognized tag: $name\n" unless exists $cache_map{$name};
    }
    return;
}

# _check_dates($self)
# Validate START and END (digits-digits-digits, START not after END) and
# rewrite both as zero-padded YYYY-MM-DD.  The padding matters because
# the range tests here and in run() are plain string comparisons.
# Dies on a malformed date or an inverted range.
sub _check_dates {
    my $self = shift;

    my %normalised;
    for my $edge (qw(START END)) {
        my $raw = $self->{$edge};
        next unless defined $raw;
        my ($year, $month, $day) = $raw =~ /\A(\d+)-(\d+)-(\d+)\z/;
        next unless defined $day;
        $normalised{$edge} = sprintf("%04d-%02d-%02d", $year, $month, $day);
    }

    if (   !defined $normalised{START}
        || !defined $normalised{END}
        || $normalised{START} gt $normalised{END})
    {
        my $start = defined $self->{START} ? $self->{START} : '';
        my $end   = defined $self->{END}   ? $self->{END}   : '';
        die "Invalid date or date range: ($start) -> ($end)";
    }

    @{$self}{qw(START END)} = @normalised{qw(START END)};
    return;
}

# _load_filter($self)
# Read $flt_dir/<FLT_FILE> and replace $self->{FLT} with its rules.
# Lines starting with '#' are skipped.  A rule line contains
#   (accept|reject) <ws> (=|!) <ws> <regex>
# and is stored as [ compiled regex, flag, action ]; any other line is
# ignored.  Dies if the file cannot be opened.
sub _load_filter {
    my $self = shift;

    my $path = "$flt_dir/$self->{FLT_FILE}";
    open(my $fh, '<', $path)
        or die "Couldn't open filter file $path: $!\n";

    my @rules;
    while (my $line = <$fh>) {
        next if $line =~ /^#/;
        my ($action, $flag, $pattern) = $line =~ /(accept|reject)\s(=|!)\s(.*)/;
        next unless defined $action && defined $flag && defined $pattern;
        push @rules, [ qr/$pattern/, $flag, $action ];
    }
    close($fh);

    # Assign only after a successful read so a failed load cannot leave
    # a half-built rule list behind.
    $self->{FLT} = \@rules;
    return;
}

# init(@args)
# Parse command-line style options, then validate tags and dates and
# load the filter file.  Recognised options:
#   -t | --tag TAG       add a tag (repeatable)
#   -s | --start DATE    first day of the range
#   -e | --end DATE      last day of the range
#   -d | --date DATE     single day (sets both ends)
#   -f | --filter FILE   filter file name inside $flt_dir
#   -a | --daily         set the DAILY flag
#   -v | --verbose       raise verbosity (repeatable)
# Unrecognised arguments are ignored.  Dies when an option is missing
# its value or when validation / filter loading fails.
sub init {
    my ($self, @args) = @_;

    # Fetch the value that must follow an option, or die with $message.
    my $value_or_die = sub {
        my ($message) = @_;
        my $value = shift @args;
        die $message unless defined $value && length $value;
        return $value;
    };

    while (@args) {
        my $opt = shift @args;
        last unless defined $opt;

        if ($opt eq '-t' || $opt eq '--tag') {
            push @{ $self->{TAG} }, $value_or_die->("Missing tag!\n");
        }
        elsif ($opt eq '-s' || $opt eq '--start') {
            $self->{START} = $value_or_die->("Missing start date!\n");
        }
        elsif ($opt eq '-e' || $opt eq '--end') {
            $self->{END} = $value_or_die->("Missing end date!\n");
        }
        elsif ($opt eq '-d' || $opt eq '--date') {
            $self->{START} = $self->{END} = $value_or_die->("Missing date!\n");
        }
        elsif ($opt eq '-f' || $opt eq '--filter') {
            $self->{FLT_FILE} = $value_or_die->("Missing filter!");
        }
        elsif ($opt eq '-a' || $opt eq '--daily') {
            $self->{DAILY} = 1;
        }
        elsif ($opt eq '-v' || $opt eq '--verbose') {
            $self->{VERBOSE}++;
        }
    }

    $self->_check_tags;
    $self->_check_dates;
    $self->_load_filter;
    return;
}

# get_tags() - list of currently selected tags.
sub get_tags {
    my $self = shift;
    return @{ $self->{TAG} || [] };
}

# get_start_date() - first day of the range.
sub get_start_date {
    my $self = shift;
    return $self->{START};
}

# get_end_date() - last day of the range.
sub get_end_date {
    my $self = shift;
    return $self->{END};
}

# get_filter()
# List context: the loaded rules, each [ regex, flag, action ].
# Scalar context: the filter file name.
sub get_filter {
    my $self = shift;
    return wantarray ? @{ $self->{FLT} || [] } : $self->{FLT_FILE};
}

# set_tags(@tags)
# Replace the tag list and validate it; an empty list becomes ('all').
# Dies on an unknown tag.
sub set_tags {
    my ($self, @tags) = @_;
    $self->{TAG} = [@tags];
    $self->_check_tags;
    return;
}

# set_date($type, $val)
# $type 'START' or 'END' sets that end of the range; any other $type
# sets both ends to $val.  Dies if the resulting range is invalid.
sub set_date {
    my ($self, $type, $val) = @_;
    $type = '' unless defined $type;

    if ($type eq 'START') {
        $self->{START} = $val;
    }
    elsif ($type eq 'END') {
        $self->{END} = $val;
    }
    else {
        $self->{START} = $self->{END} = $val;
    }

    $self->_check_dates;
    return;
}

# set_filter($filter)
# Switch to another filter file inside $flt_dir and reload the rules.
sub set_filter {
    my ($self, $filter) = @_;
    $self->{FLT_FILE} = $filter;
    $self->_load_filter;
    return;
}

# hook($name, $coderef)
# Store $coderef under "HOOK_$name".  run() invokes the hook named
# 'PROC' for every accepted line.  Returns the code ref.
sub hook {
    my ($self, $name, $hooksub) = @_;
    $self->{ "HOOK_" . $name } = $hooksub;
    return $hooksub;
}

# load_map($map_file)
# Read $map_dir/$map_file and return a list of [ compiled regex, value ]
# pairs.  Lines starting with '#' are skipped; on other lines the
# pattern and the value are separated by one or more tabs.  Dies if the
# file cannot be opened.
sub load_map {
    my ($self, $map_file) = @_;

    my $path = "$map_dir/$map_file";
    open(my $fh, '<', $path)
        or die "Couldn't open map file $path: $!\n";

    my @re_map;
    while (my $line = <$fh>) {
        next if $line =~ /^#/;
        my ($pattern, $val) = $line =~ /(.*)\t+(.*)/;
        next unless defined $pattern && defined $val;
        push @re_map, [ qr/$pattern/, $val ];
    }
    close($fh);

    return @re_map;
}

# commanum($num)
# Return $num with commas between each group of three digits of its
# leading integer part.  A leading sign and anything after the integer
# part are left untouched; undef is returned unchanged.
sub commanum {
    my ($self, $num) = @_;
    return $num unless defined $num;

    my $text = "$num";
    # Repeatedly split the last three digits off the leading digit run.
    1 while $text =~ s/^([-+]?\d+)(\d{3})/$1,$2/;
    return $text;
}

# run()
# For every file in $cache_dir whose name ends in the suffix of a
# selected tag and carries a YYYY-MM-DD-<tag> stamp inside the
# START..END range, read the gzip stream line by line.  A data line is
# "<digits>\t<non-space>" (views, url).  The first filter rule that
# applies to the url decides: on 'accept' the PROC hook is called as
#   $hook->($year, $month, $day, $tag, $url, $views)
# and on 'reject' the line is dropped.  Lines matched by no rule are
# dropped as well.  $self->{LINES} counts every line read.
# Dies if the directory or a cache file cannot be opened, or if a line
# is accepted while no PROC hook is registered.
sub run {
    my $self = shift;

    if ($self->{VERBOSE}) {
        print STDERR "Using dates: $self->{START} -> $self->{END}\n";
    }

    my @rules = @{ $self->{FLT} || [] };
    if ($self->{VERBOSE} > 1) {
        print STDERR "Filters:\n";
        print STDERR "$_->[2]\t$_->[1]\t$_->[0]\n" for @rules;
    }

    opendir(my $dh, $cache_dir) or die "Couldn't open directory: $!\n";
    my @filelist = sort { $a cmp $b } readdir($dh);
    closedir($dh);

    # Same default as _check_tags when run() is called before any tag
    # has been chosen.
    my @tags = @{ $self->{TAG} || [] };
    @tags = ('all') unless @tags;

    # quotemeta: the suffixes contain '.', which must match literally.
    my $suffix_alt = join('|', map { quotemeta } @cache_map{@tags});
    my $file_pat   = qr/(?:$suffix_alt)\z/;

    my $proc = $self->{HOOK_PROC};

    FILE:
    for my $file (@filelist) {
        # "." and ".." never match the suffix pattern, so no positional
        # skipping of the first two entries is needed.
        next FILE unless $file =~ $file_pat;

        my ($year, $month, $day, $cur_tag) =
            $file =~ /(\d{4})-(\d{2})-(\d{2})-(\w+)/;
        next FILE unless defined $cur_tag;

        my $date_stamp = "$year-$month-$day";
        next FILE
            unless $date_stamp ge $self->{START} && $date_stamp le $self->{END};

        my $path = "$cache_dir/$file";
        if ($self->{VERBOSE}) {
            print STDERR "grabbing $path\n";
        }

        my $gz = gzopen($path, "rb")
            or die "Couldn't open $path: $gzerrno\n";

        my $line;
        while ($gz->gzreadline($line) > 0) {
            my ($views, $url) = $line =~ /([0-9]+)\t(\S+)/;

            # Lines that do not parse are still counted below, but are
            # not offered to the filters.
            if (defined $url) {
                RULE:
                for my $rule (@rules) {
                    my ($re, $flag, $action) = @{$rule};
                    my $hit = $url =~ $re;

                    # '=' rules apply on a match, '!' rules on a miss.
                    next RULE
                        unless ($flag eq '=' && $hit) || ($flag eq '!' && !$hit);

                    if ($action eq 'accept') {
                        defined $proc
                            or die "No PROC hook registered; call hook('PROC', \$coderef) before run()\n";
                        $proc->($year, $month, $day, $cur_tag, $url, $views);
                    }
                    last RULE;    # first applicable rule wins
                }
            }

            $self->{LINES}++;
            if ($self->{VERBOSE} > 2 && $self->{LINES} % 10000 == 0) {
                print STDERR "$self->{LINES} lines processed\n";
            }
        }
        $gz->gzclose();
    }
    return;
}

1;