blah blah blah GET /some/path/to/file.htm blah blah
####
[root@devel3 root]# cat ./simple_log_parse.pl
#!/usr/bin/perl -w
# simple_log_parse.pl - count requests that fall under a fixed set of URL
# prefixes.
#
# Reads every file whose name starts with $LOG_PATH (whatever the glob below
# matches) and reports, for each prefix in @FIND, how many lines contain a
# GET or POST request whose path begins with that prefix, alongside the
# total number of lines read and the resulting percentage.
use strict;

my $LOG_PATH = '/var/log/httpd/access_log';

# URL prefixes to count.
my @FIND = qw(
    /modperl/
    /cgi-bin/
    /images/
);

# Build a single alternation of all prefixes. quotemeta keeps any regex
# metacharacters in a prefix literal; qr// compiles the pattern once.
my $re = join '|', map { quotemeta } @FIND;
$re = qr/$re/;

my @logs = glob("$LOG_PATH*");
my %hash;          # prefix => number of matching lines
my $total = 0;     # every line read, whether it matched or not

for my $log (@logs) {
    print "Processing $log\n";

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode or a pipe, and the handle is scoped to this loop.
    open my $fh, '<', $log or die "Can't open $log $!\n";

    # Read the log one line at a time into $_. An empty while() condition
    # would be treated as always true and never read anything.
    while (<$fh>) {
        $total++;
        next unless m/(?:GET|POST) ($re)/;
        $hash{$1}++;
    }
    close $fh;
}

print "\n\nResults\n";

# %hash only has entries when at least one line was read, so $total is
# non-zero whenever this loop body runs.
for ( keys %hash ) {
    printf "%-20s %8d/%-8d (%.2f%%)\n", $_, $hash{$_}, $total, (100*$hash{$_}/$total);
}
[root@devel3 root]# ./simple_log_parse.pl
Processing /var/log/httpd/access_log
Processing /var/log/httpd/access_log.1
Processing /var/log/httpd/access_log.2
Processing /var/log/httpd/access_log.3
Processing /var/log/httpd/access_log.4
Processing /var/log/httpd/access_log.5
Processing /var/log/httpd/access_log.6
Results
/images/ 170212/376847 (45.17%)
/modperl/ 186210/376847 (49.41%)
/cgi-bin/ 5366/376847 (1.42%)
[root@devel3 root]#
##
##
[root@devel3 root]# cat ./simple_log_parse2.pl
#!/usr/bin/perl -w
# simple_log_parse2.pl - count requests per directory.
#
# Reads every file whose name starts with $LOG_PATH (whatever the glob below
# matches), takes the path of each GET or POST request, drops the query
# string and the final path component, and tallies the remaining directory.
# Results are printed in descending order of hit count, alongside the total
# number of lines read and the resulting percentage.
use strict;

my $LOG_PATH = '/var/log/httpd/access_log';

my @logs = glob("$LOG_PATH*");
my %hash;          # directory => number of requests
my $total = 0;     # every line read, whether it matched or not

for my $log (@logs) {
    print "Processing $log\n";

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode or a pipe, and the handle is scoped to this loop.
    open my $fh, '<', $log or die "Can't open $log $!\n";

    # Read the log one line at a time into $_. An empty while() condition
    # would be treated as always true and never read anything.
    while (<$fh>) {
        $total++;
        next unless m/(?:GET|POST) ([^\s]+)/;
        my $path = $1;

        # Drop the query string. A substitution is used instead of
        # split /\?/ because split returns an empty list for a path of
        # just "?", which would leave $path undefined.
        $path =~ s/\?.*//;

        # Strip the last path component, keeping the trailing slash.
        $path =~ s![^/]+$!!;
        $hash{$path}++;
    }
    close $fh;
}

print "\n\nResults\n";

# Busiest directories first. %hash only has entries when at least one line
# was read, so $total is non-zero whenever this loop body runs.
for ( sort { $hash{$b} <=> $hash{$a} } keys %hash ) {
    printf "%-20s %8d/%-8d (%.2f%%)\n", $_, $hash{$_}, $total, (100*$hash{$_}/$total);
}
[root@devel3 root]#