in reply to Re: Re: Pulling by regex II
in thread Pulling by regex II
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Re: Re: Re: Pulling by regex II
by mkent (Acolyte) on Dec 15, 2002 at 20:54 UTC | |
#!/usr/local/bin/perl -slwT
# CGI script: report the ten most frequent referers found in an access log
# for requests made within the last <hour> hours and <minute> minutes.
#
# CGI parameters:
#   hour   - one or two digits
#   minute - one or two digits
#
# Output: a CGI header followed by up to ten "referer: count" lines, ordered
# by descending hit count.
use strict;
use warnings;
use Date::Manip;
use CGI qw/:standard/;

# Make sure neither we, nor any of our submodules compromise security
# by calling unpathed programs.
$ENV{PATH} = "/bin:/usr/bin";
$ENV{IFS}  = "";

# Use CGI to print our headers
print header, "\n\n";

# Retrieve and security-check parameters.  param() returns undef when the
# parameter is absent, so test definedness before matching to avoid an
# "uninitialized value" warning under -w.
my $hour   = param('hour');
my $minute = param('minute');
if (!defined $hour   || $hour   !~ /^\d\d?$/) { die('Invalid hour'); }
if (!defined $minute || $minute !~ /^\d\d?$/) { die('Invalid minute'); }

# Get date object for our check point
my $check_date = ParseDate("${hour}hours ${minute}minutes ago");
die('Could not compute check date') unless $check_date;

# One log line: host, two single-character fields, timestamp plus zone
# offset, quoted request, two numeric fields, quoted referer.  The square
# brackets around the timestamp are optional so that both bracketed log
# lines and bracket-less ones (as in the sample data) are accepted.
# Captures: (1) timestamp without the zone offset, (2) referer.
my $log_line_re = qr/^\S+ \S \S \[?(\S+) \S+\]? "[^"]+" \d+ \d+ "([^"]+)"/;

# File handling, one line at a time
my %referers;
open(my $log_fh, '<', 'datafile.html')
    or die("Could not open log file: $!");
while (my $line = <$log_fh>) {
    # Copy the captures into named lexicals straight away; lines that do
    # not look like log entries are skipped.
    my ($stamp, $referer) = $line =~ $log_line_re
        or next;

    # Skip lines whose timestamp Date::Manip cannot parse rather than
    # comparing against an empty value.
    my $line_date = ParseDate($stamp);
    next unless $line_date;

    # Check to see if the line date is in the range we're after
    next unless Date_Cmp($line_date, $check_date) > 0;

    # ++ on a not-yet-existing hash element starts it at 1.
    $referers{$referer}++;
}
close($log_fh);

# Sort our referers by the number of hits, most frequent first; ties are
# broken alphabetically so the output order is stable between runs.
my @ranked = sort {
    $referers{$b} <=> $referers{$a}
        or
    $a cmp $b
} keys %referers;

my $row = 0;
for my $referer (@ranked) {
    # break out after the tenth one
    last if $row++ == 10;
    print "$referer: " . $referers{$referer} . "\n";
}
Recent data:
68.22.179.211 - - 15/Dec/2002:14:52:13 -0500 "GET /images/69.gif HTTP/1.1" 200
1348 "http://www.indystar.com/print/articles/6/008596-6466-040.html" "Mozilla/4
.0 (compatible; MSIE 5.5; Windows 98)" | [reply] |
by PhiRatE (Monk) on Dec 16, 2002 at 10:54 UTC | |
| [reply] |
by mkent (Acolyte) on Dec 19, 2002 at 16:51 UTC | |
Sorry it took me a little while to reply, still feeling my way around this site! | [reply] |