in reply to Re: Re: Re: Pulling by regex II
in thread Pulling by regex II
#!/usr/local/bin/perl -slwT
#
# CGI report: print the ten most frequent referers found in the access log
# among requests made within the last <hour> hours and <minute> minutes.
#
# CGI parameters:
#   hour    - one or two digits
#   minute  - one or two digits
#
# Dies with a short message when a parameter is missing or malformed, or
# when the log file cannot be opened.
use strict;
use warnings;
use Date::Manip;
use CGI qw/:standard/;

# Make sure neither we, nor any of our submodules compromise security
# by calling unpathed programs. Taint mode also refuses to run external
# commands while the other shell-affecting variables are still set.
$ENV{PATH} = "/bin:/usr/bin";
$ENV{IFS}  = "";
delete @ENV{qw(CDPATH ENV BASH_ENV)};

# Use CGI to print our headers
print header, "\n\n";

# Referer string => number of matching requests.
my %hits_for = ();

# Retrieve and security-check parameters. The defined() test avoids an
# "uninitialized value" warning when a parameter is absent, and \z (unlike $)
# rejects a value that ends in a newline.
my $hour   = param('hour');
my $minute = param('minute');
if (!defined $hour || $hour !~ /\A\d\d?\z/) {
    die('Invalid hour');
}
if (!defined $minute || $minute !~ /\A\d\d?\z/) {
    die('Invalid minute');
}

# Get date object for our check point
my $check_date = ParseDate("${hour}hours ${minute}minutes ago");

# One access-log line. Capture 1 is the timestamp without its zone offset,
# capture 2 is the referer. The square brackets around the timestamp are
# optional so that both bracketed and bracket-less logs match, and the byte
# count may be "-" (as logged for 304 responses).
my $log_line_re = qr{
    ^ \S+ [ ] \S+ [ ] \S+ [ ]           # host, ident, user
    \[? (\S+) [ ] [^\]\s]+ \]? [ ]      # timestamp, zone offset
    "[^"]+" [ ]                         # request line
    \d+ [ ] (?:\d+|-) [ ]               # status, bytes sent
    "([^"]+)"                           # referer
}x;

# File handling, one line at a time
open(my $log_fh, '<', 'datafile.html')
    or die("Could not open log file: $!");

LINE:
while (my $line = <$log_fh>) {
    # Copy the captures into lexicals straight away so that later code does
    # not depend on $1/$2 surviving the calls below.
    my ($stamp, $referer) = $line =~ $log_line_re
        or next LINE;

    my $line_date = ParseDate($stamp);

    # Check to see if the line date is in the range we're after
    next LINE unless Date_Cmp($line_date, $check_date) > 0;

    # ++ on a missing hash entry starts counting from zero.
    $hits_for{$referer}++;
}
close($log_fh);

my $row = 0;

# Sort our referers by the number of hits
for my $referer (sort { $hits_for{$b} <=> $hits_for{$a} } keys %hits_for) {
    # break out after the tenth one
    last if $row++ == 10;
    print "$referer: " . $hits_for{$referer} . "\n";
}
Recent data:
68.22.179.211 - - 15/Dec/2002:14:52:13 -0500 "GET /images/69.gif HTTP/1.1" 200
1348 "http://www.indystar.com/print/articles/6/008596-6466-040.html" "Mozilla/4
.0 (compatible; MSIE 5.5; Windows 98)"
141.154.123.193 - - 15/Dec/2002:14:52:13 -0500 "GET /images/header_aod2_01.gif
HTTP/1.0" 200 2011 "http://www.indystar.com/print/articles/2/008227-9652-031.ht
ml" "Mozilla/4.79 en (Windows NT 5.0; U)"
141.154.123.193 - - 15/Dec/2002:14:52:13 -0500 "GET /images/header_aod2_15.gif
HTTP/1.0" 200 4162 "http://www.indystar.com/print/articles/2/008227-9652-031.ht
ml" "Mozilla/4.79 en (Windows NT 5.0; U)"
141.154.123.193 - - 15/Dec/2002:14:52:13 -0500 "GET /images/header_aod2_10.gif
HTTP/1.0" 200 3034 "http://www.indystar.com/print/articles/2/008227-9652-031.ht
ml" "Mozilla/4.79 en (Windows NT 5.0; U)"
141.154.123.193 - - 15/Dec/2002:14:52:13 -0500 "GET /images/go_blue.gif HTTP/1
.0" 200 133 "http://www.indystar.com/print/articles/2/008227-9652-031.html" "Moz
illa/4.79 en (Windows NT 5.0; U)"
141.154.123.193 - - 15/Dec/2002:14:52:13 -0500 "GET /images/aod_searchend2.gif
HTTP/1.0" 200 186 "http://www.indystar.com/print/articles/2/008227-9652-031.htm
l" "Mozilla/4.79 en (Windows NT 5.0; U)"
24.79.125.220 - - 15/Dec/2002:14:52:13 -0500 "GET /images/coheader2_aod_08.gif
HTTP/1.1" 304 - "http://www.indystar.com/forums/showthread.php?s=&postid=177044
" "Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; Win 9x 4.90; Q312461)"
24.79.125.220 - - 15/Dec/2002:14:52:13 -0500 "GET /images/coheader2_aod_10.gif
HTTP/1.1" 304 - "http://www.indystar.com/forums/showthread.php?s=&postid=177044
" "Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; Win 9x 4.90; Q312461)"
141.154.123.193 - - 15/Dec/2002:14:52:13 -0500 "GET /images/email.gif HTTP/1.0
" 200 138 "http://www.indystar.com/print/articles/2/008227-9652-031.html" "Mozil
la/4.79 en (Windows NT 5.0; U)"
66.149.178.96 - - 15/Dec/2002:14:52:14 -0500 "GET /forums/showthread.php?s=&po
stid=177042 HTTP/1.1" 200 7302 "-" "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1
.0.1) Gecko/20021003"
24.79.125.220 - - 15/Dec/2002:14:52:14 -0500 "GET /images/coheader2_aod_11.gif
HTTP/1.1" 200 954 "http://www.indystar.com/forums/showthread.php?s=&postid=1770
44" "Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; Win 9x 4.90; Q312461)"
|
---|
Replies are listed 'Best First'. | |
---|---|
Re: Re: Re: Re: Re: Pulling by regex II
by PhiRatE (Monk) on Dec 16, 2002 at 10:54 UTC | |
by mkent (Acolyte) on Dec 19, 2002 at 16:51 UTC |