1: Nothing special, just some Friday night excitement: a script that takes an Apache referer log and prints some information about the search engines and terms that people are using to reach your website.
2:
3: I'd be curious about better ways to do this & existing scripts that do this better....
4:
#!/usr/local/bin/perl -w

# Summarise search-engine traffic found in an Apache referer log.
#
# Each interesting log line looks like "REFERER_URL -> /local/page".  For
# every such line whose referer carries a query string, the script counts the
# referring host, the search phrase and each individual search word, then
# prints three reports: hits per host, words by frequency and phrases by
# frequency.
#
# Usage: perl this_script [LOGFILE]     (LOGFILE defaults to ./ref.txt)

use strict;
use warnings;

#ref.txt comes from:
# 'ls /var/log/apache/referr.*.gz | xargs zcat >> ~/ref.txt'
# 'cat /var/log/apache/referer.log >> ~/ref.txt'

# none of these are search engines
my $ignore_hosts =
    qr(\Qlctc.org\E|\Qgradetheprof.net\E|\Q10.0.0\E|perlmonks);

# extract_search_phrase($query)
#
# Turn a raw query string ("q=perl+regex&hl=en") into a normalised search
# phrase ("perl regex").  Only the first parameter is considered.
# NOTE(review): engines that do not put the search terms in the first
# parameter will yield the wrong phrase -- same limitation as before.
#
# Returns the lower-cased phrase with '+' and %XX escapes decoded and
# whitespace collapsed/trimmed; may be the empty string.
sub extract_search_phrase {
    my ($query) = @_;

    my $phrase = $query;
    $phrase =~ s/&.*//s;           # keep the first parameter only
    $phrase =~ s/^[^=]*=//;        # drop the whole "name=" prefix
    $phrase =~ tr/+/ /;            # '+' encodes a space; do this before
                                   # decoding so a literal %2B stays '+'
    $phrase =~ s/%([0-9a-f]{2})/chr(hex($1))/gie;
    $phrase = lc $phrase;

    # The capture includes the blank in front of "->"; without trimming the
    # same phrase would be counted under two different keys.
    $phrase =~ s/\s+/ /g;
    $phrase =~ s/^ //;
    $phrase =~ s/ $//;

    return $phrase;
}

# parse_referer_line($line)
#
# Returns (host, phrase) for a log line that looks like a search-engine
# referral, or the empty list when the line should be skipped: no query
# string, no '+' anywhere in the line, an ignored host, or a line that does
# not have the "URL?query -> page" shape.
sub parse_referer_line {
    my ($line) = @_;

    # if there is a query string (with at least one '+')
    # and it isn't from our CGI
    return unless $line =~ /\?/ && $line =~ /\+/;
    return if $line =~ $ignore_hosts;

    # The match result must be checked: on failure the capture variables
    # would be undefined or left over from an earlier match.
    my ($host, $query) = $line =~ m{
        https?://
        ([a-z0-9.-]+)   # hostname (digits and hyphens are legal too)
        [^?]*\?         # bit before the query string, up to the FIRST mark
        (.*)            # search string
        ->.*            # page referred to
    }xi or return;

    # Host names are case-insensitive, so fold them to one key.
    return (lc $host, extract_search_phrase($query));
}

# main([$log_path])
#
# Read the referer log at $log_path (default ./ref.txt) line by line and
# print the three reports to STDOUT.  Dies if the file cannot be opened.
sub main {
    my ($log_path) = @_;
    $log_path = './ref.txt' unless defined $log_path;

    # 'or', not '||': with '||' the die is attached to the file name and
    # can never fire.
    open my $fh, '<', $log_path or die "Cannot open $log_path: $!";

    my %hosts;
    my %search_phrases;
    my %search_words;

    while (my $line = <$fh>) {
        my ($host, $phrase) = parse_referer_line($line) or next;

        $hosts{$host}++;

        next if $phrase eq '';
        $search_phrases{$phrase}++;

        # split ' ' skips leading blanks and collapses runs, so no
        # empty-string "words" are counted.
        $search_words{$_}++ for split ' ', $phrase;
    }
    close $fh;

    foreach my $host (sort keys %hosts) {
        print "$hosts{$host} searches from $host\n";
    }

    # Most frequent first; ties are broken alphabetically so the output is
    # stable from run to run.
    print "\n search words:\n";
    foreach my $key (sort { $search_words{$b} <=> $search_words{$a}
                            or $a cmp $b } keys %search_words) {
        print "$search_words{$key} $key\n";
    }

    print "\n search phrases:\n";
    foreach my $key (sort { $search_phrases{$b} <=> $search_phrases{$a}
                            or $a cmp $b } keys %search_phrases) {
        print "$search_phrases{$key} $key\n";
    }

    return;
}

# Run only when executed as a script, so the subs can be loaded and tested.
main(@ARGV) unless caller;
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: parse refer log
by merlyn (Sage) on Sep 29, 2001 at 12:23 UTC | |
by mandog (Curate) on Sep 29, 2001 at 23:26 UTC |