#!/usr/local/bin/perl -w
#
# Nothing special, just some Friday night excitement: a script to take an
# Apache referer log and print some information about the search engines and
# terms that people are using to reach your website. I'd be curious about
# better ways to do this & existing scripts that do this better....
#
# Output (on STDOUT):
#   1. number of searches per referring host, sorted by host name
#   2. every search word with its count, most frequent first
#   3. every search phrase with its count, most frequent first

use strict;
use warnings;

# ref.txt comes from:
#   'ls /var/log/apache/referr.*.gz | xargs zcat >> ~/ref.txt'
#   'cat /var/log/apache/referer.log >> ~/ref.txt'
my $log_file = './ref.txt';

# None of these are search engines.
my $ignore_hosts = qr(\Qlctc.org\E|\Qgradetheprof.net\E|\Q10.0.0\E|perlmonks);
# qr((\Qlctc.org\E)|(\Qgradetheprof.net\E)|(\Q10.0.0\E)|(perlmonks));

# Text stripped from the query string before counting: a '%' plus the two
# characters after it, or any single character followed by '='.
my $garbage = qr(%..|.=);

my %hosts;             # host name     => number of matching log lines
my %search_phrases;    # cleaned query => number of occurrences
my %search_words;      # single word   => number of occurrences

# 'or' (not '||') so that die is attached to open() rather than to the
# file name string, which is always true.
open(my $fh, '<', $log_file) or die "Cannot open $log_file: $!";

# Read line by line; the log is only traversed once, so there is no need to
# hold all of it in memory.
LINE: while (my $line = <$fh>) {

    # Only lines with a query string that contains a '+' (i.e. at least two
    # search terms), and that do not come from one of our own hosts/CGIs.
    next LINE unless $line =~ m/\?/ && $line =~ m/\+/;
    next LINE if $line =~ m/$ignore_hosts/;

    # Skip lines that do not have the expected shape. Without this check the
    # capture variables would still hold the values of the previous
    # successful match and that line would be counted a second time.
    next LINE unless $line =~ m{
        (https?://)        # scheme
        ([a-z0-9.-]*)      # hostname (digits and hyphens are legal too)
        (.*\?)             # bit before query string
        (.*)               # search string
        (->.*)             # page referred to
    }xi;
    my ($host, $search_phrase) = ($2, $4);

    $hosts{$host}++;

    $search_phrase =~ s/$garbage//g;
    # NOTE: everything from the first '&' on is dropped, so search terms that
    # are not in the first query parameter are lost.
    $search_phrase =~ s/&.*$//;
    $search_phrase =~ s/\+/ /g;
    # The capture includes the blank in front of '->'; trim so that the same
    # phrase is not counted once with and once without trailing whitespace.
    $search_phrase =~ s/^\s+|\s+$//g;
    $search_phrase = lc $search_phrase;

    # Nothing left after cleanup: the host was counted, but there is no
    # phrase worth reporting.
    next LINE unless length $search_phrase;

    $search_phrases{$search_phrase}++;

    # split ' ' collapses runs of blanks, so "foo++bar" does not produce an
    # empty word.
    for my $word (split ' ', $search_phrase) {
        $search_words{$word}++;
    }
}
close($fh);

for my $host (sort keys %hosts) {
    print "$hosts{$host} searches from $host\n";
}

# Most frequent first; ties are broken alphabetically so that the output is
# stable from run to run.
print "\n search words:\n";
for my $key (sort { $search_words{$b} <=> $search_words{$a} || $a cmp $b }
             keys %search_words) {
    print "$search_words{$key} $key\n";
}

print "\n search phrases:\n";
for my $key (sort { $search_phrases{$b} <=> $search_phrases{$a} || $a cmp $b }
             keys %search_phrases) {
    print "$search_phrases{$key} $key\n";
}