1: Nothing special, just some Friday night excitement: a script that takes an Apache referer log and prints some information about the search engines and search terms that people are using to reach your website.
   2: 
   3: I'd be curious to hear about better ways to do this, and about existing scripts that do it better.
   4: 
   5: #!/usr/local/bin/perl -w
   6: 
   7: use strict;
   8: 
   9: #ref.txt comes from:
  10: # 'ls /var/log/apache/referr.*.gz | xargs zcat >> ~/ref.txt'
  11: # 'cat /var/log/apache/referer.log >> ~/ref.txt'
  12: 
  13: open FH,"./ref.txt" || die "$!";
  14: my @lines=<FH>;
  15: close (FH);
  16: 
  17: # none of these are search engines
  18: my $ignore_hosts=
  19:     qr(\Qlctc.org\E|\Qgradetheprof.net\E|\Q10.0.0\E|perlmonks);
  20: #   qr((\Qlctc.org\E)|(\Qgradetheprof.net\E)|(\Q10.0.0\E)|(perlmonks));
  21: 
  22: 
  23: my $garbage=
  24:   qr(%..|.=);
  25: 
  26: my %hosts;
  27: my $search_phrase;
  28: my %search_phrases;
  29: my %search_words;
  30: my @words;
  31: my $word;
  32: 
  33: foreach (@lines){
  34: 	# if there is a query string
  35: 	# and it isn't from our CGI
  36: 	if ((m/\?/) && (m/\+/) 	&& ($_!~m/$ignore_hosts/) ){
  37: 		m/
  38: 			(http:..) 
  39: 			([a-z.]*)	#hostname
  40: 			(.*\?)	 	#bit before query string
  41: 	 		(.*)		# search string
  42: 			(-\>.*)		# page refered to
  43: 		/xi;
  44: 
  45: 
  46: 		$hosts{$2}++;
  47: 
  48: 		$search_phrase=$4;
  49: 		$search_phrase=~s/$garbage//g;
  50: 		$search_phrase=~s/&.*$//;
  51: 		$search_phrase=~s/\+/ /g;
  52: 		$search_phrase=lc($search_phrase);
  53: 
  54: 		$search_phrases{$search_phrase}++;
  55: 
  56: 		@words=split(/ /,$search_phrase);
  57: 		foreach $word (@words){
  58: 			$search_words{$word}++;
  59: 		}		
  60: 		
  61: 	}
  62: }
  63: 
  64: foreach (sort (keys %hosts)){
  65: 	print "$hosts{$_} searches from $_\n";
  66: }
  67: 
  68: 
  69: 
  70: print "\n search words:\n";
  71:   foreach my $key (sort { $search_words{$b} <=> $search_words{$a} } keys %search_words) {
  72:       print  "$search_words{$key} $key\n";
  73:   }
  74: 
  75: print "\n search phrases:\n";
  76:   foreach my $key (sort { $search_phrases{$b} <=> $search_phrases{$a} } keys %search_phrases) {
  77:       print  "$search_phrases{$key} $key\n";
  78:   }

Replies are listed 'Best First'.
Re: parse refer log
by merlyn (Sage) on Sep 29, 2001 at 12:23 UTC
      Thank you. I'm sure I will learn a great deal from the comparison. :->



      --mandog