# Tracking Google Content Ads CGI Script
#
# Scans an Apache combined-format access log for requests whose referrer is a
# Google content ad (googlesyndication.com with client= and url= parameters)
# and prints an HTML report of the pages the ad was displayed on, with a hit
# count per page.
#
# NOTE(review): the HTML tags inside the echo statements were missing from the
# reviewed copy of this script; the markup emitted below is a minimal
# reconstruction around the original visible text -- confirm against the
# intended page layout.
#
# Copyright 2005 Apogee Web Consulting LLC

# Specify where the Apache log file resides
# NOTE: Change this for your server (or set LOG_FILE in the environment)
LOG_FILE="${LOG_FILE:-/var/log/httpd/access_log}"

# NOTE: Change this if you don't have access to /tmp
TMP_DIR="${TMP_DIR:-/tmp}"

# Escape the characters that are special in HTML. Reads stdin, writes stdout.
# Everything taken from the log is attacker-controlled (referrers are sent by
# the client), so it must pass through here before being printed.
html_escape() {
  sed -e 's/&/\&amp;/g' \
      -e 's/</\&lt;/g' \
      -e 's/>/\&gt;/g' \
      -e 's/"/\&quot;/g' \
      -e "s/'/\&#39;/g"
}

# Decode the URL-encoded characters commonly found in page URLs.
# Reads stdin, writes stdout.
url_decode() {
  # A literal '+' must become a space before %2B is decoded, and %25 must be
  # decoded last so that e.g. "%2526" yields "%26" rather than "&".
  sed -e 's/"//g' \
      -e 's/+/ /g' \
      -e 's/%2[Bb]/+/g' \
      -e 's/%2[Cc]/,/g' \
      -e 's/%2[Dd]/-/g' \
      -e 's/%2[Ee]/./g' \
      -e 's/%2[Ff]/\//g' \
      -e 's/%26/\&/g' \
      -e "s/%27/'/g" \
      -e 's/%28/(/g' \
      -e 's/%29/)/g' \
      -e 's/%40/@/g' \
      -e 's/%7[Cc]/|/g' \
      -e 's/%3[Ff]/?/g' \
      -e 's/%3[Dd]/=/g' \
      -e 's/%3[Aa]/:/g' \
      -e 's/%25/%/g'
}

# Print the number of lines in file $1 as a bare integer.
line_count() {
  wc -l < "$1" | tr -d ' '
}

# Read an access log on stdin and print one decoded page URL per unique
# (client address, request path, referrer) content-ad hit.
extract_ad_pages() {
  grep -F 'googlesyndication.com' \
    | awk '{print $1, $7, $11}' \
    | LC_ALL=C sort -u \
    | grep -F 'client=' \
    | grep -F '&url=' \
    | awk -F'&url=' '{print $2}' \
    | awk -F'&' '{print $1}' \
    | url_decode
}

# Read decoded page URLs on stdin and print one HTML table row per distinct
# page (hit count, domain, link), fewest hits first.
hit_rows() {
  # uniq -c counts exact duplicates, so URL characters such as '.' or '?' are
  # never interpreted as a pattern. Escaping adds no '/' characters, so the
  # domain is still the third '/'-separated field afterwards.
  LC_ALL=C sort | uniq -c | LC_ALL=C sort -n | html_escape | awk '
    {
      hits = $1
      page = $0
      sub(/^[ \t]*[0-9]+ /, "", page)
      n = split(page, part, "/")
      domain = (n >= 3) ? part[3] : ""
      printf "<tr><td>%s</td><td>%s</td><td><a href=\"%s\">%s</a></td></tr>\n", hits, domain, page, page
    }'
}

# Output the opening of the html document.
page_header() {
  cat <<'EOF'
<html>
<head>
<title>Tracking Google Content Ads CGI Script</title>
</head>
<body>
EOF
}

# Output the closing of the html document.
page_footer() {
  echo "</body>"
  echo "</html>"
}

# Produce the complete CGI response on stdout.
# Globals: LOG_FILE (read), TMP_DIR (read).
# Runs in a subshell so that "exit" ends only the report and the EXIT trap
# removes the scratch directory on every path out.
main() (
  # Output http header info
  echo "Content-type: text/html"
  echo

  # Output http body (an html document)
  page_header

  # Make sure file exists
  if [ ! -f "$LOG_FILE" ] || [ ! -r "$LOG_FILE" ]; then
    printf '<p>%s is not a readable file or does not exist! Try again.</p>\n' "$LOG_FILE"
    page_footer
    exit 0
  fi

  # Simple check to verify it's an Apache combined log file: every one of the
  # first lines must split into exactly 7 fields using " as field separator.
  field_counts=$(head -n 5 "$LOG_FILE" | awk -F'"' '{print NF}' | LC_ALL=C sort -u)
  if [ "$field_counts" != 7 ]; then
    printf '<p>%s is not in the Apache combined log format!\n' "$LOG_FILE"
    # Only when the sampled lines agree on a single count is it worth showing.
    if [ "$(printf '%s\n' "$field_counts" | grep -c '')" -lt 2 ]; then
      echo '<br>Expecting 7 fields split by a " mark.'
      printf '<br>There are %s such fields.\n' "${field_counts:-0}"
      echo "<br>Here's the first line of your file:"
      echo "<pre>"
      head -n 1 "$LOG_FILE" | html_escape
      echo "</pre>"
    fi
    echo "</p>"
    page_footer
    exit 0
  fi

  # NOTE: Comment this informational block out if you like
  printf 'Processing LOG_FILE: %s ...\n' "${LOG_FILE##*/}"
  echo "<br>"
  printf 'Length: %s lines\n' "$(line_count "$LOG_FILE")"
  echo "<br>"
  printf 'Start: %s\n' \
    "$(head -n 5 "$LOG_FILE" | awk '{print $4, $5}' | grep : | head -n 1 | html_escape)"
  echo "<br>"
  printf 'End: %s\n' \
    "$(tail -n 5 "$LOG_FILE" | awk '{print $4, $5}' | grep : | tail -n 1 | html_escape)"

  # Use a private scratch directory to process the data; mktemp gives an
  # unpredictable name so concurrent requests cannot collide.
  work_dir=$(mktemp -d "${TMP_DIR%/}/gads.XXXXXX") || {
    printf '<p>Unable to create a temporary directory in %s.</p>\n' "$TMP_DIR"
    page_footer
    exit 1
  }
  trap 'rm -rf -- "${work_dir:?}"' EXIT
  trap 'exit 1' HUP INT TERM
  sites_file=$work_dir/sites.txt

  # Find hits from Google content ads
  extract_ad_pages < "$LOG_FILE" > "$sites_file"

  # Exit if no content ads found
  if [ ! -s "$sites_file" ]; then
    echo "<p>No content ads found!</p>"
    page_footer
    exit 0
  fi

  # Display content ads
  echo "<h3>Hits from Google Content Ads:</h3>"
  echo "<table border=\"1\">"
  echo "<tr><th>Hits</th><th>Domain</th><th>Page Where Content Ad Is (or was) Displayed</th></tr>"
  hit_rows < "$sites_file"
  echo "</table>"
  printf '<p>Total hits: %s\n' "$(line_count "$sites_file")"
  printf '<br>Unique hits: %s</p>\n' "$(LC_ALL=C sort -u "$sites_file" | grep -c '')"

  # Help Text
  cat <<'EOF'
<p><b>How To Use This Information:</b></p>
<ol>
<li>Click on the page where your Google ad was displayed.</li>
<li>If you don't want your ad displayed on the site, copy the domain and...</li>
<li>Paste the domain into Google's Site Exclusion feature at the Campaign level.</li>
</ol>
EOF

  # Copyright 2005 Apogee Web Consulting LLC
  echo "<p>Copyright &copy; 2005 Apogee Web Consulting LLC</p>"

  page_footer
)

main