1: #!/usr/bin/perl -w
   2: use strict;
   3: 
   4: # small script to merge two or more apache logfiles so that entries
   5: # appear in the right chronological order, optionally adding a describing
   6: # field to every entry in the resulting combined logfile.
   7: 
   8: # useful for merging files to generate combined statistics for more than one
   9: # server/vhost/etc.
  10: 
  11: # syntax logmerge.pl [-o outfile] [[-r|-i] -d desc1,...,descN] [infiles]
  12: 
  13: # -o output   : file to write log to. Assume STDOUT if omitted.
  14: # -r          : Insert descriptions into request header (/a.html becomes 
  15: #               /description/a.html
  16: # -i          : insert descriptions as the first field of every log entry
  17: # -d descN    : one entry for each logfile describing it (i.e. sv1,sv2,etc.
  18: #               if there are more logfiles than entries, empty descriptions
  19: #               will be inserted for some logfiles (- for -i, nothing for -r)
  20: # -f file     : Read the list of logfiles to merge from file, use - for STDIN
  21: # logfiles    : two or more logfiles containing data in CLF or similar log
  22: #               formats (the first four fields need to look right, change 
  23: #               the regex in the code if that's not the case)
  24: 
  25: # example : find -name "access.log" | ./logmerge -f -
  26: 
  27: # If you have a _large_ number of logfiles (say, more than your system allows
  28: # a perl script to open), you might have to invoke the program several times 
  29: # in a tree-like structure.
  30: 
  31: # 2001 by Eike Frost (mailto:Eike.Frost@gmx.de)
  32: 
  33: use Getopt::Std;
  34: use IO::File;
  35: use IO::Handle;
  36: use Time::Local;
  37: 
  38: getopts ('o:ri:d:f:');
  39: our ($opt_o,$opt_r,$opt_i,$opt_d,$opt_f);
  40: my (@filelist, @logfiles, @descs, $output, $finished, %timehash);
  41: 
  42: # prepare list of logfiles
  43: if ($opt_f) {
  44:   my $filelist = new IO::File ($opt_f) or die ($opt_f.' '.$!);
  45:   @filelist = <$filelist>;
  46:   undef $filelist;
  47: } else {
  48:   @filelist = @ARGV;
  49: }
  50: (scalar @filelist eq 0) and die ("no files to merge\n");
  51: 
  52: # read descriptions
  53: $opt_d and (@descs = split ",", $opt_d);
  54: 
  55: # open logfiles
  56: @logfiles = map (new IO::File ($_) || die ($_ . ' ' . $!), @filelist);
  57: 
  58: # open outputfile
  59: ($opt_o and $output = new IO::File ($opt_o, "w")) 
  60:  or ($output = new IO::File and $output->fdopen (fileno(STDOUT),"w"));
  61: 
  62: # convert the timefield from the logfile to a unix timestamp for serializing
  63: sub converttounixtime ($) {
  64:   my $logtime = shift;
  65:   my %months = ('Jan'=>1,'Feb'=>2,'Mar'=>3,'Apr'=>4,'May'=>5,'Jun'=>6,
  66:                 'Jul'=>7,'Aug'=>8,'Sep'=>9,'Oct'=>10,'Nov'=>11,'Dec'=>12);
  67:   my ($day,$month,$year,$hour,$minute,$second,$adjust) = unpack "A2xA3xA4xA2xA2xA2xA*", $logtime;
  68:   my $timestamp = timegm ($second,$minute,$hour,$day,$months{$month},$year);
  69:   $timestamp += $adjust*.6*60;
  70:   return $timestamp;
  71: }
  72: 
  73: # insert a logentry into the timehash
  74: sub insertfrom ($) {
  75:   my $number = shift;
  76:   my $lf = $logfiles[$number];
  77:   my $logline = <$lf> || return;
  78:   $logline =~ /[^ ]+ [^ ]+ [^ ]+ \[([^\]]+)\].*/;
  79:   my $timestamp = $1;
  80:   while (defined $timehash{$timestamp}) { $timestamp .= 'a'; }
  81:   $timehash{$timestamp} = [$logline, $number];
  82: }
  83: 
  84: # write out a logentry to the outfile
  85: # inserting description as specified per -i, -r, and -d if possible
  86: sub writelog ($) {                   
  87:   my $key = shift;
  88:   my $towrite = $timehash{$key}[0]; 
  89:   if ($opt_r && (defined $descs[$timehash{$key}[1]])) {
  90:     $towrite =~ s/([^"]+"[^ ]+ )(.*)/$1\/$descs[$timehash{$key}[1]]$2/g;
  91:   } elsif ($opt_i) {
  92:     if (defined $descs[$timehash{$key}[1]]) {
  93:       $towrite = $descs[$timehash{$key}[1]] . " " . $towrite; 
  94:     } else {
  95:       $towrite = "- " . $towrite; 
  96:    }
  97:   }
  98:   return print $output $towrite;;
  99: }
 100: 
 101: # fill the initial timehash;
 102: for (my $counter=0; $counter < scalar @logfiles; $counter++) { 
 103:   insertfrom ($counter);
 104: }
 105: 
 106: # main loop, finishes when every logfile eof'ed
 107: $finished = scalar keys %timehash;
 108: while ($finished > 0) {
 109:     my $oldest = (sort keys %timehash)[0];
 110:     writelog ($oldest) or die $!;
 111:     insertfrom ($timehash{$oldest}[1]);
 112:     delete $timehash{$oldest};
 113:     $finished = scalar keys %timehash;
 114: }

Replies are listed 'Best First'.
Re: merge apache logfiles chronologically
by blakem (Monsignor) on Aug 29, 2001 at 02:35 UTC
    Looks good. For an alternate solution to this problem (a logical log file being split up among numerous physical machines) you should check out mod_log_spread. You can use it to generate a merged logfile in real time. Enjoy.

    -Blake