1: #!/usr/bin/perl -w
2: use strict;
3:
4: # small script to merge two or more apache logfiles so that entries
5: # appear in the right chronological order, optionally adding a describing
6: # field to every entry in the resulting combined logfile.
7:
8: # useful for merging files to generate combined statistics for more than one
9: # server/vhost/etc.
10:
# syntax: logmerge.pl [-o outfile] [[-r|-i] -d desc1,...,descN] [-f file | infiles]
12:
# -o output : file to write the merged log to. STDOUT is assumed if omitted.
# -r        : insert descriptions into the requested URL (/a.html becomes
#             /description/a.html)
# -i        : insert descriptions as the first field of every log entry
# -d descN  : one entry for each logfile describing it (e.g. sv1,sv2,etc.);
#             if there are more logfiles than entries, empty descriptions
#             will be inserted for some logfiles (- for -i, nothing for -r)
# -f file   : read the list of logfiles to merge from file, use - for STDIN
# logfiles  : two or more logfiles containing data in CLF or similar log
#             formats (the first four fields need to look right, change
#             the regex in the code if that's not the case)
24:
# example : find . -name "access.log" | ./logmerge.pl -f -
26:
27: # If you have a _large_ number of logfiles (say, more than your system allows
28: # a perl script to open), you might have to invoke the program several times
29: # in a tree-like structure.
30:
31: # 2001 by Eike Frost (mailto:Eike.Frost@gmx.de)
32:
33: use Getopt::Std;
34: use IO::File;
35: use IO::Handle;
36: use Time::Local;
37:
# Parse the command line. -r and -i are plain switches; -o, -d and -f take a
# value. (-i must not be declared as "i:", otherwise it swallows the next
# argument -- typically "-d" -- as its value.)
our ($opt_o, $opt_r, $opt_i, $opt_d, $opt_f);
getopts('o:rid:f:')
    or die "usage: $0 [-o outfile] [[-r|-i] -d desc1,...,descN] [-f file | infiles]\n";
my (@filelist, @logfiles, @descs, $output, $finished, %timehash);

# Open $name for reading and return the handle; "-" means STDIN. The mode is
# always passed explicitly so that a name such as ">file" or "cmd |" is
# treated as a plain filename instead of being interpreted by open().
my $open_input = sub {
    my ($name) = @_;
    my $fh = IO::File->new;
    my $opened = $name eq '-'
        ? $fh->fdopen(fileno(STDIN), 'r')
        : $fh->open($name, 'r');
    $opened or die ($name . ' ' . $!);
    return $fh;
};

# prepare list of logfiles: one name per line from the -f list, or else the
# remaining command line arguments
if (defined $opt_f && length $opt_f) {
    my $list = $open_input->($opt_f);
    chomp(@filelist = <$list>);
    # blank lines in the list do not name a logfile
    @filelist = grep { length } @filelist;
    $list->close;
}
else {
    @filelist = @ARGV;
}
@filelist or die ("no files to merge\n");

# read descriptions
@descs = split /,/, $opt_d if defined $opt_d;

# open logfiles; $logfiles[N] belongs to $filelist[N] (and to $descs[N])
for my $name (@filelist) {
    push @logfiles, $open_input->($name);
}

# open outputfile; failing to open the requested file is an error rather
# than a reason to quietly write the merged log to STDOUT
$output = IO::File->new;
if (defined $opt_o && length $opt_o) {
    $output->open($opt_o, 'w') or die ($opt_o . ' ' . $!);
}
else {
    $output->fdopen(fileno(STDOUT), 'w') or die ('STDOUT ' . $!);
}
61:
# Convert the time field of a log entry (e.g. "10/Oct/2000:13:55:36 -0700")
# to a unix timestamp (seconds since the epoch, UTC) for serializing.
# A missing UTC offset is taken as +0000.
# Returns nothing (undef in scalar context) if the field does not look like
# such a timestamp or names an impossible date.
sub converttounixtime ($) {
    my $logtime = shift;
    # timegm counts months from 0 (January) to 11 (December)
    my %months = ('Jan'=>0,'Feb'=>1,'Mar'=>2,'Apr'=>3,'May'=>4,'Jun'=>5,
                  'Jul'=>6,'Aug'=>7,'Sep'=>8,'Oct'=>9,'Nov'=>10,'Dec'=>11);
    defined $logtime or return;

    my ($day,$month,$year,$hour,$minute,$second,$sign,$offhour,$offmin) =
        $logtime =~ m{\A (\d{1,2}) / ([A-Za-z]{3}) / (\d{4})
                         : (\d{2}) : (\d{2}) : (\d{2})
                         (?: \s+ ([-+]) (\d{2}) (\d{2}) )? \s* \z}x
        or return;
    exists $months{$month} or return;

    # timegm croaks on out-of-range values (e.g. day 31 in February)
    my $timestamp = do {
        local $@;
        eval { timegm ($second,$minute,$hour,$day,$months{$month},$year) };
    };
    defined $timestamp or return;

    # The fields are local time at the given UTC offset, so the offset has
    # to be subtracted to arrive at UTC: 13:55:36 -0700 is 20:55:36 UTC.
    # Hours and minutes are handled separately so +0530 means 5.5 hours.
    if (defined $sign) {
        my $offset = ($offhour * 60 + $offmin) * 60;
        $timestamp -= $sign eq '-' ? -$offset : $offset;
    }
    return $timestamp;
}

# Base sort key most recently used for each logfile (indexed like @logfiles),
# so that an entry without a usable time field stays next to its predecessor.
my @lastkey;

# Read the next entry of logfile number $number and insert it into the
# timehash as [line, number].
# The hash key is the entry's unix timestamp, zero-padded to a fixed width so
# that a plain string sort of the keys is chronological; 'a's are appended
# until the key is unused, which lets entries from the same second coexist.
# Returns true if an entry was inserted, nothing once the logfile is at eof.
sub insertfrom ($) {
    my $number = shift;
    my $lf = $logfiles[$number];
    my $logline = <$lf>;
    defined $logline or return;

    # the fourth field (in brackets) is expected to hold the timestamp
    my $epoch;
    if ($logline =~ /[^ ]+ [^ ]+ [^ ]+ \[([^\]]+)\].*/) {
        $epoch = converttounixtime ($1);
    }

    my $key;
    if (defined $epoch && $epoch >= 0) {
        $key = sprintf ('%012d', $epoch);
    }
    elsif (defined $lastkey[$number]) {
        # unparsable entry: keep it right behind the previous entry of
        # the same logfile
        $key = $lastkey[$number];
    }
    else {
        # unparsable entry at the very start of a logfile
        $key = sprintf ('%012d', 0);
    }
    $lastkey[$number] = $key;

    while (defined $timehash{$key}) { $key .= 'a'; }
    $timehash{$key} = [$logline, $number];
    return 1;
}
83:
# Write the entry stored under $key in the timehash to the outfile,
# inserting the description of the logfile it came from as requested:
#   -r : "/description" is put in front of the requested URL; entries of
#        logfiles without a description are written unchanged (unless -i
#        is given as well, in which case the -i rule applies to them)
#   -i : the description, or "-" if there is none, becomes the first field
# An empty description (as in "-d sv1,,sv3") counts as no description, so
# that neither "//a.html" nor an empty first field is ever written.
# Returns the result of print, i.e. true on success.
sub writelog ($) {
    my $key = shift;
    my $towrite = $timehash{$key}[0];
    my $desc = $descs[$timehash{$key}[1]];
    my $has_desc = defined $desc && length $desc;

    if ($opt_r && $has_desc) {
        # $1: everything up to and including the word after the first
        # double quote (the request method) and the blank following it;
        # $2: the rest of the line, starting with the requested URL
        $towrite =~ s/([^"]+"[^ ]+ )(.*)/$1\/$desc$2/g;
    }
    elsif ($opt_i) {
        $towrite = ($has_desc ? $desc : "-") . " " . $towrite;
    }
    return print {$output} $towrite;
}
100:
use List::Util qw(minstr);

# fill the initial timehash with the first entry of every logfile
insertfrom ($_) for 0 .. $#logfiles;

# main loop: write out the entry with the smallest key, then refill the
# timehash from the logfile that entry came from. Finishes when every
# logfile eof'ed and the timehash has drained.
while (%timehash) {
    # keys are compared as strings, exactly as a default sort would do,
    # but only the minimum is needed so there is no point in sorting
    my $oldest = minstr (keys %timehash);
    writelog ($oldest) or die $!;
    # the old entry is still present while its successor is inserted; it
    # is only needed to look up which logfile to read from
    insertfrom ($timehash{$oldest}[1]);
    delete $timehash{$oldest};
}

# buffered write errors (a full disk, for instance) may only show on close
$output->close or die $!;