#!/usr/bin/perl -w
use strict;

# small script to merge two or more apache logfiles so that entries
# appear in the right chronological order, optionally adding a description
# field to every entry in the resulting combined logfile.

# useful for merging files to generate combined statistics for more than one
# server/vhost/etc.

# syntax : logmerge.pl [-o outfile] [[-r|-i] -d desc1,...,descN] [-f file] [infiles]

# -o outfile : file to write the merged log to. STDOUT is assumed if omitted.
# -r : insert descriptions into the request field (/a.html becomes
#      /description/a.html)
# -i : insert descriptions as the first field of every log entry
# -d descN : one description for each logfile (e.g. sv1,sv2,etc.);
#            if there are more logfiles than descriptions, empty descriptions
#            will be used for the remaining logfiles (- for -i, nothing for -r)
# -f file : read the list of logfiles to merge from file, use - for STDIN
# logfiles : two or more logfiles containing data in CLF or similar log
#            formats (the first four fields need to look right, change
#            the regex in the code if that's not the case)

# example : find . -name "access.log" | ./logmerge.pl -f -
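# example with -i and per-file descriptions (hypothetical host names and
# log lines, for illustration only):
#   ./logmerge.pl -i -d www1,www2 -o merged.log access1.log access2.log
# an entry from access1.log such as
#   192.0.2.1 - - [10/Oct/2000:13:55:36 -0700] "GET /a.html HTTP/1.0" 200 2326
# would then appear in merged.log as
#   www1 192.0.2.1 - - [10/Oct/2000:13:55:36 -0700] "GET /a.html HTTP/1.0" 200 2326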

# If you have a _large_ number of logfiles (say, more file handles than your
# system allows a Perl script to open), you might have to invoke the program
# several times in a tree-like structure, merging intermediate results
# (see the example below).
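# e.g. (a sketch with hypothetical file names):
#   ./logmerge.pl -o pass1a.log www1/access.log www2/access.log
#   ./logmerge.pl -o pass1b.log www3/access.log www4/access.log
#   ./logmerge.pl -o merged.log pass1a.log pass1b.log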

# 2001 by Eike Frost (mailto:Eike.Frost@gmx.de)

use Getopt::Std;
use IO::File;
use IO::Handle;
use Time::Local;

# -r and -i are flags, the other options take an argument
getopts ('o:rid:f:');
our ($opt_o,$opt_r,$opt_i,$opt_d,$opt_f);
my (@filelist, @logfiles, @descs, $output, $finished, %timehash);

# prepare list of logfiles
if ($opt_f) {
    my $filelist = IO::File->new ($opt_f) or die ($opt_f.' '.$!);
    @filelist = <$filelist>;
    chomp @filelist;      # strip newlines so the names can be opened below
    undef $filelist;
} else {
    @filelist = @ARGV;
}
(scalar @filelist == 0) and die ("no files to merge\n");

# read descriptions
$opt_d and (@descs = split ",", $opt_d);

# open logfiles
@logfiles = map { IO::File->new ($_) or die ($_.' '.$!) } @filelist;

# open outputfile, dying (rather than silently using STDOUT) if -o fails
if ($opt_o) { $output = IO::File->new ($opt_o, "w") or die ($opt_o.' '.$!); }
else { $output = IO::File->new; $output->fdopen (fileno(STDOUT), "w"); }

# convert the timefield from the logfile to a unix timestamp for serializing
sub converttounixtime ($) {
    my $logtime = shift;                    # e.g. "10/Oct/2000:13:55:36 -0700"
    my %months = ('Jan'=>0,'Feb'=>1,'Mar'=>2,'Apr'=>3,'May'=>4,'Jun'=>5,
                  'Jul'=>6,'Aug'=>7,'Sep'=>8,'Oct'=>9,'Nov'=>10,'Dec'=>11);
    my ($day,$month,$year,$hour,$minute,$second,$adjust) =
        unpack "A2xA3xA4xA2xA2xA2xA*", $logtime;
    # timegm expects a zero-based month, hence the 0..11 values above
    my $timestamp = timegm ($second,$minute,$hour,$day,$months{$month},$year);
    # subtract the timezone offset (e.g. "-0700") to normalize to UTC
    if (my ($sign,$offhour,$offmin) = ($adjust =~ /([+-])(\d\d)(\d\d)/)) {
        $timestamp -= ($offhour*3600 + $offmin*60) * ($sign eq '-' ? -1 : 1);
    }
    return $timestamp;
}
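# e.g. converttounixtime ("10/Oct/2000:13:55:36 -0700") returns 971211336,
# the UTC epoch second for that entry (13:55:36 -0700 is 20:55:36 UTC)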

# insert the next logentry from logfile $number into the timehash
sub insertfrom ($) {
    my $number = shift;
    my $lf = $logfiles[$number];
    my $logline = <$lf>;
    return unless defined $logline;
    # the first four CLF fields: host, ident, authuser, [timefield]
    $logline =~ /^[^ ]+ [^ ]+ [^ ]+ \[([^\]]+)\]/
        or die ("unparseable logline in $filelist[$number]: $logline");
    # zero-pad the timestamp so the plain string sort of the keys in the
    # main loop stays chronological
    my $timestamp = sprintf "%011d", converttounixtime ($1);
    # entries sharing a timestamp get an 'a' appended so none overwrite another
    while (defined $timehash{$timestamp}) { $timestamp .= 'a'; }
    $timehash{$timestamp} = [$logline, $number];
}
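# e.g. two entries logged in the same second end up under the keys
# '00971211336' and '00971211336a', each mapping to [$logline, $number]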

# write out a logentry to the outfile,
# inserting descriptions as specified per -i, -r, and -d if possible
sub writelog ($) {
    my $key = shift;
    my $towrite = $timehash{$key}[0];
    if ($opt_r && (defined $descs[$timehash{$key}[1]])) {
        # splice the description in front of the request path,
        # e.g. "GET /a.html ..." becomes "GET /desc/a.html ..."
        $towrite =~ s/([^"]+"[^ ]+ )(.*)/$1\/$descs[$timehash{$key}[1]]$2/g;
    } elsif ($opt_i) {
        if (defined $descs[$timehash{$key}[1]]) {
            $towrite = $descs[$timehash{$key}[1]] . " " . $towrite;
        } else {
            $towrite = "- " . $towrite;
        }
    }
    return print $output $towrite;
}

# fill the initial timehash with the first entry from every logfile
for (my $counter = 0; $counter < scalar @logfiles; $counter++) {
    insertfrom ($counter);
}

# main loop, finishes when every logfile has eof'ed
$finished = scalar keys %timehash;
while ($finished > 0) {
    # keys are zero-padded timestamps, so a string sort is chronological;
    # write the oldest pending entry and refill from the file it came from
    my $oldest = (sort keys %timehash)[0];
    writelog ($oldest) or die $!;
    insertfrom ($timehash{$oldest}[1]);
    delete $timehash{$oldest};
    $finished = scalar keys %timehash;
}