1: #!/usr/bin/perl
2:
3: #############################
4: #
5: # Version 2.0
6: #
7: # A simple multi-format log parser which is intended
8: # to be used as a filter. Could be faster, but it does
9: # allow you to define a pretty output format.
10: #
11: # Author: Chris Jensen
12: #
13: # Update:
14: #
15: # - If log format is unspecified, an attempt is
16: # made to determine the closest matching format
17: # by analyzing a log entry.
18: #
19: # - Reduced amount of code; Sub-formats defined
20: # similar to log formats; Minor changes.
21: #
22:
23: use Getopt::Long;
24:
use strict;
use warnings;

# ----------------------------------------------------------------------
# Command line
#
#   --type|-t     Name of an entry in $log_formats. When omitted, the
#                 format is guessed from the first input line.
#   --pattern|-p  printf-like output pattern built from the formatting
#                 characters listed by usage(). Required.
# ----------------------------------------------------------------------

my %optctl;
GetOptions(\%optctl, "type|t=s", "pattern|p=s");

# Every log format is a pair [ line regex, control letters ]. The Nth
# capture group of the regex is stored under the Nth control letter.
#
# The bytes field (b) accepts '-' as well as digits: Apache's %b writes
# '-' when a response has no body, and such lines would otherwise be
# dropped without notice.
#
# NOTE(review): 'custom' lists A before R, the reverse of 'combined' --
# presumably this mirrors the author's own LogFormat; confirm.
my $log_formats = {
    'common'   => [ qr{(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-)}, [qw(h l u t r c b)] ],
    'virtual'  => [ qr{(\S+) (\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-)}, [qw(v h l u t r c b)] ],
    'combined' => [ qr{(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-) \"([^\"]*)\" \"([^\"]*)\"}, [qw(h l u t r c b R A)] ],
    'referer'  => [ qr{(\S+) \-\> (\S+)}, [qw(R r)] ],
    'agent'    => [ qr{(\S+)}, [qw(A)] ],
    'extended' => [ qr{(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-) \"([^\"]*)\" \"([^\"]*)\" (\d+) (\d+)}, [qw(h l u t r c b R A P T)] ],
    'custom'   => [ qr{(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-) \"([^\"]*)\" \"([^\"]*)\" (\d+)}, [qw(h l u t r c b A R T)] ],
};

my $type = $optctl{type} || 'unknown';

my $pattern = $optctl{pattern};
usage() unless defined $pattern && length $pattern;

# A type that is not in the table used to leave $format undefined, which
# turned /$format/ into an empty pattern. Fail loudly instead.
if ($type ne 'unknown' && !exists $log_formats->{$type}) {
    die "Unknown log type '$type' (known types: "
        . join(', ', sort keys %{$log_formats}) . ")\n";
}

my ($format, $control);
($format, $control) = @{ $log_formats->{$type} } if $type ne 'unknown';

# Translate the user pattern into a printf format and, in the same pass,
# record which formatting character feeds each %s slot. Doing both in a
# single substitution keeps the two in step:
#   - literal text in front of the first '%' is never mistaken for a
#     formatting character;
#   - '%%' stays a literal percent sign and consumes no value.
# Flags/width between '%' and the letter (e.g. '%-15o') are preserved.
my @pats;
my $outpat = $pattern . "\n";
$outpat =~ s{%(?:(%)|([^a-zA-Z_%]*)([a-zA-Z_]))}{
    defined $1 ? '%%' : do { push @pats, $3; "%$2s" }
}gex;


# Formats and sub-formats are now defined similarly: a sub-format splits
# the value captured for one control letter into further named pieces.

my $sub_formats = {
    't' => [ qr{(\d+)\/(\w+)\/(\d+)\:(\d+)\:(\d+)\:(\d+)\s}, [qw(d m y H M S)] ],
    'r' => [ qr{(\w+)\s([^\?]*)\??([^\s]*)?\s(.*)}, [qw(a f q p)] ],
    'u' => [ qr{(\w*)\-(\w*)}, [qw(s i)] ],
    'R' => [ qr{.*\:\/\/([^\/]+)(\/.*)}, [qw(o F)] ]
};


LINE:
while (my $line = <>) {

    # Attempt to automatically determine log type/format: among the
    # formats whose regex matches this line, pick the one with the most
    # control entities. This only runs until a type has been chosen,
    # i.e. on the first input line.
    if ($type eq 'unknown') {
        my $best = 0;
        for my $name (sort keys %{$log_formats}) {
            my ($re, $letters) = @{ $log_formats->{$name} };
            next unless $line =~ $re && @{$letters} > $best;
            $best    = @{$letters};
            $format  = $re;
            $control = $letters;
            $type    = $name;
        }
        die "Can't auto-determine log type\n" if $type eq 'unknown';
    }

    # Collect the captures into an array up front rather than reading
    # $1..$n through symbolic references; lines that do not match the
    # chosen format are skipped silently.
    my @fields = ($line =~ $format);
    next LINE unless @fields;

    my %info;
    for my $i (0 .. $#{$control}) {
        my $ctl = $control->[$i];
        $info{$ctl} = $fields[$i];

        # Break the value down further when a sub-format exists for it.
        my $sub = $sub_formats->{$ctl};
        next unless $sub && defined $info{$ctl};

        my ($sub_re, $sub_letters) = @{$sub};
        my @parts = ($info{$ctl} =~ $sub_re);
        next unless @parts;

        @info{ @{$sub_letters} } = @parts;
    }

    # Values that are unknown for this format print as empty strings.
    my @vals = map { defined $info{$_} ? $info{$_} : '' } @pats;

    printf($outpat, @vals);
}
105:
106:
# usage()
#
# Writes the command synopsis and the tables of formatting characters
# to standard output, then terminates the program with exit status 0.
# Takes no arguments and never returns to the caller.
sub usage {

    # Non-interpolating heredoc: the text contains '%' sequences that
    # must be printed verbatim.
    print <<'END_OF_USAGE';

usage: logparse [-t=<type>] -p=<pattern>

example: tail -50 access_log | logparse -t=extended -p="%H:%M %-15o %f"

Formatting characters:

v - The virtual host name/address
h - The host IP name/address
l - The remote logname
u - Remote User/Session
t - The time of the request
r - The full request
c - The HTTP code (302, 200, etc)
b - Bytes
R - Referrer string
A - User Agent string
P - Process ID
T - Time taken in seconds

Request string breakdown:

a - Action/Method (GET, POST, etc)
f - File path
q - Query string
p - HTTP protocol version

Time of request breakdown:

d - Day of the month
m - Month (Apr, May, etc)
y - Year
H - Hour
M - Minute
S - Second

User Session breakdown:

s - Session ID
i - User ID

Referrer string breakdown:

o - Host of referrer
F - File path of referrer

END_OF_USAGE

    exit 0;

}
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Multi-Format Log Parser - Version 2.0
by grinder (Bishop) on Jan 16, 2002 at 17:07 UTC | |
by cjensen (Sexton) on Jan 18, 2002 at 23:05 UTC | |
by cjensen (Sexton) on Jan 23, 2002 at 06:08 UTC |