0: #!/usr/bin/perl
1:
2: #############################
3: #
4: # Version 2.0
5: #
# A simple multi-format log parser which is intended to
# be used as a filter. Could be faster, but it does
8: # allow you to define a pretty output format.
9: #
10: # Author: Chris Jensen
11: #
12: # Update:
13: #
14: # - If log format is unspecified, an attempt is
15: # made to determine the closest matching format
16: # by analyzing a log entry.
17: #
18: # - Reduced amount of code; Sub-formats defined
19: # similar to log formats; Minor changes.
20: #
21:
22: use Getopt::Long;
23:
# Command-line options land in %optctl:
#   --type / -t     name of the log format (optional)
#   --pattern / -p  output pattern (required)
# GetOptions returns false when the command line is malformed; show the
# help text (usage() prints it and exits) instead of carrying on with
# half-parsed options.
my %optctl;
GetOptions(\%optctl, "type|t=s", "pattern|p=s") or usage();
26:
27:
# Shared fragments of the access-log formats.
#
# $clf_core captures host, logname, user, [time], "request", status and
# byte count.  The byte count accepts "-" as well as digits because Apache
# writes "-" for %b when a response has no body (e.g. 304); with a plain
# (\d+) those lines would not match any access-log format.
my $clf_core = qr{(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-)};

# A double-quoted field such as the referrer or the user agent.
my $quoted = qr{\"([^\"]*)\"};

# Known log formats.  Each entry is [ regex, field letters ]: the n-th
# capture group of the regex is stored under the n-th field letter.  The
# letters are the formatting characters listed in the usage text
# (v virtual host, h host, l logname, u user, t time, r request, c code,
# b bytes, R referrer, A agent, P process id, T time taken).
#
# NOTE(review): 'custom' lists A before R, unlike 'combined'/'extended' --
# presumably it matches a site-specific LogFormat; confirm.
my $log_formats = {
    common   => [ $clf_core,                                  [qw(h l u t r c b)] ],
    virtual  => [ qr{(\S+) $clf_core},                        [qw(v h l u t r c b)] ],
    combined => [ qr{$clf_core $quoted $quoted},              [qw(h l u t r c b R A)] ],
    referer  => [ qr{(\S+) \-\> (\S+)},                       [qw(R r)] ],
    agent    => [ qr{(\S+)},                                  [qw(A)] ],
    extended => [ qr{$clf_core $quoted $quoted (\d+) (\d+)},  [qw(h l u t r c b R A P T)] ],
    custom   => [ qr{$clf_core $quoted $quoted (\d+)},        [qw(h l u t r c b A R T)] ],
};
37:
38:
# Name of the log format to use; 'unknown' makes the main loop pick the
# best-matching format from the first line it reads.
my $type = $optctl{type} || 'unknown';

# The output pattern is mandatory.  Test for definedness and length rather
# than truth so that a pattern of "0" is not mistaken for a missing one.
# usage() prints the help text and exits.
my $pattern = $optctl{pattern};
usage() unless defined $pattern && length $pattern;

# Reject format names that are not in the table.  Without this check
# $format stays undefined and every line is matched against an empty
# pattern, producing meaningless output.
unless ($type eq 'unknown' || exists $log_formats->{$type}) {
    die "Unknown log type '$type' (expected one of: "
        . join(', ', sort keys %{$log_formats}) . ")\n";
}

# Regex and field letters of the chosen format; both stay undefined until
# auto-detection has run when no type was given.
my ($format, $control) = $type eq 'unknown' ? () : @{ $log_formats->{$type} };
44:
# Translate a user pattern such as "%H:%M %-15o %f" into a printf format
# plus the list of field letters that feed it.
#
# Every "%<flags><letter>" directive becomes "%<flags>s" and its letter is
# appended to the field list; "%%" is kept as a literal percent sign and
# text outside directives is left untouched.  Format and field list are
# built in one pass so they cannot get out of step (splitting the pattern
# on "%" used to turn literal text in front of the first directive into a
# bogus field letter).
#
# Parameters: $pattern - the user-supplied output pattern.
# Returns:    a list: the printf format (newline appended) followed by
#             the field letters, in order of appearance.
sub _compile_pattern {
    my ($pattern) = @_;

    my @fields;
    my $format = defined $pattern ? $pattern : '';

    $format =~ s{
        %
        (?:
            (%)                 # group 1: second half of an escaped percent
          |
            ([^a-zA-Z_%]*)      # group 2: printf flags and width
            ([a-zA-Z_])         # group 3: field letter
        )
    }{
        defined $1 ? '%%' : do { push @fields, $3; '%' . $2 . 's' }
    }gex;

    return ($format . "\n", @fields);
}

# $outpat is the printf format for one output line, @pats the field
# letters whose values are substituted into it.
my ($outpat, @pats) = _compile_pattern($pattern);
51:
52:
# Formats and sub-formats are now defined similarly.
#
# A sub-format breaks the value of one field into further fields.  The key
# is the letter of the field to break down; the value is
# [ regex, field letters ], where the n-th capture group is stored under
# the n-th letter:
#   t -> d m y H M S   (day, month, year, hour, minute, second)
#   r -> a f q p       (method, file path, query string, protocol)
#   u -> s i           (session id, user id)
#   R -> o F           (referrer host, referrer file path)
#
# The R pattern is anchored at the scheme so that a referrer which embeds
# another URL (e.g. ".../redirect?to=http://other/") yields the host of
# the referrer itself, and the path is optional so that "http://host"
# still yields a host.
my $sub_formats = {
    't' => [ qr{(\d+)\/(\w+)\/(\d+)\:(\d+)\:(\d+)\:(\d+)\s}, [qw(d m y H M S)] ],
    'r' => [ qr{(\w+)\s([^\?]*)\??([^\s]*)?\s(.*)},          [qw(a f q p)] ],
    'u' => [ qr{(\w*)\-(\w*)},                               [qw(s i)] ],
    'R' => [ qr{^[^:]*\:\/\/([^\/]+)(\/.*)?},                [qw(o F)] ],
};
61:
62:
# Pick the log format that best fits $line.
#
# Parameters: $line    - one raw log line
#             $formats - hash ref: name => [ regex, field letters ]
# Returns:    the name of the matching format with the most fields, or
#             undef when no format matches.  Names are visited in sorted
#             order so that a tie does not depend on hash ordering.
sub _detect_format {
    my ($line, $formats) = @_;

    my ($best_name, $best_count) = (undef, 0);
    for my $name (sort keys %{$formats}) {
        my ($regex, $fields) = @{ $formats->{$name} };
        next unless $line =~ $regex;
        next unless @{$fields} > $best_count;
        ($best_name, $best_count) = ($name, scalar @{$fields});
    }
    return $best_name;
}

# Split one log line into its named fields.
#
# Parameters: $line        - one raw log line
#             $regex       - regex of the active log format
#             $fields      - array ref of field letters, one per capture
#             $sub_formats - hash ref: field letter => [ regex, letters ]
# Returns:    a hash ref mapping field letters (including those produced
#             by matching sub-formats) to their values; nothing when the
#             line does not match $regex.
sub _parse_entry {
    my ($line, $regex, $fields, $sub_formats) = @_;

    # The captures are taken as a list instead of being read back through
    # $1, $2, ..., so a later sub-format match cannot disturb them.
    my @captures = $line =~ $regex or return;

    my %info;
    for my $index (0 .. $#{$fields}) {
        my $name = $fields->[$index];
        $info{$name} = $captures[$index];

        my $sub_format = $sub_formats->{$name} or next;
        next unless defined $info{$name};

        my ($sub_regex, $sub_fields) = @{$sub_format};
        my @parts = $info{$name} =~ $sub_regex or next;
        @info{ @{$sub_fields} } = @parts;
    }
    return \%info;
}

while (my $line = <>) {

    # Attempt to automatically determine log type/format from the first
    # line read: pick the matching format with the most control entities.
    if ($type eq 'unknown') {
        my $detected = _detect_format($line, $log_formats);
        die "Can't auto-determine log type\n" unless defined $detected;

        $type = $detected;
        ($format, $control) = @{ $log_formats->{$type} };
    }

    # A type that is not in the format table leaves $format undefined;
    # stop here rather than match every line against an empty pattern.
    die "Unknown log type '$type'\n" unless defined $format;

    # Lines that do not fit the active format are skipped silently.
    my $info = _parse_entry($line, $format, $control, $sub_formats) or next;

    # Fields that are not available for this line print as empty strings.
    printf $outpat, map { defined $info->{$_} ? $info->{$_} : '' } @pats;
}
104:
105:
# Print the command-line help text (invocation syntax, an example and the
# list of formatting characters) to standard output, then terminate the
# program with exit status 0.  Takes no arguments and never returns.
sub usage {

    print qq{
usage: logparse [-t=<type>] -p=<pattern>

example: tail -50 access_log | logparse -t=extended -p="%H:%M %-15o %f"

Formatting characters:

v - The virtual host name/address
h - The host IP name/address
l - The remote logname
u - Remote User/Session
t - The time of the request
r - The full request
c - The HTTP code (302, 200, etc)
b - Bytes
R - Referrer string
A - User Agent string
P - Process ID
T - Time taken in seconds

Request string breakdown:

a - Action/Method (GET, POST, etc)
f - File path
q - Query string
p - HTTP protocol version

Time of request breakdown:

d - Day of the month
m - Month (Apr, May, etc)
y - Year
H - Hour
M - Minute
S - Second

User Session breakdown:

s - Session ID
i - User ID

Referrer string breakdown:

o - Host of referrer
F - File path of referrer

};

    exit(0);

}

# In reply to Multi-Format Log Parser - Version 2.0 by cjensen
# | For: | Use:  |
# | &    | &amp; |
# | <    | &lt;  |
# | >    | &gt;  |
# | [    | &#91; |
# | ]    | &#93; |