1: #!/usr/bin/perl
2:
3: #############################
4: #
5: # A simple multi-format log parser which is intended to
6: # be used as a filter. Could be faster, but it does
7: # allow you to define a pretty output format.
8: #
9: # Author: Chris Jensen
10: #
11:
12: use Getopt::Long;
13:
# Command-line options, stored in %optctl under their long names:
#   --type    / -t <name>     name of the log format to parse
#   --pattern / -p <pattern>  printf-like output pattern
my %optctl;

# GetOptions returns false when the command line is malformed (unknown
# option, missing value).  It has already warned on STDERR by then, so show
# the usage text rather than carrying on with half-parsed options.
GetOptions(\%optctl, 'type|t=s', 'pattern|p=s') or usage();
16:
17:
# Table of supported log formats, keyed by the name given to -t/--type.
#
# Each entry is a two-element array:
#   [0] a compiled regex whose capture groups pick the fields out of a line
#   [1] the one-letter field names, in order.  Slot 0 is always '_' and has
#       no capture group of its own, so name N lines up with group $N.
#
# The formats that extend the Common Log Format are assembled from shared
# fragments so the common prefix is written only once.
my $log_formats = do {

    # host, logname, user, [time], "request", status, bytes.
    # The bytes field accepts '-' as well as digits: Apache's %b writes '-'
    # instead of 0 when no body was sent (e.g. 304 responses), and such
    # lines would otherwise be dropped silently.
    my $clf = q{(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-)};

    my $quoted = q{ \"([^\"]*)\"};    # one more double-quoted field
    my $number = q{ (\d+)};           # one more numeric field

    +{
        'common' => [ qr{$clf},
                      [ qw( _ h l u t r c b ) ] ],

        'virtual' => [ qr{(\S+) $clf},
                       [ qw( _ v h l u t r c b ) ] ],

        'combined' => [ qr{$clf$quoted$quoted},
                        [ qw( _ h l u t r c b R A ) ] ],

        'referer' => [ qr{(\S+) \-\> (\S+)},
                       [ qw( _ R r ) ] ],

        'agent' => [ qr{(\S+)},
                     [ qw( _ A ) ] ],

        'extended' => [ qr{$clf$quoted$quoted$number$number},
                        [ qw( _ h l u t r c b R A P T ) ] ],

        # Note the field order here: agent before referrer.
        'custom' => [ qr{$clf$quoted$quoted$number},
                      [ qw( _ h l u t r c b A R T ) ] ],
    };
};
43:
44:
# Resolve the command-line choices: the log type falls back to 'common',
# while the output pattern is mandatory (usage() is shown without one).
my $type = $optctl{type} || 'common';
my $pattern = $optctl{pattern} or usage();

# An unknown type would leave both the line regex and the field list
# undefined, and the filter would then emit garbage for every input line.
# Fail loudly instead and tell the user which types exist.
unless (exists $log_formats->{$type}) {
    die "logparse: unknown log type '$type' (valid types: "
      . join(', ', sort keys %$log_formats) . ")\n";
}

# $format is the compiled line regex, $access the list of field names that
# correspond to its capture groups.
my ($format, $access) = @{ $log_formats->{$type} };
49:
50:
# Turn the user's output pattern into a printf format plus the ordered list
# of field letters that feed it.
#
#   $pattern - e.g. '%H:%M %-15o %f'; each '%<flags><letter>' names a field,
#              '%%' is a literal percent sign, everything else is literal.
#
# Returns ($printf_format, \@field_letters).  Every field directive becomes
# '%<flags>s' and contributes exactly one letter, so the two always agree.
# Both are derived from a single scan: literal text in front of the first
# directive is never mistaken for a field, and '%%' consumes no argument.
sub _compile_output_pattern {
    my ($pattern) = @_;

    my @fields;
    my $printf_format = defined $pattern ? $pattern : '';

    $printf_format =~ s{
        ( %% )                             # literal percent, copied through
      |
        % ( [^a-zA-Z_%]* ) ( [a-zA-Z_] )   # flags and width, then field letter
    }{
        defined $1 ? '%%' : do { push @fields, $3; '%' . $2 . 's' }
    }gex;

    return ($printf_format, \@fields);
}

# Regexes that split composite fields into their parts.
my $qrtime    = qr{(\d+)\/(\w+)\/(\d+)\:(\d+)\:(\d+)\:(\d+)\s};
my $qruri     = qr{(\w+)\s([^\?]*)\??([^\s]*)?\s(.*)};
my $qrsession = qr{(\w*)\-(\w*)};

# Anchored and non-greedy so the host is taken after the FIRST '://' (a
# greedy '.*' would pick up a URL embedded in the query string); the path is
# optional so 'http://host' still yields a host.
my $qrref     = qr{^.*?\:\/\/([^\/]+)(\/.*)?};

# Composite field letter => [ regex, letters receiving its capture groups ].
my %breakdown_for = (
    r => [ $qruri,     [ qw( a f q p ) ] ],        # method, path, query, protocol
    t => [ $qrtime,    [ qw( d m y H M S ) ] ],    # day, month, year, h, m, s
    u => [ $qrsession, [ qw( s i ) ] ],            # session id, user id
    R => [ $qrref,     [ qw( o F ) ] ],            # referrer host, referrer path
);

# Parse one log line.
#
#   $line   - the raw line
#   $format - compiled regex for the selected log type
#   $access - array ref of field letters; slot 0 ('_') receives $0, slot N
#             receives capture group N of $format
#
# Returns a hash ref of field letter => value, including the parts of any
# composite field, or nothing when the line does not match.
sub _parse_log_line {
    my ($line, $format, $access) = @_;
    return unless defined $format && ref $access;

    my @captures = ($line =~ $format) or return;

    my %info;
    for my $idx (0 .. $#{$access}) {
        my $name  = $access->[$idx];
        my $value = $idx == 0 ? $0 : $captures[$idx - 1];
        $info{$name} = $value;

        my $rule = $breakdown_for{$name} or next;
        my ($part_re, $part_names) = @$rule;

        # Collect the captures in list context rather than reading $1..$n
        # afterwards: after a failed match those variables still hold the
        # previous successful match, which would leak unrelated values into
        # the parts.  A failed match yields an empty list here, so every
        # part simply becomes ''.
        my @pieces = defined $value ? ($value =~ $part_re) : ();
        for my $i (0 .. $#{$part_names}) {
            $info{ $part_names->[$i] } = defined $pieces[$i] ? $pieces[$i] : '';
        }
    }

    return \%info;
}

# Render one parsed line.
#
#   $outpat - printf format from _compile_output_pattern
#   $pats   - array ref of field letters, one per directive in $outpat
#   $info   - hash ref from _parse_log_line
#
# Returns the formatted text followed by a newline.  A letter that has no
# value for this log type contributes '' instead of being skipped, so the
# remaining values stay aligned with their directives.
sub _format_log_line {
    my ($outpat, $pats, $info) = @_;
    my @vals = map { defined $info->{$_} ? $info->{$_} : '' } @$pats;
    return sprintf($outpat, @vals) . "\n";
}

my ($outpat, $pats_ref) = _compile_output_pattern($pattern);
my @pats = @$pats_ref;

# Filter loop: lines that do not match the selected format are dropped.
while (my $line = <STDIN>) {
    my $info = _parse_log_line($line, $format, $access) or next;
    print _format_log_line($outpat, \@pats, $info);
}
109:
# Print the help text (invocation synopsis, an example, and the tables of
# one-letter formatting characters) to standard output, then terminate the
# program with exit status 0.  Never returns.
sub usage {

    print <<'END_OF_USAGE';

usage: logparse [-t=<type>] -p=<pattern>

example: tail -50 access_log | logparse -t=extended -p="%H:%M %-15o %f"

Formatting characters:

_ - The name of this filter script
v - The virtual host name/address
h - The host IP name/address
l - The remote logname
u - Remote User/Session
t - The time of the request
r - The full request
c - The HTTP code (302, 200, etc)
b - Bytes
R - Referrer string
A - User Agent string
P - Process ID
T - Time taken in seconds

Request string breakdown:

a - Action/Method (GET, POST, etc)
f - File path
q - Query string
p - HTTP protocol version

Time of request breakdown:

d - Day of the month
m - Month (Apr, May, etc)
y - Year
H - Hour
M - Minute
S - Second

User Session breakdown:

s - Session ID
i - User ID

Referrer string breakdown:

o - Host of referrer
F - File path of referrer

END_OF_USAGE

    exit 0;
}
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Multi-format Log Parser
by tstock (Curate) on Oct 05, 2001 at 10:14 UTC | |
by cjensen (Sexton) on Oct 05, 2001 at 23:40 UTC | |
by tstock (Curate) on Oct 06, 2001 at 21:48 UTC | |
|
Re: Multi-format Log Parser
by mattr (Curate) on Nov 13, 2001 at 15:34 UTC | |
by blakem (Monsignor) on Nov 14, 2001 at 03:53 UTC |