0: #!/usr/bin/perl
1:
2: #############################
3: #
4: # A simple multi-format log parser which is intended
5: # to be used as a filter. Could be faster, but it does
6: # allow you to define a pretty output format.
7: #
8: # Author: Chris Jensen
9: #
10:
use strict;
use warnings;

use Getopt::Long;

# Command-line options:
#   --type|-t     name of the log format to parse (defaults to 'common')
#   --pattern|-p  printf-like output pattern built from field letters (required)
# GetOptions reports bad options on STDERR itself; show the help and stop.
my %optctl;
GetOptions(\%optctl, "type|t=s", "pattern|p=s") or usage();


# Each log format is a pair: [ line regex, field letters ].
# The first field letter, '_', is bound to $0 (the script name); the
# remaining letters name the regex capture groups in order.
#
# The byte-count group accepts '-' as well as digits, because Apache's
# %b logs '-' rather than 0 when no bytes were sent; a digits-only
# group would silently drop those lines.
my $log_formats = {

    'common'   => [ qr{(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-)},
                    [ qw( _ h l u t r c b ) ] ],

    'virtual'  => [ qr{(\S+) (\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-)},
                    [ qw( _ v h l u t r c b ) ] ],

    'combined' => [ qr{(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-) \"([^\"]*)\" \"([^\"]*)\"},
                    [ qw( _ h l u t r c b R A ) ] ],

    'referer'  => [ qr{(\S+) \-\> (\S+)},
                    [ qw( _ R r ) ] ],

    # NOTE(review): this captures only the first whitespace-delimited
    # token of the line, not a whole user-agent string -- confirm intent.
    'agent'    => [ qr{(\S+)},
                    [ qw( _ A ) ] ],

    'extended' => [ qr{(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-) \"([^\"]*)\" \"([^\"]*)\" (\d+) (\d+)},
                    [ qw( _ h l u t r c b R A P T ) ] ],

    'custom'   => [ qr{(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-) \"([^\"]*)\" \"([^\"]*)\" (\d+)},
                    [ qw( _ h l u t r c b A R T ) ] ],

};


my $type    = $optctl{type} || 'common';
my $pattern = $optctl{pattern};
usage() unless $pattern;

# An unknown type would otherwise dereference undef and print blank lines.
if (!exists $log_formats->{$type}) {
    warn "logparse: unknown log type '$type' (known types: "
        . join(', ', sort keys %$log_formats) . ")\n";
    usage();
}

my ($format, $access) = @{ $log_formats->{$type} };


# Translate the user's pattern into a printf format in a single pass:
#   %%              stays a literal percent sign
#   %<flags><x>     becomes %<flags>s, and field letter <x> is queued in @pats
# Building @pats and $outpat from the same scan keeps the number of
# queued fields equal to the number of %s conversions, even when the
# pattern starts with literal text.
my @pats;
my $outpat = $pattern;
$outpat =~ s{%(?:(%)|([^a-zA-Z_%]*)([a-zA-Z_]))}{
    defined $1 ? '%%' : do { push @pats, $3; '%' . $2 . 's' }
}ge;


my $qrtime    = qr{(\d+)\/(\w+)\/(\d+)\:(\d+)\:(\d+)\:(\d+)\s};
my $qruri     = qr{(\w+)\s([^\?]*)\??([^\s]*)?\s(.*)};
my $qrsession = qr{(\w*)\-(\w*)};
my $qrref     = qr{.*\:\/\/([^\/]+)(\/.*)};

# Composite fields and the sub-fields extracted from each of them:
# field letter => [ regex, sub-field letters in capture order ].
my %breakdown_of = (
    r => [ $qruri,     [ qw( a f q p ) ] ],
    t => [ $qrtime,    [ qw( d m y H M S ) ] ],
    u => [ $qrsession, [ qw( s i ) ] ],
    R => [ $qrref,     [ qw( o F ) ] ],
);


# Filter loop: lines that do not match the selected format are dropped.
while (my $line = <STDIN>) {

    # Every format has at least one capture group, so an empty list
    # means the line did not match.
    my @captures = ($line =~ $format);
    next unless @captures;

    # '_' (first field letter) maps to $0; the rest map to the captures.
    my %info;
    @info{ @$access } = ($0, @captures);

    # A failed sub-match yields an empty list, leaving the sub-fields
    # undefined instead of picking up captures from an earlier match.
    for my $field (keys %breakdown_of) {
        next unless exists $info{$field};
        my ($regex, $subfields) = @{ $breakdown_of{$field} };
        @info{ @$subfields } = ($info{$field} =~ $regex);
    }

    # Always supply one value per %s so later columns never shift;
    # fields missing from this format or line print as empty strings.
    my @vals = map { defined $info{$_} ? $info{$_} : '' } @pats;

    print sprintf($outpat, @vals), "\n";
}
108:
# Print the command-line help (synopsis, an example, and the table of
# formatting characters) to standard output, then terminate the program
# with exit status 0. This sub never returns to its caller.
sub usage {

    # Non-interpolating heredoc: the text is emitted exactly as written,
    # including the leading and trailing blank lines.
    print <<'END_OF_USAGE';

usage: logparse [-t=<type>] -p=<pattern>

example: tail -50 access_log | logparse -t=extended -p="%H:%M %-15o %f"

Formatting characters:

_ - The name of this filter script
v - The virtual host name/address
h - The host IP name/address
l - The remote logname
u - Remote User/Session
t - The time of the request
r - The full request
c - The HTTP code (302, 200, etc)
b - Bytes
R - Referrer string
A - User Agent string
P - Process ID
T - Time taken in seconds

Request string breakdown:

a - Action/Method (GET, POST, etc)
f - File path
q - Query string
p - HTTP protocol version

Time of request breakdown:

d - Day of the month
m - Month (Apr, May, etc)
y - Year
H - Hour
M - Minute
S - Second

User Session breakdown:

s - Session ID
i - User ID

Referrer string breakdown:

o - Host of referrer
F - File path of referrer

END_OF_USAGE

    exit(0);

}
In reply to Multi-format Log Parser by cjensen
| For: | Use: |
| & | &amp;amp; |
| < | &amp;lt; |
| > | &amp;gt; |
| [ | &amp;#91; |
| ] | &amp;#93; |