#!/usr/bin/perl -wC0
use strict;
use Encode;
use Tie::File;
use PerlIO::gzip;

our $LOGDIR = "/var/log/apache-perl";
our $REPORT = "/var/www/analog/analog.html";

# Overview:
#   1. Read the access logs and normalize every line to CP1255.
#   2. Pipe the normalized lines to analog.
#   3. When analog finishes, fix up the charset declared in its report.
# Step 2 has to be started before step 1, because step 1 writes into the pipe.

# List-form pipe open: no shell is involved, and the handle is lexical.
open my $analog, '|-', 'analog', '-'
    or die "can't start analog: $!";

# Turn a write to a dead analog process into a regular exception.
$SIG{PIPE} = sub { die "sigpipe" };

# Step 1: read the logs.
foreach my $file (glob $LOGDIR . "/access.log*") {
    # gzip(autopop) (see PerlIO::gzip) transparently handles gz or non-gz
    # input.
    open my $log, "<:gzip(autopop)", $file
        or die "can't read log $file: $!";

    while (my $line = <$log>) {
        # Undo URI escapes (%HH) and \xHH escapes.  Escapes that decode to a
        # control byte (e.g. %0A, \x0d) are deliberately left escaped:
        # expanding them would insert raw newlines/control characters into
        # the record and split or corrupt the log line handed to analog.
        $line =~ s{((?:%|\\x)([0-9A-Fa-f]{2}))}{
            my $byte = hex $2;
            ($byte < 0x20 || $byte == 0x7F) ? $1 : chr $byte;
        }eg;

        # Try strict UTF-8 first.  LEAVE_SRC keeps $line intact so that the
        # CP1255 fallback below always sees the original bytes.
        my $text = eval {
            Encode::decode('utf-8', $line,
                Encode::FB_CROAK() | Encode::LEAVE_SRC());
        };
        $text = Encode::decode('cp1255', $line) unless defined $text;

        # Everything goes to analog as CP1255 bytes.
        print {$analog} Encode::encode('cp1255', $text)
            or die "analog: $!";
    }
}

# Step 2: let analog do its stuff.  Closing a piped handle also waits for the
# child, so no separate waitpid is needed.  When the only problem is a
# non-zero exit, $! is 0 and the details are in $?.
unless (close $analog) {
    die "analog: $!" if $! != 0;
    die "analog: exited with wait status $?";
}

# Step 3: fix up the charset declared in the report.
# The replacement is padded with trailing spaces to exactly the length of the
# text it replaces, so the line length does not change and the edit can be
# done in place.
# NOTE(review): "CP-1255" is kept as in the original script; confirm that the
# consumers of the report accept this charset label.
my $old_charset = 'charset=ISO-8859-1"';
my $new_charset = sprintf '%-*s', length($old_charset), 'charset=CP-1255"';

tie my @report, 'Tie::File', $REPORT or die "Tie::File: $!";
FIXUP: foreach my $report_line (@report) {
    # Only the first occurrence is rewritten.
    last FIXUP if $report_line =~ s/\Q$old_charset\E/$new_charset/;
}
untie @report;