in reply to Re: Working with Access Logs
in thread Working with Access Logs
#!/usr/bin/perl
#
# Correlates a web-server error log with the matching access log, classifies
# every distinct error against the rules of a control file and appends three
# tab-separated report sections (all errors / alert errors / everything that
# is not ignored) to the output file.
#
use strict;
use warnings;
use Data::Dumper;
use Date::Calc;
use Number::Format;
use Benchmark;

my $start_time = Benchmark->new;

my $filename         = "D:/error_log1.txt";
my $completefilename = "D:/complete_details.xls";
my $accessfilename   = "D:/access_log.txt";
my $controlfilename  = "D:/control.txt";

# Three-argument open with lexical handles: the mode can never be taken from
# the file name, and the handles are closed automatically if we die early.
open(my $control_fh, '<', $controlfilename)
    or die "Can't open control file $controlfilename: $!\n";
open(my $report_fh, '>>', $completefilename)
    or die "Can't open xls output file $completefilename: $! \n";
open(my $error_fh, '<', $filename)
    or die "Can't open error log file $filename: $! \n";
open(my $access_fh, '<', $accessfilename)
    or die "Can't open access log file $accessfilename: $!\n";

# Month abbreviation (as written in both logs) => two-digit month number.
my %months = (
    Jan => '01', Feb => '02', Mar => '03', Apr => '04',
    May => '05', Jun => '06', Jul => '07', Aug => '08',
    Sep => '09', Oct => '10', Nov => '11', Dec => '12',
);

####### Load the control file
#
# One rule per line:
#
#   <command>|<fixdate>|<pattern_matchURL>|<pattern_matchFROM>|<pattern_matchBSR>|<pattern_comment>
#
#   command            ignore / alert
#   fixdate            date the fix was applied, written day/month/year
#   pattern_matchURL   regex for the error-log message (also the rule's key)
#   pattern_matchFROM  regex for the originating URL from the access log
#   pattern_matchBSR   regex for the calling browser from the access log
#   pattern_comment    description of the fix and/or failure reason
#
# The last three fields are optional and default to ".*".
#
my %rule_for;      # pattern_matchURL => rule record
my @rule_order;    # pattern_matchURL values in the order first seen

while (my $line = <$control_fh>) {
    chomp $line;
    next if $line =~ /^\s*$/;    # tolerate blank lines

    my @temp = split /\|/, $line;
    die "Malformed line $. in control file $controlfilename: $line\n"
        unless defined $temp[2];

    my $where    = "at line $. of $controlfilename";
    my $from_pat = defined $temp[3] ? $temp[3] : ".*";
    my $bsr_pat  = defined $temp[4] ? $temp[4] : ".*";

    # A later line with the same URL pattern replaces the earlier rule but
    # keeps the earlier rule's position.
    push @rule_order, $temp[2] unless exists $rule_for{ $temp[2] };
    $rule_for{ $temp[2] } = {
        command => $temp[0],
        date    => $temp[1],
        url_re  => compile_rule_pattern($temp[2],  $where),
        from_re => compile_rule_pattern($from_pat, $where),
        bsr_re  => compile_rule_pattern($bsr_pat,  $where),
        comment => defined $temp[5] ? $temp[5] : ".*",
    };
}
close $control_fh;

####### Read the entire error log
#
my @errorlog;           # raw error-log lines; they index the hashes below
my %match_line;         # the main (message) part of the error entry
my %match_string;       # the string to look for in the access log
my %match_datetime;     # hh:mm:ss to match in the access log
my %match_fromurl;      # originating URL from the access log
my %match_browser;      # calling browser from the access log
my %match_comment;      # comment from the control file

my $unparsed_entries = 0;
while (my $entry = <$error_fh>) {
    chomp $entry;

    # Expected shape: "[... hh:mm:ss yyyy] [level] [client ...] message".
    # Lines of any other shape are skipped; previously they silently
    # inherited the captures of the last line that did match.
    if ($entry =~ /(\d\d:\d\d:\d\d).*?\]\s\[.*?\]\s\[.*?\]\s(.+)$/) {
        push @errorlog, $entry;
        $match_datetime{$entry} = $1;
        $match_line{$entry}     = $2;
        $match_string{$entry}   = $2;
    }
    else {
        $unparsed_entries++;
    }
}
close $error_fh;
warn "Skipped $unparsed_entries error-log line(s) with an unexpected layout\n"
    if $unparsed_entries;

####### Match up the source URL and browser from the access log
#
# @contentval holds the quoted fields of an access-log line. The quote
# characters are captured too, so the payloads sit at every third index:
#   $contentval[1] is the file accessed (request)
#   $contentval[4] is the original URL  (referrer)
#   $contentval[7] is the browser id    (user agent)
#
# Both logs are walked front to back: once an error entry has been paired,
# matching resumes at the following entry, and access lines older than that
# entry are skipped.
#
my $startnext     = 0;     # index of the first error entry still to pair
my $nexterrortime = "";    # timestamp of that entry, "Y:M:D:h:m:s"
my $movenext      = 0;     # true while skipping ahead to $nexterrortime

ACCESS_LINE:
while (my $access = <$access_fh>) {
    last ACCESS_LINE if $startnext > $#errorlog;    # nothing left to pair
    chomp $access;

    my @contentval = $access =~ m/(\")(.*?)(\")/g;

    my ($aday, $amon, $ayear, $accesstime) =
        $access =~ m/\[(\d\d)\/([A-Za-z]+)\/(\d\d\d\d):(\d\d:\d\d:\d\d)\s/;
    next ACCESS_LINE unless defined $accesstime && exists $months{$amon};
    my $accessdatetime = "$ayear:$months{$amon}:$aday:$accesstime";

    # Still before the next unpaired error entry: skip this access line.
    next ACCESS_LINE
        if $movenext && Get_Difference($accessdatetime, $nexterrortime);
    $movenext = 0;

    my $request = $contentval[1];
    next ACCESS_LINE unless defined $request;

    for my $j ($startnext .. $#errorlog) {
        my $entry = $errorlog[$j];
        next if defined $match_fromurl{$entry};

        # The error message is data, not a pattern: quote it so that
        # characters such as "(" or "[" cannot break the match.
        next unless $request =~ m:\Q$match_string{$entry}\E\s:;

        # NOTE(review): referrer and browser are recorded even when the
        # times below turn out not to agree, and the entry is then never
        # re-paired. Kept from the original, which flagged this case as
        # undecided ("decide how to match where the times are not within
        # the same second").
        $match_fromurl{$entry} = $contentval[4];
        $match_browser{$entry} = $contentval[7];

        my $times_agree = $match_datetime{$entry} eq $accesstime;
        if (!$times_agree) {
            my @stamp = error_log_date($entry);
            if (@stamp) {
                my $milddifference =
                    Get_Difference(join(":", @stamp), $accessdatetime, "1");

                # Access logged one or two seconds after the error.
                $times_agree = $milddifference == 1 || $milddifference == 2;
            }
        }
        next unless $times_agree;

        # Paired: continue with the following error entry and skip the
        # access lines that precede it.
        $startnext = $j + 1;
        last ACCESS_LINE if $startnext > $#errorlog;

        my @next_stamp = error_log_date($errorlog[$startnext]);
        if (@next_stamp) {
            $nexterrortime = join(":", @next_stamp);
            $movenext      = 1;
        }
        next ACCESS_LINE;
    }
}
close $access_fh;

#######
# Create the reduced list which supplements @errorlog and contains the
# unique error messages (unique means that the time is ignored but the
# originating URL and browser are differentiated).
#
my %duplicates;     # message + URL + browser => first raw line seen
my %reduced_log;    # representative raw line  => number of occurrences
for my $entry (@errorlog) {
    # NUL-separated so that neighbouring fields cannot run into each other.
    my $dupstr = join "\0",
        $match_line{$entry},
        or_empty($match_fromurl{$entry}),
        or_empty($match_browser{$entry});
    $duplicates{$dupstr} = $entry unless defined $duplicates{$dupstr};
    $reduced_log{ $duplicates{$dupstr} }++;
}

#######
# Compare the match_* information with the control-file rules.
#
#   @errorlog      all messages
#   %reduced_log   unique messages
#   %alerterrors   alert messages dated after the rule's fix date
#   %exceptignore  messages that are not explicitly ignored
#
# Rules are tried in control-file order and the first hit wins (the original
# walked them in hash order, so the winner was random when several matched).
#
# TODO: the control-file notes say an alert hit dated *before* the fix date
# should be treated like an ignore rule, yet such entries are still listed
# under "except ignore". Left unchanged pending a decision.
#
my %alerterrors;
my %exceptignore;

for my $errstring (keys %reduced_log) {
    my ($year, $month, $day) = error_log_date($errstring);
    my $fromurl = or_empty($match_fromurl{$errstring});
    my $browser = or_empty($match_browser{$errstring});

    my $command    = "";
    my $comment    = "";
    my $difference = 0;

    RULE:
    for my $url_pat (@rule_order) {
        my $rule = $rule_for{$url_pat};
        next RULE unless $rule->{command};    # no command: rule cannot apply
        next RULE
            unless $match_line{$errstring} =~ $rule->{url_re}
                && $fromurl =~ $rule->{from_re}
                && $browser =~ $rule->{bsr_re};

        $command = $rule->{command};
        $comment = $rule->{comment};

        # For alert rules, check whether the error is dated after the fix.
        if ($command =~ /alert/ && defined $year) {
            my ($fday, $fmonth, $fyear) =
                split("/", or_empty($rule->{date}));
            if (defined $fyear) {
                $difference = Get_Difference(
                    "$fyear:$fmonth:$fday:0:0:0",
                    "$year:$month:$day:0:0:0",
                );
            }
        }
        last RULE;
    }

    $command = "unset" unless $command;
    $match_comment{$errstring} = $comment;
    $alerterrors{$errstring}++ if $command =~ /alert/ && $difference == 1;
    next if $command =~ /ignore/;
    $exceptignore{$errstring}++;
}

###### Write the various separations
write_section(
    "\n ***********************START OF ALL ERRORS *********************\n",
    keys %reduced_log,
);
write_section(
    "\n ***********************START OF ALERT ERRORS*********************\n",
    keys %alerterrors,
);
write_section(
    "\n ***********************START OF ALL EXCEPT IGNORE ERRORS*********************\n",
    keys %exceptignore,
);

# Buffered write errors only surface at close time.
close($report_fh)
    or die "Can't close xls output file $completefilename: $!\n";

my $end_time = Benchmark->new;
my $timediff = timediff($end_time, $start_time);
print "\n the code took:", timestr($timediff), "\n";

# Returns its argument, or the empty string when the argument is undefined.
sub or_empty {
    my ($value) = @_;
    return defined $value ? $value : '';
}

# Compiles a control-file pattern into a regex anchored at both ends.
# $where describes the pattern's origin and is used in the error message.
# Dies when the pattern is not a valid regular expression.
sub compile_rule_pattern {
    my ($pattern, $where) = @_;
    my $compiled = eval { qr/^$pattern$/ };
    die "Invalid pattern '$pattern' $where: $@" unless defined $compiled;
    return $compiled;
}

# Extracts the timestamp from the first "[...]" field of an error-log line,
# which is expected to read "<weekday> <month> <day> <hh:mm:ss> <year>".
# Returns (year, two-digit month, day, hh:mm:ss), or the empty list when the
# line is undefined or the field does not have that layout.
sub error_log_date {
    my ($entry) = @_;
    return unless defined $entry;

    my ($stamp) = $entry =~ m/\[(.*?)\]/;
    return unless defined $stamp;

    my (undef, $month, $day, $time, $year) = split(" ", $stamp);
    return unless defined $year && exists $months{$month};
    return ($year, $months{$month}, $day, $time);
}

# Prints one report section to the output file: the heading, then one
# tab-separated row per entry, ordered by error message.
sub write_section {
    my ($heading, @entries) = @_;

    print {$report_fh} $heading;
    for my $entry (sort { $match_line{$a} cmp $match_line{$b} } @entries) {
        my $fromurl = or_empty($match_fromurl{$entry});
        my $browser = or_empty($match_browser{$entry});
        print {$report_fh}
            "$match_datetime{$entry} \t $match_line{$entry} \t $reduced_log{$entry}"
          . " \t $fromurl \t $browser \t $match_comment{$entry}\n";
    }
    return;
}

# Gives the difference between two timestamps.
#
#   $startdate, $enddate  "year:month:day:hour:minute:second"
#   $diff_sec             optional flag selecting the return value
#
# With $diff_sec true: returns the signed difference in seconds
# ($enddate - $startdate) when the two are less than a minute apart,
# otherwise 0.
# With $diff_sec false: returns 1 when $enddate is later than $startdate,
# otherwise 0.
#
# Returns 0 when either timestamp is incomplete or non-numeric. A timestamp
# Date::Calc rejects (month 13, for instance) also yields 0, with a warning.
sub Get_Difference {
    my ($startdate, $enddate, $diff_sec) = @_;

    my @start = split(":", or_empty($startdate));
    my @end   = split(":", or_empty($enddate));
    return 0 if @start < 6 || @end < 6;

    my @fields = (@start[0 .. 5], @end[0 .. 5]);
    return 0 if grep { !/^\d+$/ } @fields;

    my @delta = eval { Date::Calc::Delta_DHMS(@fields) };
    if (@delta != 4) {
        warn "Cannot compare '$startdate' with '$enddate': $@";
        return 0;
    }

    # Fold days/hours/minutes/seconds into seconds so the sign test does not
    # depend on how the delta is spread over the four components.
    my ($Dd, $Dh, $Dm, $Ds) = @delta;
    my $total = (($Dd * 24 + $Dh) * 60 + $Dm) * 60 + $Ds;

    if ($diff_sec) {
        return abs($total) < 60 ? $total : 0;
    }
    return $total > 0 ? 1 : 0;
}
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^3: Working with Access Logs
by marto (Cardinal) on Mar 01, 2006 at 09:24 UTC |