Category: Web and/or Networking
Author/Contact Info Brigitte Jellinek, bjelli@horus.at
Description:

Find out whats really going on between the browser and the webserver, and manipulate it. Needs an exisiting http proxy to do the actual work.

The script creates a directory to hold all the logfile it outputs: One main logfile, and one file per connection. The connection-files contain the request and the response.

This script is a direct descendant of my first ever perl script (written in 1993?) Today I'd write the socket stuff with IO::Socket instead, but I'm too lazy to change it now.

#!/usr/bin/perl -w
#
# logging proxy for http                 by bjelli@horus.at
#
# Usage: loggingproxy port dir
#
#    needs an exisiting http proxy to do the actual work
#
#    will create dir with one file per connection handled,
#    and one main file.  
#    
#    can't do Keep-Alive.  HTTP-headers for Keep-Alive
#    are removed
# ------------------------------------------------------------------

use strict;

my $UPSTREAM_PROXY = "where.your.proxy.is.at";

# ------------------------------------------------------------------

@ARGV == 2 
  or die("Usage: $0 port directory\nLogging Proxy for HTTP\n");
my ($port, $logdir) = @ARGV;

# ------------------------------------------------------------------
# strange stuff needed for old style sockets used by many subs
my $sockaddr = 'S n a4 x8';    
my $AF_INET = 2;        
my $SOCK_STREAM = 1;        
my ($name, $aliases, $proto) = getprotobyname('tcp');

initlog();    # creates global file handle LOG;
initsocket();    # creates global socket S
my ($proxyserv) = (gethostbyname($UPSTREAM_PROXY))[4];

# ------------------------------------------------------------------
# Main Loop: listen on S, fork a child for each connection.
#         connection has its own socket NS
# ------------------------------------------------------------------
for (my $con = 1; ; $con++) {
    my($addr,$af,$port,$inetaddr,@inetaddr,$child);

    print LOG "--Listening for connection no $con ....\n";
    ($addr = accept(NS, S)) || die $!;
    ($af, $port, $inetaddr) = unpack($sockaddr, $addr);
    @inetaddr = unpack("C4", $inetaddr);

    if (($child = fork()) == 0) {
        handleconnection($con);
        close(NS);
        exit;
    } else {
        print LOG "ok, connection (from $af, $port, ", 
            join(".", @inetaddr), 
            ") will be logged to conn.$con.log\n";
    }

}

# ------------------------------------------------------------------
# Set up the directory,
# set up the main logfile - LOG will be used globally
# ------------------------------------------------------------------
sub initlog {

    -d $logdir or mkdir($logdir,0777);

    die ("can't write to directory $logdir\n")
        unless (-e $logdir and -d $logdir and -w $logdir);


    open(LOG, ">$logdir/main.log") 
        || die "not logging to '$logdir/main.log' because of $!\n";

    select(LOG); $| = 1; select(STDOUT);

    print LOG "-" x 80;
    print LOG "\n--logserver on port $port started at ";
    my $date = `date`;
    print LOG "$date";
}

# ------------------------------------------------------------------
# set up the main socket S to listen on the port specified 
# ------------------------------------------------------------------
sub initsocket {

    if ($port !~ /^\d+$/) {
        ($name, $aliases, $port) = getservicebyport($port, 'tcp');
    }

    print "Port = $port\n";

    my $this = pack ($sockaddr, $AF_INET, $port, "\0\0\0\0");

    select(NS); $| = 1; select(STDOUT);

    socket(S, $AF_INET, $SOCK_STREAM, $proto)     
         || die " socket: $!";
    bind(S, $this)                     
         || die "   bind: $!";
    listen(S,5)                    
         || die "connect: $!";

    select(S); $| =1; select(STDOUT);
}

# ------------------------------------------------------------------
# get one page through the upstream proxy
# ------------------------------------------------------------------
sub getme {
    my($query,$con) = @_;
    my($response);


    my($this)  =pack ($sockaddr, $AF_INET, 20000+$con, "\0\0\0\0");
    my($server)=pack($sockaddr,$AF_INET,8080,$proxyserv);


    local(*P);
    socket(P, $AF_INET, $SOCK_STREAM, $proto) 
        || die "socket($AF_INET, $SOCK_STREAM, $proto): $!";
    bind(P,$this) 
        || die "bind: $!";
    connect(P,$server) 
        || die "connect: $!";
    select(P);  $| = 1; select(STDOUT);

    print P $query;

    while(<P>) {
        $response .= $_;
        print LOG;
    }

    close P;

    $response;
}


# ------------------------------------------------------------------
# handle one connection
# ------------------------------------------------------------------
sub handleconnection {
    my($con) = @_;

    local(*LOG);
    open(LOG, ">$logdir/conn.$con.log");

    my($query);
    # first gather up all the Headers from the request 
    # into $query
    while(<NS>) {
        if (/Keep-Alive/) {
            # print LOG "# $_";
        } else {
            # print LOG "$_";
            $query .= $_ unless /Keep-Alive/;
        }
        last if /^\s*$/;
    }

    # to handle POST: read even more data from the request
    # and add to $query
    if ($query=~/POST/ and $query=~/Content-Length:\s*(\d+)/i) {
        my($postdata);
        read (NS, $postdata, $1);
        print LOG $postdata;
        $query .= $postdata;
    }

    # This is the place where you can mess with the query.
    # e.g. pretend you were referred from somewhere:
    # $query =~ s(\n)(\nReferer: http://www.binet.is/\n);

    print LOG $query;        # log the request
    my $response = getme($query,$con); 
    print NS $response;         # and send it back

}
Replies are listed 'Best First'.
Re: loggingproxy - HTTP Proxy for logging and messing with HTTP
by strredwolf (Chaplain) on Mar 08, 2001 at 12:41 UTC
    A similar one is here of my own devizing. Check it out, all it needs is netpipes.

    --
    $Stalag99{"URL"}="http://stalag99.keenspace.com";