| Category: | Internet / World Wide Web / Servers / Proxy |
| Author/Contact Info | Monsieur Champs champs@users.sourceforge.net |
| Description: | This is an old (quite old) project of a Rewriting Proxy for the Mod_Perl Apache Extension. I wrote it initialli by myself, and for the second try I had a BIG help from Darren Chamberlain, my old perl mentor. (Thanks Darren!) The principle is very simple: this extension acts as a CGI, accepting requests from clients inside an isolated network, using a dedicated route (maybe a ADSL connection) to access the internet, grab the requested page, rewrite it so all the links points back to the Rewriting Proxy Server and send it to the client. I also tried to correct some badly writen html code and close tags that Micro$hit uses left open. This is the (almost) very first ALPHA version, and I no more have an Apache / Mod_Perl enabled server to continue my development... (this makes me sad...) |
############################################################
package Apache::Proxy::MyCookies;
############################################################
############################################################
# This is the Apache's Mod_Perl Rewriting Proxy MyProxy
# It's a lightweight proxy for use were other proxies
# are not appliable or desirable.
# Copyright (C) 2000
# Monsieur Champs <champs@users.sourceforge.net>
# This program is free software; you can redistribute
# it and/or modify it under the terms of the GNU
# General Public License as published by the Free
# Software Foundation; either version 2 of the
# License, or (at your option) any later version.
# This program is distributed in the hope that it
# will be useful, but WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General
# Public License along with this program; if not,
# write to the Free Software Foundation, Inc., 59
# Temple Place - Suite 330, Boston, MA 02111-1307, USA.
############################################################
# This package have the function of provide some
# function overrides for the HTTP::Cookies object, for
# help me built a cookie system for the mod_perl
# rewriting proxy MyProxy, my first open-source
# project...
############################################################
# Tell me when i say bullshit...
use strict;
# get the FLOCK_* constants
use Fcntl ':flock';
# My parent...
use HTTP::Cookies;
# Utilities that my parent uses... (oh, dear!)
# This should not be here...
use HTTP::Date qw(str2time time2str);
use HTTP::Headers::Util qw(split_header_words join_header_words);
# This will handle calls for methods that i'm not overridding...
use vars qw/ @ISA /;
@ISA = ( 'HTTP::Cookies' );
############################################################
# new()
# This sub overrides the HTTP::Cookies::new() sub just
# for making the right object, and respect the oop
# conventions that we use in perl.
############################################################
sub new{
my $type = shift;
my $self = new HTTP::Cookies( @_ );
return bless $self, $type;
}
############################################################
# save()
# This sub overrides the HTTP::Cookies::save() sub,
# for just add some more rigid file locking control.
sub save{
my $self = shift;
my $file = shift || $self->{'file'} || return;
local(*FILE);
open(FILE, ">$file") or die "Can't open $file: $!";
flock FILE, LOCK_EX;
print FILE "#LWP-Cookies-1.0\n";
print FILE $self->as_string(!$self->{ignore_discard});
flock FILE, LOCK_UN;
close(FILE);
}
############################################################
# load()
# This sub overrides the HTTP::Cookies::load() sub
# just for add some rigid file locking control...
############################################################
sub load{
my $self = shift;
my $file = shift || $self->{'file'} || return;
local(*FILE, $_);
local $/ = "\n"; # make sure we got standard record separator
open(FILE, $file) or return;
flock FILE, LOCK_SH;
my $magic = <FILE>;
unless ($magic =~ /^\#LWP-Cookies-(\d+\.\d+)/) {
warn "$file does not seem to contain cookies";
return;
}
while (<FILE>) {
next unless s/^Set-Cookie3:\s*//;
chomp;
my $cookie;
for $cookie (split_header_words($_)) {
my($key,$val) = splice(@$cookie, 0, 2);
my %hash;
while (@$cookie) {
my $k = shift @$cookie;
my $v = shift @$cookie;
$hash{$k} = $v;
}
my $version = delete $hash{version};
my $path = delete $hash{path};
my $domain = delete $hash{domain};
my $port = delete $hash{port};
my $expires = str2time(delete $hash{expires});
my $path_spec = exists $hash{path_spec};
delete $hash{path_spec};
my $secure = exists $hash{secure};
delete $hash{secure};
my $discard = exists $hash{discard};
delete $hash{discard};
my @array =
( $version, $val, $port, $path_spec,
$secure, $expires, $discard );
push(@array, \%hash) if %hash;
$self->{COOKIES}{$domain}{$path}{$key} = \@array;
}
}
flock FILE, LOCK_UN;
close(FILE);
}
1;
############################################################
package Apache::Proxy::MyProxy;
############################################################
############################################################
# This is the Apache's Mod_Perl Rewriting Proxy MyProxy
# It's a lightweight proxy for use were other proxies
# are not appliable or desirable.
# Copyright (C) 2000
# Monsieur Champs <champs@users.sourceforge.net>
# This program is free software; you can redistribute
# it and/or modify it under the terms of the GNU
# General Public License as published by the Free
# Software Foundation; either version 2 of the
# License, or (at your option) any later version.
# This program is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even
# the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
# You should have received a copy of the GNU General
# Public License along with this program; if not,
# write to the Free Software Foundation, Inc., 59
# Temple Place - Suite 330, Boston, MA 02111-1307, USA.
############################################################
# The rewriter's core is here...
use lib '/home/champs/src';
# A URL base do MyProxy:
use constant MY_PROXY_BASE_URL => 'http://proxy.example.com/MyProxy/';
use constant CACHE_ROOT => '/var/cache/myproxy/';
use constant COOKIE => 'MyProxyCookieJar';
# This is for mod_perl...
use Apache ();
use Apache::Constants qw/ :common /;
use Apache::Proxy::URLRewriter;
# This is for the use agent...
use HTTP::Request;
use HTTP::Response;
use HTTP::Headers;
use LWP::UserAgent;
# Prevents calls to rflush() at every print...
# Defensive programming added by Champs(01.09.2000)
local $|=0;
############################################################
# Sub Handler manipulates the Apache's request object ($R)
# to figure out a new web page to the user...
# By Champs (champs@tbn.com.br), 15.08.2000.
############################################################
sub handler{
# Retrieve the request object...
my $r = shift;
# Creates a new User Agent for this request...
my $ua = new LWP::UserAgent();
# Gets the page from the internet.
my $resp = &retrievePage( $r, $ua );
# Stuff standard headers into the response...
&processHeaders( $resp, $r );
# Send headers back to the client...
$r->send_http_header();
# Rewrite and send the resulting page back to the client.
$r->print(
$resp->content()?
new Apache::Proxy::URLRewriter( $resp, MY_PROXY_BASE_URL ) :
$resp->error_as_HTML
);
# this will make Apache Happy...
return OK;
}
############################################################
# processHeaders
# This sub makes the dirty job of processing and setting
# up the http headers into the current Apache's request
# object.
############################################################
sub processHeaders($$){
my( $resp, $r ) = ( shift, shift );
my( $key, $value );
foreach ( split( /\n/, $resp->headers()->as_string() ) ) {
( $key, $value ) = /([^:]+):(.*)/io;
if ( $key =~ /Content-Type/io ) {
$r->content_type( $value );
} elsif ( $key =~ /no-cache/io ) {
$r->no_cache(1);
}elsif( $key =~ /Cookie/io ){
next; # Cookies are processed separatelly...
} else {
$r->header_out( $key => $value );
}
}
}
############################################################
# retrievePage( Apache::Request )
# This sub retrieves the requested page, and returns a
# HTTP::Response Object for the result.
############################################################
sub retrievePage($$){
# this is for help debug...
use Apache::Log;
# Process cookies...
use Apache::Proxy::MyCookies;
# Fetch the Apache::Request object...
my $r = shift;
# recover the user agent
my $ua = shift;
# create a new log object...
my $log = $r->log;
# Get the Headers from the request...
my %headers = $r->headers_in();
# Mimics the client's web browser...
$ua->agent( $headers{'User-Agent'} );
# Build a Headers Object...
my $header = new HTTP::Headers( %headers );
# TODO: Melhorar isso aqui, para que os manehs possam
# instalar em qualquer lugar...
# Select the path_info from the Apache's request...
my $uri = $r->uri();
$uri =~ s!.*/MyProxy/!!o;
# remove the slash from the beginning of the path_info information..
+.
$uri =~ s|^/+||iog;
# Build a cookie...
my $cookie = new Apache::Proxy::MyCookies( file => CACHE_ROOT.$r->ge
+t_remote_host() );
# Load cookie if exists a file with the cookie...
$cookie->load() if -e CACHE_ROOT.$r->get_remote_host();
# Now, make a Request object...
my $req = new HTTP::Request ( $r->method(), $uri, $header );
# Put the cookie into the request...
$cookie->add_cookie_header( $req );
my @cookies = $req->headers->header( 'Cookie' );
# If there is any aditional contents on the Apache::request object,
# put it into the request. Hope that works...
{
my $content;
if( $content = $r->content ){
$req->content( $content );
}
}
# request the page, gets response...
my $resp = $ua->request( $req );
# extract the response from it...
$cookie->extract_cookies( $resp );
# Save the cookie back to disk...
$cookie->save();
# ...And send it to the remote web server. Hope that he understand..
+.
return $resp;
}
############################################################
1;
############################################################
package Apache::Proxy::URLRewriter;
############################################################
# This is the Apache's Mod_Perl Rewriting Proxy MyProxy
# It's a lightweight proxy for use were other proxies
# are not appliable or desirable.
# Copyright (C) 2000
# Monsieur Champs <champs@users.sourceforge.net>
# This program is free software; you can redistribute
# it and/or modify it under the terms of the GNU
# General Public License as published by the Free
# Software Foundation; either version 2 of the
# License, or (at your option) any later version.
# This program is distributed in the hope that it
# will be useful, but WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General
# Public License along with this program; if not,
# write to the Free Software Foundation, Inc., 59
# Temple Place - Suite 330, Boston, MA 02111-1307, USA.
############################################################
# This package has the function of rewrite all the
# hyper links in a given web page, making them absolute.
# It also will rewrite the links, prefixing it with a
# user-defined string (possibly a call to another url),
# and rebuild the entire page, returning it as a scalar.
############################################################
use strict;
############################################################
# NEW
# makes the dirty job.
# Receives as parameters (in that order):
# . a HTTP::Response object, with the page to rewrite.
# . a URI object to prefix the page's hyperlinks with, or
# undef() to no prefix at all.
# It returns the new page as a scalar.
############################################################
sub new($$$){
use HTTP::Response;
use URI;
use HTML::TokeParser;
# Retrieve parameters.
my( $class, $response, $prefix ) = ( shift, shift, shift );
# Get the content...
my $content = $response->content;
# Make a new HTML::TokeParser
my $parser = new HTML::TokeParser( \$content );
# This is for holding the page 'till we can send it away...
my $web_page = "";
# Run througt the web page, making changes...
while( my $token_ref = $parser->get_token() ){
next unless my @tokens = @{$token_ref};
if( $tokens[0] =~ /S/ ){
$web_page .= isLink( $tokens[2] ) ?
&rebuild_tag( $response, $prefix, @tokens ) : $tokens[4];
}
elsif( $tokens[0] =~ /E/ ){ $web_page .= $tokens[2]; }
elsif( $tokens[0] =~ /T/ ){ $web_page .= $tokens[1]; }
elsif( $tokens[0] =~ /C/ ){ $web_page .= $tokens[1]; }
elsif( $tokens[0] =~ /D/ ){ $web_page .= $tokens[1]; }
elsif( $tokens[0] =~ /PI/){ $web_page .= $tokens[1]; }
else{ warn "WARNING: Unknow token found at __FILE__, linha __LINE_
+_.\n"; }
}
return $web_page;
}
############################################################
# ISLINK (PRIVATE)
# This method makes a simple lookup to the hyperlink'able
# html attributes.
# It receives a parameter a hash reference
# (with the attributes), or an array reference (with pairs
# name=value as the atributes) or a plain array (with the
# attributes in it).
# This method returns "TRUE" in the case this attribute set
# needs to be rewrited, and undef() otherwise.
############################################################
sub isLink{
my %attr = ref($_[0]) =~ /HASH/io ? %{$_[0]} : ref($_) =~ /ARRAY/ ?
+@{$_[0]} : @_;
return "TRUE"
if(
exists $attr{ href } or
exists $attr{ src } or
exists $attr{ base } or
exists $attr{ action }
);
return undef;
}
############################################################
# ABSLINK (PRIVATE)
# This method turns a (possibly) relative URL into an
# absolute one, and returns it as a string.
# This method uses as parameters the HTTP::Response object
# that has brought the hyper link's web page and a plain text
# URL to be made absolute.
############################################################
sub absLink{
use URI;
my $response = shift;
my $url = shift;
return new_abs URI( $url, $response->base() )->canonical()->as_strin
+g();
}
############################################################
# REBUILD_TAG (PRIVATE)
# This method rebuilds the entire html tag, returning it
# to the caller, as a plain text string.
# It takes as parameters the HTTP::Response object that
# has owns the tag (to make the tag absolute),
# the desired prefix (if any, or undef otherwise) and an
# array holding the tag's tokens, as parsed by the
# HTML::TokeParser object.
############################################################
sub rebuild_tag{
my ( $response, $prefix, @tokens ) = @_;
return undef unless $response and @tokens;
my $attrStr = "";
my %attrs = %{$tokens[2]};
while( my ( $key, $val ) = each %attrs ){
if( $key =~ /href|base|src|action/io ){
$attrStr .= $prefix ?
"$key=\"$prefix".&absLink( $response, $val ).'" ':
"$key=\"".&absLink( $response, $val ).'" ';
}else{
$attrStr .= "$key=\"$val\" ";
}
}
$attrStr =~ s/\ $//og;
return "<$tokens[1] $attrStr>";
}
############################################################
1;
|
|
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Apache Mod_Perl Rewritting Proxy
by monsieur_champs (Curate) on May 31, 2003 at 19:05 UTC |