############################################################ package Apache::Proxy::MyCookies; ############################################################ ############################################################ # This is the Apache's Mod_Perl Rewriting Proxy MyProxy # It's a lightweight proxy for use were other proxies # are not appliable or desirable. # Copyright (C) 2000 # Monsieur Champs <champs@users.sourceforge.net> # This program is free software; you can redistribute # it and/or modify it under the terms of the GNU # General Public License as published by the Free # Software Foundation; either version 2 of the # License, or (at your option) any later version. # This program is distributed in the hope that it # will be useful, but WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # You should have received a copy of the GNU General # Public License along with this program; if not, # write to the Free Software Foundation, Inc., 59 # Temple Place - Suite 330, Boston, MA 02111-1307, USA. ############################################################ # This package have the function of provide some # function overrides for the HTTP::Cookies object, for # help me built a cookie system for the mod_perl # rewriting proxy MyProxy, my first open-source # project... ############################################################ # Tell me when i say bullshit... use strict; # get the FLOCK_* constants use Fcntl ':flock'; # My parent... use HTTP::Cookies; # Utilities that my parent uses... (oh, dear!) # This should not be here... use HTTP::Date qw(str2time time2str); use HTTP::Headers::Util qw(split_header_words join_header_words); # This will handle calls for methods that i'm not overridding... use vars qw/ @ISA /; @ISA = ( 'HTTP::Cookies' ); ############################################################ # new() # This sub overrides the HTTP::Cookies::new() sub just # for making the right object, and respect the oop # conventions that we use in perl. ############################################################ sub new{ my $type = shift; my $self = new HTTP::Cookies( @_ ); return bless $self, $type; } ############################################################ # save() # This sub overrides the HTTP::Cookies::save() sub, # for just add some more rigid file locking control. sub save{ my $self = shift; my $file = shift || $self->{'file'} || return; local(*FILE); open(FILE, ">$file") or die "Can't open $file: $!"; flock FILE, LOCK_EX; print FILE "#LWP-Cookies-1.0\n"; print FILE $self->as_string(!$self->{ignore_discard}); flock FILE, LOCK_UN; close(FILE); } ############################################################ # load() # This sub overrides the HTTP::Cookies::load() sub # just for add some rigid file locking control... ############################################################ sub load{ my $self = shift; my $file = shift || $self->{'file'} || return; local(*FILE, $_); local $/ = "\n"; # make sure we got standard record separator open(FILE, $file) or return; flock FILE, LOCK_SH; my $magic = <FILE>; unless ($magic =~ /^\#LWP-Cookies-(\d+\.\d+)/) { warn "$file does not seem to contain cookies"; return; } while (<FILE>) { next unless s/^Set-Cookie3:\s*//; chomp; my $cookie; for $cookie (split_header_words($_)) { my($key,$val) = splice(@$cookie, 0, 2); my %hash; while (@$cookie) { my $k = shift @$cookie; my $v = shift @$cookie; $hash{$k} = $v; } my $version = delete $hash{version}; my $path = delete $hash{path}; my $domain = delete $hash{domain}; my $port = delete $hash{port}; my $expires = str2time(delete $hash{expires}); my $path_spec = exists $hash{path_spec}; delete $hash{path_spec}; my $secure = exists $hash{secure}; delete $hash{secure}; my $discard = exists $hash{discard}; delete $hash{discard}; my @array = ( $version, $val, $port, $path_spec, $secure, $expires, $discard ); push(@array, \%hash) if %hash; $self->{COOKIES}{$domain}{$path}{$key} = \@array; } } flock FILE, LOCK_UN; close(FILE); } 1; ############################################################ package Apache::Proxy::MyProxy; ############################################################ ############################################################ # This is the Apache's Mod_Perl Rewriting Proxy MyProxy # It's a lightweight proxy for use were other proxies # are not appliable or desirable. # Copyright (C) 2000 # Monsieur Champs <champs@users.sourceforge.net> # This program is free software; you can redistribute # it and/or modify it under the terms of the GNU # General Public License as published by the Free # Software Foundation; either version 2 of the # License, or (at your option) any later version. # This program is distributed in the hope that it will # be useful, but WITHOUT ANY WARRANTY; without even # the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. # You should have received a copy of the GNU General # Public License along with this program; if not, # write to the Free Software Foundation, Inc., 59 # Temple Place - Suite 330, Boston, MA 02111-1307, USA. ############################################################ # The rewriter's core is here... use lib '/home/champs/src'; # A URL base do MyProxy: use constant MY_PROXY_BASE_URL => 'http://proxy.example.com/MyProxy/'; use constant CACHE_ROOT => '/var/cache/myproxy/'; use constant COOKIE => 'MyProxyCookieJar'; # This is for mod_perl... use Apache (); use Apache::Constants qw/ :common /; use Apache::Proxy::URLRewriter; # This is for the use agent... use HTTP::Request; use HTTP::Response; use HTTP::Headers; use LWP::UserAgent; # Prevents calls to rflush() at every print... # Defensive programming added by Champs(01.09.2000) local $|=0; ############################################################ # Sub Handler manipulates the Apache's request object ($R) # to figure out a new web page to the user... # By Champs (champs@tbn.com.br), 15.08.2000. ############################################################ sub handler{ # Retrieve the request object... my $r = shift; # Creates a new User Agent for this request... my $ua = new LWP::UserAgent(); # Gets the page from the internet. my $resp = &retrievePage( $r, $ua ); # Stuff standard headers into the response... &processHeaders( $resp, $r ); # Send headers back to the client... $r->send_http_header(); # Rewrite and send the resulting page back to the client. $r->print( $resp->content()? new Apache::Proxy::URLRewriter( $resp, MY_PROXY_BASE_URL ) : $resp->error_as_HTML ); # this will make Apache Happy... return OK; } ############################################################ # processHeaders # This sub makes the dirty job of processing and setting # up the http headers into the current Apache's request # object. ############################################################ sub processHeaders($$){ my( $resp, $r ) = ( shift, shift ); my( $key, $value ); foreach ( split( /\n/, $resp->headers()->as_string() ) ) { ( $key, $value ) = /([^:]+):(.*)/io; if ( $key =~ /Content-Type/io ) { $r->content_type( $value ); } elsif ( $key =~ /no-cache/io ) { $r->no_cache(1); }elsif( $key =~ /Cookie/io ){ next; # Cookies are processed separatelly... } else { $r->header_out( $key => $value ); } } } ############################################################ # retrievePage( Apache::Request ) # This sub retrieves the requested page, and returns a # HTTP::Response Object for the result. ############################################################ sub retrievePage($$){ # this is for help debug... use Apache::Log; # Process cookies... use Apache::Proxy::MyCookies; # Fetch the Apache::Request object... my $r = shift; # recover the user agent my $ua = shift; # create a new log object... my $log = $r->log; # Get the Headers from the request... my %headers = $r->headers_in(); # Mimics the client's web browser... $ua->agent( $headers{'User-Agent'} ); # Build a Headers Object... my $header = new HTTP::Headers( %headers ); # TODO: Melhorar isso aqui, para que os manehs possam # instalar em qualquer lugar... # Select the path_info from the Apache's request... my $uri = $r->uri(); $uri =~ s!.*/MyProxy/!!o; # remove the slash from the beginning of the path_info information.. +. $uri =~ s|^/+||iog; # Build a cookie... my $cookie = new Apache::Proxy::MyCookies( file => CACHE_ROOT.$r->ge +t_remote_host() ); # Load cookie if exists a file with the cookie... $cookie->load() if -e CACHE_ROOT.$r->get_remote_host(); # Now, make a Request object... my $req = new HTTP::Request ( $r->method(), $uri, $header ); # Put the cookie into the request... $cookie->add_cookie_header( $req ); my @cookies = $req->headers->header( 'Cookie' ); # If there is any aditional contents on the Apache::request object, # put it into the request. Hope that works... { my $content; if( $content = $r->content ){ $req->content( $content ); } } # request the page, gets response... my $resp = $ua->request( $req ); # extract the response from it... $cookie->extract_cookies( $resp ); # Save the cookie back to disk... $cookie->save(); # ...And send it to the remote web server. Hope that he understand.. +. return $resp; } ############################################################ 1; ############################################################ package Apache::Proxy::URLRewriter; ############################################################ # This is the Apache's Mod_Perl Rewriting Proxy MyProxy # It's a lightweight proxy for use were other proxies # are not appliable or desirable. # Copyright (C) 2000 # Monsieur Champs <champs@users.sourceforge.net> # This program is free software; you can redistribute # it and/or modify it under the terms of the GNU # General Public License as published by the Free # Software Foundation; either version 2 of the # License, or (at your option) any later version. # This program is distributed in the hope that it # will be useful, but WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # You should have received a copy of the GNU General # Public License along with this program; if not, # write to the Free Software Foundation, Inc., 59 # Temple Place - Suite 330, Boston, MA 02111-1307, USA. ############################################################ # This package has the function of rewrite all the # hyper links in a given web page, making them absolute. # It also will rewrite the links, prefixing it with a # user-defined string (possibly a call to another url), # and rebuild the entire page, returning it as a scalar. ############################################################ use strict; ############################################################ # NEW # makes the dirty job. # Receives as parameters (in that order): # . a HTTP::Response object, with the page to rewrite. # . a URI object to prefix the page's hyperlinks with, or # undef() to no prefix at all. # It returns the new page as a scalar. ############################################################ sub new($$$){ use HTTP::Response; use URI; use HTML::TokeParser; # Retrieve parameters. my( $class, $response, $prefix ) = ( shift, shift, shift ); # Get the content... my $content = $response->content; # Make a new HTML::TokeParser my $parser = new HTML::TokeParser( \$content ); # This is for holding the page 'till we can send it away... my $web_page = ""; # Run througt the web page, making changes... while( my $token_ref = $parser->get_token() ){ next unless my @tokens = @{$token_ref}; if( $tokens[0] =~ /S/ ){ $web_page .= isLink( $tokens[2] ) ? &rebuild_tag( $response, $prefix, @tokens ) : $tokens[4]; } elsif( $tokens[0] =~ /E/ ){ $web_page .= $tokens[2]; } elsif( $tokens[0] =~ /T/ ){ $web_page .= $tokens[1]; } elsif( $tokens[0] =~ /C/ ){ $web_page .= $tokens[1]; } elsif( $tokens[0] =~ /D/ ){ $web_page .= $tokens[1]; } elsif( $tokens[0] =~ /PI/){ $web_page .= $tokens[1]; } else{ warn "WARNING: Unknow token found at __FILE__, linha __LINE_ +_.\n"; } } return $web_page; } ############################################################ # ISLINK (PRIVATE) # This method makes a simple lookup to the hyperlink'able # html attributes. # It receives a parameter a hash reference # (with the attributes), or an array reference (with pairs # name=value as the atributes) or a plain array (with the # attributes in it). # This method returns "TRUE" in the case this attribute set # needs to be rewrited, and undef() otherwise. ############################################################ sub isLink{ my %attr = ref($_[0]) =~ /HASH/io ? %{$_[0]} : ref($_) =~ /ARRAY/ ? +@{$_[0]} : @_; return "TRUE" if( exists $attr{ href } or exists $attr{ src } or exists $attr{ base } or exists $attr{ action } ); return undef; } ############################################################ # ABSLINK (PRIVATE) # This method turns a (possibly) relative URL into an # absolute one, and returns it as a string. # This method uses as parameters the HTTP::Response object # that has brought the hyper link's web page and a plain text # URL to be made absolute. ############################################################ sub absLink{ use URI; my $response = shift; my $url = shift; return new_abs URI( $url, $response->base() )->canonical()->as_strin +g(); } ############################################################ # REBUILD_TAG (PRIVATE) # This method rebuilds the entire html tag, returning it # to the caller, as a plain text string. # It takes as parameters the HTTP::Response object that # has owns the tag (to make the tag absolute), # the desired prefix (if any, or undef otherwise) and an # array holding the tag's tokens, as parsed by the # HTML::TokeParser object. ############################################################ sub rebuild_tag{ my ( $response, $prefix, @tokens ) = @_; return undef unless $response and @tokens; my $attrStr = ""; my %attrs = %{$tokens[2]}; while( my ( $key, $val ) = each %attrs ){ if( $key =~ /href|base|src|action/io ){ $attrStr .= $prefix ? "$key=\"$prefix".&absLink( $response, $val ).'" ': "$key=\"".&absLink( $response, $val ).'" '; }else{ $attrStr .= "$key=\"$val\" "; } } $attrStr =~ s/\ $//og; return "<$tokens[1] $attrStr>"; } ############################################################ 1;
In reply to Apache Mod_Perl Rewritting Proxy by monsieur_champs
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |