#!/usr/bin/perl -T ###################################################################### +####### # w w w . l i t t l e f i s h . c a + # # + # # repo-proxy.pl + # # + # # + # # -------------------------------------------------------------------- +----- # # (C)2006 littlefish.ca, All Rights Reserved. + # # + # # This program is free software; you can redistribute it and/or + # # modify it under the terms of the GNU General Public License + # # as published by the Free Software Foundation; either version 2 + # # of the License, or (at your option) any later version. + # # + # # This program is distributed in the hope that it will be useful, + # # but WITHOUT ANY WARRANTY; without even the implied warranty of + # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + # # GNU General Public License for more details. + # # + # # You should have received a copy of the GNU General Public License + # # along with this program; if not, write to the + # # Free Software Foundation, Inc. + # # 59 Temple Place - Suite 330 + # # Boston, MA 02111-1307, USA. + # # + # # -->A full copy of the GNU General Public License is included in LICE +NSE # # -------------------------------------------------------------------- +----- # # Need Support? Email us at support@littlefish.ca + # # -------------------------------------------------------------------- +----- # # Modification History: + # # 1.0.0: 03/18/2006 Initial Version + # ###################################################################### +####### # requires (core modules): # Fcntl; # Socket; my $ver = '0.01'; # Version 0.01 # # Repo-proxy runs (endlessly) as a mini webserver accepting connection +s on # port 1001. When requests are received from a package manager (Syna +ptic) # repo-proxy acts handles the file transfer between the repository so +urce # and the client. The requested file is saved in the process, so the + next # time a request comes for the same package, it can be dished from th +e local # copy instead of hitting the repositories again. # # The web server functionality is extremely limited!! Repo-proxy is n +ot # intended to be a full out webserver, just a simple file server for a + very # specific need. use strict; use warnings; use Fcntl qw(:DEFAULT :flock); use Socket; use POSIX; use constant DEBUG => 1; # Buffer size dynamically adjusted to match transfer speed use constant MAX_BUFSIZE => 32768; # 32K ; use constant MIN_BUFSIZE => 1024; # 1K ; # increments to size up buffer use constant INC_BUFSIZE => 2048; # 2K ; # number of bytes to read first to include the header use constant HTTP_MINHEADER => 500; # number of seconds to block on any activity use constant BLOCK_TIMEOUT => 2; # change this if you need to use a different port my $server_port = '10001'; # directory to store file copies my $repo_path = "/var/cache/repo-proxy"; if (! -e $repo_path) { warn "create repository directory $repo_path\n" if DEBUG; mkdir($repo_path) or die "Failed to create repository directory $r +epo_path: $!\n"; } # start the web server # open a socket to communicate through socket(SERVER, PF_INET, SOCK_STREAM, getprotobyname('tcp')) or die("Failed to create socket: $!"); # so we can restart our server quickly setsockopt(SERVER, SOL_SOCKET, SO_REUSEADDR, pack("l", 1)); # listen on all interfaces bind(SERVER, sockaddr_in($server_port, INADDR_ANY)) or die("Failed to bind to port $server_port: $!"); listen(SERVER, SOMAXCONN) or die("Failed to listen on port $server_port: $!"); # loop waiting for client connects while (my $client_address = accept(my $client, SERVER)) { # enable command buffering (autoflush) select((select($client), $| = 1)[0]); # set binary mode on output binmode $client; my ($port, $packed_ip) = sockaddr_in($client_address); my $dotted_quad = inet_ntoa($packed_ip); warn "accepted client: $dotted_quad\n" if DEBUG; # read the client request my $req_ref = get_request($client); if (!$req_ref) { close($client); next; } map {warn "request $_: $req_ref->{$_}\n"} sort keys %$req_ref if D +EBUG; # get the repository value my $source = $req_ref->{QUERY_STRING}; $source =~ s!^(\.*/+)+!!; warn "source: $source\n" if DEBUG; # strip the site from the URL and setup paths my $source_info = get_source_info($source, 'http', $repo_path); warn "repository_source: $source_info->{repository_source}\n" if D +EBUG; warn "distro: $source_info->{distro}\n" if DEBUG; # now proxy the request http_proxy($client, $source_info); close($client); } close(SERVER); exit; # get the http request from the client sub get_request { my $client = shift; my %request; # it really shouldn't take very long to receive a request # so don't kill the application waiting! my $result; eval { local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required alarm(BLOCK_TIMEOUT); # turn request values into a hash while (<$client>) { last if m/^\r\n$/; if (m!^get\s+(.+)\s+http/(.+)!i) { $request{REQUEST_METHOD} = 'GET'; $request{QUERY_STRING} = $1; $request{HTTP} = $2; next; } $request{uc($1)} = $2 if (m!^([^:]+):\s+(.+)!); } alarm 0; $result = 1; }; alarm 0; # prevent race condition return ($result ? \%request : 0); } # Source info can come from the URL, or an info file. # The info file is created when a repository reload is # requested. # The info file just makes it easier to parse requests # for RPMs without fretting over the path names. sub get_source_info { my ($base, $repository_type, $repo_path) = @_; my %info = ( source => $base, ); $base =~ s!^([^/]+)!! or die "Can't identify repository from sourc +e\n"; $info{repository_source} = $1; my $source_path = $info{source_path} = $base; $repo_path .= "/$1"; my $repo_info = "$repo_path/info_$repository_type.txt"; if (! -e $repo_path) { warn "create repository source directory $repo_path\n" if DEBU +G; mkdir($repo_path) or die "Failed to create repository source d +irectory $repo_path: $!\n"; } if (-e $repo_info) { # read the info file my ($fh, $msg) = openReadWithLock($repo_info); $fh or die "$msg\n"; while ($_ = <$fh>) { $info{$1} = $2 if m/^\s*([^#][^:]+):\s+(.+)/; } close($fh); $info{info_file} = $repo_info; } else { $base =~ s!^(.+)/base/!! or die "Can't identify distribution f +rom source\n"; my $distro = $info{pathbase} = $1; $distro =~ s!^.+/apt/!!; $info{distro} = $distro; # write a new info file my ($fh, $msg) = openWriteWithLock($repo_info); $fh or die "$msg\n"; map { print $fh "$_: $info{$_}\n" } qw(repository_source distr +o pathbase); close($fh); $info{info_file} = $repo_info; } # is this base information, or a package file my $pathbase = $info{pathbase}; $source_path =~ m!^\Q$pathbase\E/([^/]+)/(.+)! or die "Can't ident +ify base from source $pathbase\n"; # Don't check rpms for changes $info{skip_check} = $1 eq 'base' ? 0 : 1; my $repo_base = "$repo_path/$1"; $info{base_file} = "$repo_base/$2"; warn "base path $1 $2\n" if DEBUG; if (! -e $repo_base) { warn "create repository source base directory $repo_base\n" if + DEBUG; mkdir($repo_base) or die "Failed to create repository source b +ase directory $repo_base: $!\n"; } return \%info; } # retreive the requested file from the repository source, or a local c +opy sub http_proxy { my $client = shift; my $info = shift; my $force_reload = shift || 0; my $skip_check = shift || $info->{skip_check}; my $local_file = $info->{base_file}; $force_reload++ unless -e $local_file; # connect to http source if we're forcing a reload # don't connect to the http source if we're skipping the check if ($force_reload or !$skip_check) { my ($socket, $code, $tags_ref, $header_ref, $overflow_ref) = open_header_source($info->{repository_source}, $info->{sou +rce_path}); # check request code if ($code != 200) { warn "Unexpected status from source: $code\n" if DEBUG; # return local file if we've got it if (-e $local_file) { warn "return local copy $local_file\n" if DEBUG; close($socket); my ($local_fh, $code, $tags_ref, $header_ref, $overflo +w_ref) = open_header_local($local_file); return http_transfer_file_local($client, $local_fh, $t +ags_ref, $header_ref, $overflow_ref); } else { # pass on error to client return http_transfer_unknown($client, $socket, $tags_r +ef, $header_ref, $overflow_ref); } } if ($force_reload) { warn "get fresh copy from repository source\n" if DEBUG; # return the repository request return http_transfer_file_source($client, $socket, $tags_r +ef, $header_ref, $overflow_ref, $local_file); } # we already have this file. Maybe it needs updating? warn "found local copy $local_file\n" if DEBUG; my ($local_fh, $local_code, $local_tags_ref, $local_header_ref +, $local_overflow_ref) = open_header_local($local_file); # Etags are unique aren't they? if ($tags_ref->{ETag} eq $local_tags_ref->{ETag}) { warn "local and repository are the same (returning local c +opy)\n" if DEBUG; # repository file and local copy are the same close($socket); return http_transfer_file_local($client, $local_fh, $local +_tags_ref, $local_header_ref, $local_overflow_ref); } # new file in repository warn "new file in repository source\n" if DEBUG; # done with readonly, release the lock close($local_fh); # return the repository request return http_transfer_file_source($client, $socket, $tags_ref, +$header_ref, $overflow_ref, $local_file); } # return the local copy (don't check source) warn "return local copy (skip check) $local_file\n" if DEBUG; my ($local_fh, $code, $tags_ref, $header_ref, $overflow_ref) = open_header_local($local_file); return http_transfer_file_local($client, $local_fh, $tags_ref, $he +ader_ref, $overflow_ref); } sub get_http_header { my $fh = shift; # pull the header off the response my $code = 0; my $header = ''; my %tags; my $buffer; # this should be enough to grab the header sysread($fh, $buffer, HTTP_MINHEADER) or die "failed to get the ht +tpd header: $!\n"; # pull header lines out of the buffer my $skip = 0; foreach (split(/\n/,$buffer)) { $header .= "$_\n"; $skip += (length($_) + 1); last if m!^\r!; if (m!^HTTP/\d+\.\d+\s+(\d+)!) { $code = $1; warn "$_\n" if DEBUG; warn "CODE=$code\n" if DEBUG; next; } $tags{$1} = $2 if m!^([^:]+):\s+([^\r]+)!; warn "tag: $1 value: $2\n" if DEBUG; } warn "header length: " . length($header) . "\n" if DEBUG; my $overflow = substr($buffer, $skip); # warn "first line: " . substr($buffer,$skip - 2, 10) . "\n"; return $fh, $code, \%tags, \$header, \$overflow; } sub open_header_source { my $site = shift; my $path = shift; warn "open tcp connection to $site\n" if DEBUG; # open a tcp socket to the repository socket(my $socket, PF_INET, SOCK_STREAM, getprotobyname('tcp')); # connect with timeout my $failed = 1; eval { # set a signal to die if the timeout is reached local $SIG{ALRM} = sub { die "alarm\n" }; # modem response should be quick! alarm 1; # 1 seconds connect($socket, sockaddr_in(80, inet_aton($site))) or die("Couldn't connect to $site:80 : $!"); alarm 0; $failed = 0; }; alarm 0; # prevent race condition die("Couldn't connect to $site:80 : Socket Timeout") if $failed; # enable command buffering (autoflush) select((select($socket), $| = 1)[0]); # send the page request print $socket join("\015\012", "GET $path HTTP/1.1", "Host: $site", "User-Agent: repo-proxy/$ver", # "Connection: close", "", ""); # pull the header off the response warn "get header\n" if DEBUG; # TODO handle chunked encoding? # TODO handle 100 continue? return get_http_header($socket); } sub open_header_local { my $local_file = shift; my ($local_fh, $msg) = openReadWithLock($local_file); $local_fh or die "$msg\n"; return get_http_header($local_fh); } sub http_transfer_file_source { my ($client, $socket, $tags, $header, $overflow_ref, $local_file) += @_; my $length = $tags->{'Content-Length'}; my $bufsize = MAX_BUFSIZE; my $buffer; my $more_bytes; #this is our first copy warn "create/replace local copy $local_file\n" if DEBUG; my ($local_fh, $msg) = openWriteWithLock($local_file); $local_fh or die "$msg\n"; # now return the request print $client $$header . $$overflow_ref; print $local_fh $$header . $$overflow_ref; my $bytes_transfered = length($$overflow_ref); # adjust the buffer size dynamically to match the bytes read if ($bytes_transfered < $length) { while (defined($more_bytes = sysread($socket, $buffer, $bufsiz +e))) { if ($more_bytes) { print $client $buffer; print $local_fh $buffer; $bytes_transfered += $more_bytes; last if $bytes_transfered >= $length; } if ($more_bytes < $bufsize) { # adjust buffer size to match what we just received $bufsize = $more_bytes > MIN_BUFSIZE ? $more_bytes : M +IN_BUFSIZE; # warn "stalled: bytes_transfered $bytes_transfered ($ +more_bytes - $bufsize)\n" if DEBUG; next; } # increase the buffer size (if possible) if ($bufsize < MAX_BUFSIZE) { $bufsize += INC_BUFSIZE; $bufsize = MAX_BUFSIZE if $bufsize > MAX_BUFSIZE; } # warn "transfered full buffer: $more_bytes (next: $bufsize +)\n" if DEBUG; } } close($socket); close($local_fh); warn "bytes_transfered: $bytes_transfered\n" if DEBUG; } # pass on unknown request status # same as http_transfer_file_source, only no save to local file sub http_transfer_unknown { my ($client, $socket, $tags, $header, $overflow_ref) = @_; my $length = $tags->{'Content-Length'}; my $bufsize = MAX_BUFSIZE; my $buffer; my $more_bytes; # pass on the request print $client $$header . $$overflow_ref; my $bytes_transfered = length($$overflow_ref); # adjust the buffer size dynamically to match the bytes read if ($bytes_transfered < $length) { while (defined($more_bytes = sysread($socket, $buffer, $bufsiz +e))) { if ($more_bytes) { print $client $buffer; $bytes_transfered += $more_bytes; last if $bytes_transfered >= $length; } if ($more_bytes < $bufsize) { # adjust buffer size to match what we just received $bufsize = $more_bytes > MIN_BUFSIZE ? $more_bytes : M +IN_BUFSIZE; # warn "stalled: bytes_transfered $bytes_transfered ($ +more_bytes - $bufsize)\n" if DEBUG; next; } # increase the buffer size (if possible) if ($bufsize < MAX_BUFSIZE) { $bufsize += INC_BUFSIZE; $bufsize = MAX_BUFSIZE if $bufsize > MAX_BUFSIZE; } # warn "transfered full buffer: $more_bytes (next: $bufsize +)\n" if DEBUG; } } close($socket); warn "bytes_transfered: $bytes_transfered\n" if DEBUG; } # same as http_transfer_file_source only it doesn't copy source while +transfering sub http_transfer_file_local { my ($client, $fh, $tags, $header, $overflow_ref) = @_; my $length = $tags->{'Content-Length'}; my $buffer; my $more_bytes; # now return the request print $client $$header . $$overflow_ref; my $bytes_transfered = length($$overflow_ref); # adjust the buffer size dynamically to match the bytes read if ($bytes_transfered < $length) { while ($more_bytes = sysread($fh, $buffer, MAX_BUFSIZE)) { print $client $buffer; $bytes_transfered += $more_bytes; } } close($fh); warn "bytes_transfered: $bytes_transfered\n" if DEBUG; } # TODO dotlock? (or do we care) # non-blocking read lock sub openReadWithLock { my $file = shift; if (sysopen(my $fh, $file, O_RDONLY|O_CREAT)) { my $result; eval { local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n require +d alarm(BLOCK_TIMEOUT); $result = flock($fh, LOCK_SH); alarm 0; }; alarm 0; # prevent race condition return $fh if $result; # catch the error befor closing the fh my $msg = "Can't lock $file: $!"; close($fh); return 0, $msg; } return 0, "Failed to read file $file: $!"; } # non-blocking write lock sub openWriteWithLock { my $file = shift; if (sysopen(my $fh, $file, O_RDWR|O_CREAT)) { my $result; eval { local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n require +d alarm(BLOCK_TIMEOUT); $result = flock($fh, LOCK_EX); alarm 0; sysseek($fh, 0, 2) if $result; }; alarm 0; # prevent race condition return $fh if $result; # catch the error befor closing the fh my $msg = "Can't lock $file: $!"; close($fh); return 0, $msg; } return 0, "Failed to write file $file: $!"; }
In reply to Repo-proxy - Apt repository cacher by ruzam
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |