#!perl
use warnings; use strict;
# the host names, directory names, usernames below are changed
our $maindir = "/home/cmuna/a/hnwfb";
our $dldir = "/home/cmuna/export/hnwfb";
if ("jfhep" eq getpwuid($<)) {
$maindir = "/home/jfhep/a/hnwfb";
$dldir = "dl";
}
our $vardir = "var";
our $baseurl = "http://vphwec.example.com:780/hnwfb/";
our $http_netloc = "vphwec.example.com:780";
our $http_realm = "hnwfbmaster";
our $http_user = "hnwfb";
our $http_passfile = "secret/hnwfbhtpass";
our $maxcount_dir = 1024;
our $maxcount_file = 16*1024;
our $maxtotalsize = 1024*1024*1024;
use BSD::Resource ();
use 5.010;
use Fcntl ();
use IO::Handle ();
use LWP ();
use Date::Manip::Date ();
use Sys::Hostname ();
####
use XML::Twig ();
use Time::HiRes ();
####
chdir $maindir or die "error chdir main";
-O "." or die "error own main dir";
BSD::Resource::setrlimit(BSD::Resource::RLIMIT_AS(), 128*1024*1024, BSD::Resource::RLIM_INFINITY());
BSD::Resource::setrlimit(BSD::Resource::RLIMIT_DATA(), 48*1024*1024, BSD::Resource::RLIM_INFINITY());
BSD::Resource::setpriority(BSD::Resource::PRIO_PROCESS(), 0, 3) or die "error setpriority";
open our $LOCKH, "<", $vardir . "/mirrorhnwfb.lock" or die "error open lockfile";
flock $LOCKH, (Fcntl::LOCK_EX()|Fcntl::LOCK_NB()) or die "error locking lockfile: $!";
sub time_monotonic {
Time::HiRes::clock_gettime(Time::HiRes::CLOCK_MONOTONIC());
}
open our $LOG, ">>", $vardir . "/mirrorhnwfb.log" or die "open logfile";
autoflush $LOG;
sub wrlog {
chomp(my $t = join("", @_));
$t .= "\n";
print STDERR $t;
print $LOG $t;
}
my $DATE = Date::Manip::Date->new;
$DATE->config(setdate => "zone,UTC");
wrlog "Starting mirrorhnwfb on " . Sys::Hostname::hostname() . " at " .
$DATE->new_date("now")->printf("%O %Z") . ".";
eval {
sub escape {
my($t) = @_;
$t =~ s/([^\!\$\'-\*\,-9A-Za-z])/sprintf"%%%02X",ord($1)/ge;
$t;
}
sub unescape {
my($t) = @_;
$t =~ s/%(\w\w)/chr(hex($1))/ge;
$t;
}
our $LWP = LWP::UserAgent->new(
max_redirect => 7,
agent => q"mirrorhnwfb (1.0, jfhep@igluxz.example.com)",
timeout => 300,
);
our $http_pass;
{
open my $HTTP_PASS, "<", $http_passfile or die "error opening passfile";
chomp($http_pass = <$HTTP_PASS>);
}
$LWP->credentials($http_netloc, $http_realm, $http_user, $http_pass);
{
wrlog "getting allowmirror";
my $resp = $LWP->head($baseurl . "/.allowmirror");
if (!$resp->is_success) {
die "allowmirror http error: " . $resp->status_line;
}
wrlog "allowmirror ok.";
}
our @dir = ("/");
our %keepd = ("/" => 1);
our %keepf;
our $count_dir = 0;
our @file;
our @mkdir;
our $count_file = 0;
sub getdir {
my($dir) = @_;
my $edir = escape($dir);
$dir =~ m"\0" and die "invalid dir pathname 0 " . $edir;
$dir =~ m"\A\/" or die "invalid dir pathname 1 " . $edir;
$dir =~ m"\/\z" or die "invalid dir pathname 2 " . $edir;
$dir =~ m"/\." and die "invalid dir pathname 3 " . $edir;
if (1024 < length($dir)) {
wrlog "skipping directory with too long pathname " . $edir;
return;
}
my $dirs = $dir;
$dirs =~ s"\/+\z"";
$keepd{$dirs} = 1;
push @mkdir, $dir;
####
wrlog "getting directory " . $edir;
my $resp = $LWP->get($baseurl . $edir);
if ($resp->is_success) {
my $twig = XML::Twig->new;
if (!$twig->safe_parse($resp->content)) {
wrlog "error xml parsing directory listing of " . $edir . " as xml: " . $@;
return;
}
my($etitle) = $twig->findnodes("//title");
if (!$etitle || $etitle->text !~ /\A\s*Index\b/i) {
wrlog "direcotry listing has wrong title " . $edir;
return;
}
for my $ea ($twig->findnodes("//a")) {
my $href = $ea->att("href");
my $n = unescape($href);
if (defined($n) && $n !~ m"\A[\?\/]") {
#wrlog "found link from directory " . $edir . " : " . escape($n);
my $isdir = $n =~ s"\/+\z"";
my $abs = $dir . $n;
my $eabs = escape($abs);
####
if ($n =~ m"[\/\0]") {
wrlog "skipping that href for illegal characters in name " . $eabs;
} elsif (256 <= length($n)) {
wrlog "skipping that href for too long name component " . $eabs;
} elsif ($n =~ m"\A[\.\#\,\ ]" || $n =~ m"~\z" ||
$n =~ m"[^\ \!\$-\.0-\[\]-\~\x80-\xff]"
) {
wrlog "skipping that href for don't like name " . $eabs;
} elsif ($isdir) {
if ($maxcount_dir - 1 < $count_dir++) {
wrlog "skipping subdirectory because there are too many total dirs " . $eabs;
} else {
#wrlog "accepting subdirectory " . $eabs;
push @dir, $abs . "/";
}
} else {
if ($maxcount_file < $count_file++) {
wrlog "skipping file becuase there are too many total files " . $eabs;
} else {
#wrlog "accepting file " . $eabs;
push @file, $abs;
$keepf{$abs} = 1;
}
}
}
}
#wrlog "finished parsing directory " . $edir;
} else {
die "error getting directory " . $edir . " : " . $resp->status_line;
}
}
while (@dir) {
getdir(shift @dir);
}
wrlog "finished getting directory indexes";
sub checkname {
my($name) = @_;
$name =~ m"\0" and die "error: invalid name 0 " . escape($name);
$name =~ m"\/\.\.(?:/|\z)" and die "error: invalid name 2 " . escape($name);
}
our %mtime;
{
open my $MTIME, "<", $vardir . "/cur.mtime" or die "error opening cur.mtime: $!";
while (<$MTIME>) {
/\S/ or next; /\A#/ and next;
my($ename, $emtime) = split " ", $_;
my $name = unescape($ename);
$name =~ m"\0" and die "error: invalid name 0 in cur.mtime " . $ename;
$name =~ m"\A\/" or die "error: invalid name 1 in cur.mtime" . $ename;
$name =~ m"\/\.\.(?:/|\z)" and die "error: invalid name 2 in cur.mtime " . $ename;
my $mtime = unescape($emtime);
$mtime{$name} = $mtime;
}
close $MTIME or die "error closing cur.mtime: $!";
}
our $NMTIME;
{
open $NMTIME, ">", $vardir . "/new.mtime" or die "error opening new.mtime for write: $!";
for my $name (sort keys %mtime) {
checkname($name);
my $ename = escape($name);
$name =~ s"\/+\z"";
if (-e($dldir . "/" . $name)) {
print $NMTIME $ename . " " . escape($mtime{$name}) . "\n";
}
}
print $NMTIME "\n";
flush $NMTIME or die "error flusing 0 new.mtime: $!";
rename $vardir . "/old.mtime", $vardir . "/old2.mtime" or warn "warning renaming old.mtime to old2.mtime: $!";
rename $vardir . "/cur.mtime", $vardir . "/old.mtime" or die "error renaming cur.mtime to old.mtime: $!";
rename $vardir . "/new.mtime", $vardir . "/cur.mtime" or die "error renaming new.mtime to cur.mtime: $!";
}
our $cleancnt = 0;
our $totalsize = 0;
sub cleandir {
my($dir) = @_;
$cleancnt++;
checkname($dir);
$dir =~ m"\A\/" or die "error: invalid 1 dirname to clear " . escape($dir);
$dir =~ s"/+\z"";
my $adir = $dldir . $dir;
my $DIR;
if (!opendir $DIR, $adir) {
wrlog "warning: cannot opendir directory to clean " . escape($adir) . " : " . $!;
return;
}
while (my $file = readdir $DIR) {
"." eq $file || ".." eq $file and next;
my $afile = $adir . "/" . $file;
my $pfile = $dir . "/" . $file;
if ($file =~ m"^\.") {
wrlog "in cleanup, skipping dotfile " . $afile;
next;
}
my $isdir = -d $afile;
if ($isdir) {
cleandir($pfile);
if (!$keepd{$pfile}) {
wrlog "deleting old directory " . $pfile;
$keepf{$pfile} and wrlog "incidentally, that is now a non-dir file";
rmdir $afile or
wrlog "warning: could not delete old directory " . escape($afile) . " : $!";
}
} else {
if (!$keepf{$pfile}) {
wrlog "deleting old file " . $pfile;
$keepd{$pfile} and wrlog "incidentally, that is now a directory";
unlink $afile or
wrlog "warning: could not delete old file " . escape($afile) . " : $!";
} else {
$totalsize += abs(-s($afile));
}
}
}
closedir $DIR or wrlog "warning: error closedir directory to clean " . escape($adir);
}
wrlog "starting to clean up old files";
cleandir("/");
wrlog "finished cleanup of old files (recursed to " . $cleancnt . " dirs)";
wrlog "total size of old files is " . $totalsize . " bytes";
{
for my $dir (@mkdir) {
my $adir = $dldir . "/" . $dir;
if (!mkdir($adir) && !$!{EEXIST}) {
wrlog "warning: could not mkdir " . escape($dir) . " : " . $!;
}
}
}
sub getfile {
my($file) = @_;
checkname($file);
my $efile = escape($file);
$file =~ m"\A\/" or die "error: invalid filename 1 to get " . $efile;
my $afile = $dldir . "/" . $file;
my $mtime = $mtime{$file};
wrlog "getting file " . $efile . " : " . ($mtime ? "mtime " . $mtime : "new");
open my $PART, ">", $vardir . "/part" or die "error opening part file: $!";
binmode $PART or die "error binmoding part";
my $ondata = sub {
my($buf, $_resp, $_prot) = @_;
if ($maxtotalsize < ($totalsize += length($buf))) {
die "error: out of quota";
}
print $PART $buf or die "error writing data to part file: $!";
};
my $resp = $LWP->get(
$baseurl . $efile,
":content_cb" => $ondata,
($mtime ? ("If-Modified-Since" => $mtime) : ()),
);
0 <= (my $filesize = tell $PART) or die "error: tell part: $!";
close $PART or die "error closing part file: $!";
if (my $err = $resp->header("X-Died")) {
die $err;
} elsif (304 == $resp->code) {
wrlog "got not modified " . $efile;
} elsif (!$resp->is_success) {
wrlog "error downloading " . $efile . " : " . $resp->status_line;
} else {
my $nmtime = $resp->header("Last-Modified");
my $enmtime = escape($nmtime);
rmdir $afile or 1;
rename $vardir . "/part", $afile or die "error renaming part to " . escape($afile) . " : $!";
print $NMTIME $efile . " " . $enmtime . "\n";
flush $NMTIME or die "error flusing 0 new.mtime: $!";
wrlog "success downloading " . $efile . " : mtime " . $enmtime . " size " . $filesize;
}
}
for my $file (@file) {
getfile($file);
}
close $NMTIME or die "error closing new.mtime: $!";
wrlog "finished all work. total size is " . $totalsize . " bytes";
};
if (my $e = $@) {
wrlog $e;
}
wrlog "Exiting mirrorhnwfb at " .
$DATE->new_date("now")->printf("%O %Z") . ".";
__END__