if ($n =~ m"[\/\0]") { wrlog "skipping that href for illegal characters in name " . $eabs; } elsif (256 <= length($n)) { wrlog "skipping that href for too long name component " . $eabs; } elsif ($n =~ m"\A[\.\#\,\ ]" || $n =~ m"~\z" || $n =~ m"[^\ \!\$-\.0-\[\]-\~\x80-\xff]" ) { wrlog "skipping that href for don't like name " . $eabs; } elsif ($isdir) { if ($maxcount_dir - 1 < $count_dir++) { wrlog "skipping subdirectory because there are too many total dirs " . $eabs; } else { #wrlog "accepting subdirectory " . $eabs; push @dir, $abs . "/"; } } else { if ($maxcount_file < $count_file++) { wrlog "skipping file becuase there are too many total files " . $eabs; } else { #wrlog "accepting file " . $eabs; push @file, $abs; $keepf{$abs} = 1; } } } } #wrlog "finished parsing directory " . $edir; } else { die "error getting directory " . $edir . " : " . $resp->status_line; } } while (@dir) { getdir(shift @dir); } wrlog "finished getting directory indexes"; sub checkname { my($name) = @_; $name =~ m"\0" and die "error: invalid name 0 " . escape($name); $name =~ m"\/\.\.(?:/|\z)" and die "error: invalid name 2 " . escape($name); } our %mtime; { open my $MTIME, "<", $vardir . "/cur.mtime" or die "error opening cur.mtime: $!"; while (<$MTIME>) { /\S/ or next; /\A#/ and next; my($ename, $emtime) = split " ", $_; my $name = unescape($ename); $name =~ m"\0" and die "error: invalid name 0 in cur.mtime " . $ename; $name =~ m"\A\/" or die "error: invalid name 1 in cur.mtime" . $ename; $name =~ m"\/\.\.(?:/|\z)" and die "error: invalid name 2 in cur.mtime " . $ename; my $mtime = unescape($emtime); $mtime{$name} = $mtime; } close $MTIME or die "error closing cur.mtime: $!"; } our $NMTIME; { open $NMTIME, ">", $vardir . "/new.mtime" or die "error opening new.mtime for write: $!"; for my $name (sort keys %mtime) { checkname($name); my $ename = escape($name); $name =~ s"\/+\z""; if (-e($dldir . "/" . $name)) { print $NMTIME $ename . " " . escape($mtime{$name}) . "\n"; } } print $NMTIME "\n"; flush $NMTIME or die "error flusing 0 new.mtime: $!"; rename $vardir . "/old.mtime", $vardir . "/old2.mtime" or warn "warning renaming old.mtime to old2.mtime: $!"; rename $vardir . "/cur.mtime", $vardir . "/old.mtime" or die "error renaming cur.mtime to old.mtime: $!"; rename $vardir . "/new.mtime", $vardir . "/cur.mtime" or die "error renaming new.mtime to cur.mtime: $!"; } our $cleancnt = 0; our $totalsize = 0; sub cleandir { my($dir) = @_; $cleancnt++; checkname($dir); $dir =~ m"\A\/" or die "error: invalid 1 dirname to clear " . escape($dir); $dir =~ s"/+\z""; my $adir = $dldir . $dir; my $DIR; if (!opendir $DIR, $adir) { wrlog "warning: cannot opendir directory to clean " . escape($adir) . " : " . $!; return; } while (my $file = readdir $DIR) { "." eq $file || ".." eq $file and next; my $afile = $adir . "/" . $file; my $pfile = $dir . "/" . $file; if ($file =~ m"^\.") { wrlog "in cleanup, skipping dotfile " . $afile; next; } my $isdir = -d $afile; if ($isdir) { cleandir($pfile); if (!$keepd{$pfile}) { wrlog "deleting old directory " . $pfile; $keepf{$pfile} and wrlog "incidentally, that is now a non-dir file"; rmdir $afile or wrlog "warning: could not delete old directory " . escape($afile) . " : $!"; } } else { if (!$keepf{$pfile}) { wrlog "deleting old file " . $pfile; $keepd{$pfile} and wrlog "incidentally, that is now a directory"; unlink $afile or wrlog "warning: could not delete old file " . escape($afile) . " : $!"; } else { $totalsize += abs(-s($afile)); } } } closedir $DIR or wrlog "warning: error closedir directory to clean " . escape($adir); } wrlog "starting to clean up old files"; cleandir("/"); wrlog "finished cleanup of old files (recursed to " . $cleancnt . " dirs)"; wrlog "total size of old files is " . $totalsize . " bytes"; { for my $dir (@mkdir) { my $adir = $dldir . "/" . $dir; if (!mkdir($adir) && !$!{EEXIST}) { wrlog "warning: could not mkdir " . escape($dir) . " : " . $!; } } } sub getfile { my($file) = @_; checkname($file); my $efile = escape($file); $file =~ m"\A\/" or die "error: invalid filename 1 to get " . $efile; my $afile = $dldir . "/" . $file; my $mtime = $mtime{$file}; wrlog "getting file " . $efile . " : " . ($mtime ? "mtime " . $mtime : "new"); open my $PART, ">", $vardir . "/part" or die "error opening part file: $!"; binmode $PART or die "error binmoding part"; my $ondata = sub { my($buf, $_resp, $_prot) = @_; if ($maxtotalsize < ($totalsize += length($buf))) { die "error: out of quota"; } print $PART $buf or die "error writing data to part file: $!"; }; my $resp = $LWP->get( $baseurl . $efile, ":content_cb" => $ondata, ($mtime ? ("If-Modified-Since" => $mtime) : ()), ); 0 <= (my $filesize = tell $PART) or die "error: tell part: $!"; close $PART or die "error closing part file: $!"; if (my $err = $resp->header("X-Died")) { die $err; } elsif (304 == $resp->code) { wrlog "got not modified " . $efile; } elsif (!$resp->is_success) { wrlog "error downloading " . $efile . " : " . $resp->status_line; } else { my $nmtime = $resp->header("Last-Modified"); my $enmtime = escape($nmtime); rmdir $afile or 1; rename $vardir . "/part", $afile or die "error renaming part to " . escape($afile) . " : $!"; print $NMTIME $efile . " " . $enmtime . "\n"; flush $NMTIME or die "error flusing 0 new.mtime: $!"; wrlog "success downloading " . $efile . " : mtime " . $enmtime . " size " . $filesize; } } for my $file (@file) { getfile($file); } close $NMTIME or die "error closing new.mtime: $!"; wrlog "finished all work. total size is " . $totalsize . " bytes"; }; if (my $e = $@) { wrlog $e; } wrlog "Exiting mirrorhnwfb at " . $DATE->new_date("now")->printf("%O %Z") . "."; __END__