##############################################
#
# Welcome to the Archive!
#
##############################################
# Please crawl our files.
# We appreciate it if you crawl responsibly.
# Stay open!
##############################################
# Slow down the Ask Jeeves crawler (user-agent "Teoma"), which was hitting
# our SE a little too fast via collection pages. --Feb2008 tracey--
User-agent: Teoma
Disallow: /control/
Disallow: /report/
Sitemap: http://www.archive.org/sitemap/sitemap.xml
Crawl-delay: 10
User-agent: *
Disallow: /control/
Disallow: /report/
Disallow: /details/goldenbull2007john/
Disallow: /stream/goldenbull2007john/
Disallow: /download/goldenbull2007john/
Disallow: /14/items/goldenbull2007john/goldenbull2007john_djvu.txt
Sitemap: http://www.archive.org/sitemap/sitemap.xml
Crawl-delay: 10
####
http://www.archive.org/sitemap/sitemap_00000.xml.gz
2012-01-24T11:32:13Z
http://www.archive.org/sitemap/sitemap_00001.xml.gz
2012-01-24T11:32:18Z
####
#!/usr/bin/env perl
#
# Name: TestFetch.pl
#
# Requires Internet access
#
use strict;
use warnings;
use LWP::Simple;
use HTML::Parser;
use HTTP::Status qw(:constants :is status_message);
package main;

# Fetch the sitemap file named by $text and store the response body in the
# local file $filename. On any status other than 200 OK, print the numeric
# status code and its standard message to STDOUT.
my $text     = 'http://www.archive.org/sitemap/sitemap_00000.xml.gz';    # URL to fetch
my $filename = 'sitemap_00000.xml.gz';    # local file that receives the body

# getstore() is a plain function exported by LWP::Simple, not a class method.
# Invoking it as LWP::Simple->getstore(...) would pass the string
# 'LWP::Simple' as the URL and the real URL as the output file name, so the
# request could never reach the intended address. Call it directly instead.
# It returns the HTTP response code of the request.
my $hstatus = getstore($text, $filename);

if ($hstatus != HTTP_OK) {
    print "$hstatus: ", status_message($hstatus), "\n";
}