sub getfile { my ( $url, $outfile, $options ) = @_; $options ||= {}; # Default option values my $max_retries = $options->{'max_retries'} || 5; my $file_host = $options->{'file_host'} || $ENV{FILE_HOST} || "file-host.companyname.internal"; my $ua = $options->{'ua'} || LWP::UserAgent->new( keep_alive => 1, cookie_jar => {} ); # Force to a positive integer. $max_retries = int($max_retries); $max_retries = 1 if $max_retries < 1; # Construct auth headers my %auth_headers = (); if( $options->{'basic_auth_creds'} ) { %auth_headers = ( 'Authorization' => "Basic ".encode_base64($options->{'basic_auth_creds'}) ); } my ( $csum_type, $csum ) = csum_to_lookup($url); if ( $csum_type eq 'sha1' ) { $url = "http://$file_host/getfile/" . $csum; $wanted_sha1 = $csum; } else { ... # Look in the database to convert to SHA1 } my $result; ATTEMPT: foreach my $attempt ( 1 .. $max_retries ) { debug("attempt $attempt to get url: '$url'"); my $f_out; # This will contain the sha1 of the downloaded data once the download is complete my $digest = Digest->new('SHA-1'); # NB: This uses the request method on LWP::UA, that takes an instance of HTTP::Request and a callback function. # See: https://metacpan.org/pod/LWP::UserAgent#ua-request-request-content_cb $result = $ua->request( GET($url, %auth_headers), # Func exported from HTTP::Request::Common, returns an instance of HTTP::Request sub { my ( $data, $res ) = @_; if ( $res->is_success ) { unless ($f_out) { $f_out = open_outfile( $outfile, $nooverwrite ) or croak "unable to open $outfile\n"; } print $f_out $data; $digest->add($data) if defined $wanted_sha1; } }, ); unless( $result->request->uri->eq($url) ) { print "$url has redirected to ".$result->request->uri."\n"; } # close file or flush the filehandle to disk # FIXME close or flush could fail (eg if insufficient disk space). this should be bubbled up if (ref($outfile)) { $outfile->flush(); } else { close $f_out if defined($f_out); } # For error messages if something goes wrong. my $resp_string = sprintf "%d - %s", $result->code, $result->message; # Check the downloaded data is what we expect # (Unless we have no checksum when downloading an arbitrary URL) # TODO: this segment checks sha1 even if download fails - unnecessary warning! if ( defined $wanted_sha1 ) { my $download_failed = 0; if( ! -f $outfile || 0 == -s $outfile ) { unlink $outfile unless ref($outfile); warn "http downloaded failed ($resp_string): Wanted SHA1:$wanted_sha1 but got an empty file"; $download_failed = 1; } elsif( $digest->hexdigest ne $wanted_sha1 ) { unlink $outfile unless ref($outfile); warn "http downloaded failed ($resp_string): Wanted SHA1:$wanted_sha1 but got:" . $digest->hexdigest; $download_failed = 1; } if( $download_failed ) { if( $attempt == $max_retries ) { my $download_url = $result->request->uri; my $hostname = $result->request->uri->host; my $timestamp = scalar gmtime(); if( my $packed_ip = gethostbyname( $hostname ) ) { my $file_server_ip = inet_ntoa($packed_ip); # Using old style pure perl instead of a modern library warn "$timestamp : Request was to $download_url on IP: $file_server_ip"; } else { warn "$timestamp : Request was to $download_url but cannot resolve IP address for $hostname"; } } else { next ATTEMPT; } } } if ( $result->is_success ) { last ATTEMPT; } else { unlink $outfile unless ref($outfile); carp "$url could not be retrieved - $resp_string\n"; # Delay before the next attempt sleep 2**$attempt unless $attempt == $max_retries; } } return wantarray() ? ( $result->is_success(), $result ) : $result->is_success(); }