# getFile($url, $type, $download_directory)
#
# Download one file with wget into $download_directory, decompress it if it
# is a .gz/.Z file, and then post-process it according to $type:
#   "taxonomy"  - a .tar archive is unpacked in $local_taxonomy_directory
#   "unigene"   - nothing further is done
#   "fasta", "name", "reference"
#               - the file is moved to
#                 $local_incoming_directory/${db_name}_xyzzy.<ext>
#
# For non-http URLs the remote name may contain the wild cards * and ?.
# A directory listing is fetched first and compared, line by line, with the
# local .history file so that a file whose listing line is unchanged is not
# downloaded again.
#
# Relies on names defined outside this sub: $wget, %wget_options, $gzip,
# $tar, $db_name, $local_incoming_directory, $local_taxonomy_directory,
# $logEntry, checkPath(), lastWords(), message() and move().
# NOTE(review): lastWords() is presumably fatal (it is used for every
# unrecoverable error here) - confirm against its definition.
#
# Returns 1 after a successful download, 0 when no update is available.
sub getFile {
    my $url                = shift;
    my $type               = shift;
    my $download_directory = shift;

    my ($remoteName, $remotePath, $matchName, $matchLine, $matchLineSave);
    my @history;

    &checkPath($download_directory);

    # split download URL into directory and name
    if ($url =~ /(.*\/)(.+?)$/) {
        $remotePath = $1;
        $remoteName = $2;
    } else {
        &lastWords("Cannot parse filename from $url");
    }

    # wget executable plus the configured options; shared by the listing
    # request and the download itself. keys() is used instead of each() so
    # that a half-consumed hash iterator cannot cause options to be skipped.
    my $wgetBase = $wget;
    if ($ENV{'WINDIR'}) {
        $wgetBase =~ s#/#\\#g;
    }
    foreach my $key (keys %wget_options) {
        my $value = $wget_options{$key};
        if (defined($value) && length($value)) {
            $wgetBase .= " $key=$value";
        } else {
            $wgetBase .= " $key";
        }
    }

    if ($url =~ /^http:/i) {
        if ($remoteName =~ /[\*\?]/) {
            &lastWords("Wild card filenames not allowed for http downloads");
        }
        $matchName = $remoteName;
    } else {
        # get a directory listing from FTP sites
        unlink("$download_directory/.listing");
        unlink("$download_directory/index.html");
        my $listCall = $wgetBase
            . " --dont-remove-listing"
            . " --directory-prefix=$download_directory"
            . " \"$remotePath\"";
        if (system($listCall)) {
            &lastWords("Could not retrieve directory listing with $listCall");
        }
        unless (-e "$download_directory/.listing" && -s "$download_directory/.listing") {
            &lastWords("Failed to retrieve directory listing with $listCall");
        }

        # got a directory listing from remote site,
        # slurp local download .history into array (it may not exist yet)
        if (open(my $historyIn, '<', "$download_directory/.history")) {
            @history = <$historyIn>;
            close($historyIn);
        }

        # make filename into regular expression:
        # escape everything, then turn the wild cards ? and * back into . and .*
        my $template = $remoteName;
        $template =~ s/(\W)/\\$1/g;
        $template =~ s/\\\?/\./g;
        $template =~ s/\\\*/\.\*/g;

        # work through directory .listing
        # exit with first match that isn't already in .history
        $matchName = "no match";
        if (open(my $listingIn, '<', "$download_directory/.listing")) {
            my @listing = <$listingIn>;
            close($listingIn);

            LISTING_LINE:
            foreach my $line (@listing) {
                if ($line =~ /\s+($template)\s*$/ || $line =~ /\s+($template)\s+->/) {
                    # got a match from .listing
                    $matchName = $1;
                    $matchLine = $line;

                    # the matched name is a literal filename, so it must be
                    # escaped before being used as a pattern
                    my $quotedName = quotemeta($matchName);
                    foreach my $entry (@history) {
                        if ($entry =~ /\s+$quotedName\s*$/ || $entry =~ /\s+$quotedName\s+->/) {
                            if ($entry eq $matchLine) {
                                # this file already downloaded
                                $matchName = "";
                            } else {
                                # new version of file available;
                                # $entry aliases the element of @history
                                $entry         = $matchLine;
                                $matchLineSave = $matchLine;
                                $matchLine     = "";
                            }
                            last;
                        }
                    }
                    if ($matchName && $matchName ne "no match") {
                        last LISTING_LINE;
                    }
                }
            }
        }

        # if $matchName is empty, there is no new update
        # if $matchName is "no match", $remoteName was not found in the directory listing
        # if $matchLine is empty, no need to append to .history
        if ($matchName eq "no match") {
            &lastWords("No match to $url on remote server");
        }
        if ($matchName && $matchLine) {
            push @history, $matchLine;
            $matchLineSave = $matchLine;
        }
        if ($matchName) {
            # replace (potentially) wildcard $url with actual download path
            $url = "$remotePath$matchName";
        } else {
            # nothing to retrieve
            &message("No update available for $url\n");
            return 0;
        }
    }

    # The listing line ends with the file name only for a plain file; for a
    # symbolic link it continues with "-> target". $matchLineSave is never
    # set for http downloads, which are therefore treated like links.
    my $quotedMatch = quotemeta($matchName);
    my $isPlainFile = 0;
    if (defined($matchLineSave) && $matchLineSave =~ /\s+$quotedMatch\s*$/) {
        $isPlainFile = 1;
    }

    # download the file
    # seems like wget cannot cope with --retr-symlinks and --timestamping
    my $sysCall = $wgetBase;
    if ($isPlainFile) {
        $sysCall .= " --timestamping";
    } else {
        $sysCall .= " --retr-symlinks";
    }
    $sysCall .= " --directory-prefix=$download_directory";
    # Only use --ignore-length option if desperate
    # if ($url =~ /^http:/i) {
    #     $sysCall .= " --ignore-length";
    # }
    $sysCall .= " \"$url\"";

    # Try each download twice.
    # The second attempt will normally not result in a download because timestamping is on.
    # The exception is when a file is updated on the remote server during downloading:
    # wget resumes the download, and ends up with a mixed file of the correct size,
    # but the file date is that of the original file.
    # Don't try to do this if the download file is a link
    if ($isPlainFile) {
        if (system($sysCall)) {
            &lastWords("Error return from: $sysCall");
        }
    }
    if (system($sysCall)) {
        &lastWords("Error return from: $sysCall");
    } else {
        $logEntry .= "Downloaded $url to $download_directory\n";
        print "Resting for 10 seconds\n";
        sleep 10;
    }

    my $thisFileName = "$download_directory/$matchName";
    # ensure file is writeable
    chmod 0644, $thisFileName;

    if ($thisFileName =~ /(.*)\.(?:gz|Z)$/i) {
        my $rootName = $1;
        # decompress with forced overwrite
        $sysCall = "$gzip -df $thisFileName";
        if ($ENV{'WINDIR'}) {
            $sysCall =~ s#/#\\#g;
        }
        # try up to 3 times before giving up
        if (system($sysCall) && system($sysCall) && system($sysCall)) {
            &lastWords("Error return from: $sysCall");
        } else {
            &message("Expanded $thisFileName\n");
            $thisFileName = $rootName;
        }
    }

    if ($type eq "taxonomy") {
        # taxonomy file may be a tar archive
        if ($thisFileName =~ /\.tar$/i) {
            # unpack if tar archive
            # change directory, switching drive first when the path starts
            # with a drive letter
            my $changeDirectory = sub {
                my $directory = shift;
                if ($directory =~ /^(\w:)(.*)/) {
                    chdir($1);
                    chdir($2);
                } else {
                    chdir($directory);
                }
            };

            # tar is given the bare file name; work it out before moving
            # away from the current directory
            my ($archiveName) = $thisFileName =~ /.*\/(.+?)$/;
            unless (defined($archiveName)) {
                &lastWords("Cannot parse filename from $thisFileName");
            }

            # have to cd to $local_taxonomy_directory because -C option of tar unreliable in DOS
            my $current_directory;
            if ($ENV{'WINDIR'}) {
                $current_directory = `cd`;
                $current_directory =~ s#\\#/#g;
            } else {
                $current_directory = `pwd`;
            }
            chomp($current_directory);
            $changeDirectory->($local_taxonomy_directory);

            $sysCall = "$tar -xf $archiveName";
            if ($ENV{'WINDIR'}) {
                $sysCall =~ s#/#\\#g;
            }
            # try up to 3 times before giving up
            if (system($sysCall) && system($sysCall) && system($sysCall)) {
                &lastWords("Error return from: $sysCall");
            } else {
                &message("Unpacked $thisFileName\n");
            }
            unlink($thisFileName);

            # cd back to original directory (just in case)
            $changeDirectory->($current_directory);
        }
    } elsif ($type eq "unigene") {
        # nothing more to do for UniGene
    } else {
        # sequence database files need to be renamed
        # NOTE(review): any $type other than the three below leaves $target
        # undefined, so the move fails and is reported through lastWords().
        my $target;
        if ($type eq "fasta") {
            $target = "$local_incoming_directory/$db_name\_xyzzy.fasta";
        } elsif ($type eq "name") {
            $target = "$local_incoming_directory/$db_name\_xyzzy.nam";
        } elsif ($type eq "reference") {
            # use the existing extension, if any, otherwise use .ref
            # lose ".complete" from MSDB; this is done on a copy so that
            # $thisFileName still names the file that is actually on disk
            my $nameForExtension = $thisFileName;
            $nameForExtension =~ s/msdb\.ref\.complete\./msdb\.ref\./;
            if ($nameForExtension =~ /.*\.(.+?)$/) {
                $target = "$local_incoming_directory/$db_name\_xyzzy." . lc($1);
            } else {
                $target = "$local_incoming_directory/$db_name\_xyzzy.ref";
            }
        }
        if (move($thisFileName, $target)) {
            &message("Renamed $thisFileName to $target\n");
        } else {
            &lastWords("Error return from renaming $thisFileName to $target");
        }
    }

    # if we get here, download must have succeeded, so can now write out
    # the updated .history, to avoid getting the same file again
    if (@history) {
        my $historyPath = "$download_directory/.history";
        if (open(my $historyOut, '>', $historyPath)) {
            foreach my $entry (@history) {
                print $historyOut $entry;
            }
            unless (close($historyOut)) {
                &message("Could not finish writing $historyPath: $!\n");
            }
        } else {
            # the download itself worked, so only report the problem; the
            # cost is that the same file may be fetched again next time
            &message("Could not write $historyPath: $!\n");
        }
    }

    # all done
    return 1;
}