#!/usr/bin/perl -w
#
# This script downloads the MADIS dataset archive files selected by the
# ftp.par1.txt parameter file.  The parameter file controls the time range
# and which datasets to download.  The data is downloaded relative to the
# datasets' base locations under the environment variable MADIS_DATA and
# un-gzipped so it is ready for the MADIS dump executables.
#
# This only works on Unix/Linux.
#
# See README.unix for more information.
#
# Usage:
#   perl <this script> [USERNAME PASSWORD [N]]
#   When no account arguments are given the script prompts for them.
#
# Notes:
#
#  1. Put the ftp.par1.txt file and get_MADIS_Data_windows.pl in the same
#     directory.  ftp.par1.txt is opened relative to the current directory.
#
#  2. The data is downloaded below the MADIS_DATA environment variable.
#     Any missing subdirectories are created.
#
#  3. "$" and "@" in command-line inputs might have to be escaped depending
#     on the invoking shell.  Ex. ab$5 --> ab\$5
#
#  4. We changed to wget from perl's Net::FTP.  wget must be in the user's
#     path.  gzip and mkdir must be in the path as well.
#
#  5. The downloaded files are un-gzipped by this script.
#
#  6. Fatal configuration errors (MADIS_DATA missing, unusable directories,
#     bad parameter file) terminate the script with exit status 1.
#
# Special variables defined:
#   %datasetpath
#      - key matches parameter file data set name
#      - value matches data path below MADIS_DATA
#
# Modification:
#   04/02/2011  Changed fetch to wget and https from Net::FTP.
#   08/04/2015  Changed servers to MADIS operational server
#   08/05/2015  Added new LDAD datasets
#
$|=1;
use strict;
use POSIX qw(:termios_h);
use Net::FTP;
use Cwd qw(abs_path);

# Prompt mode: 0 = ask for the return key before the script terminates,
#              1 = terminate without asking (default).
my $interactive = 1;
my $anonymous   = 0;
my $anyerr      = 0;
my ($unm, $pnm);

if (@ARGV == 2 || @ARGV == 3) {
    # perl get_MADIS_Data_windows.pl USERNAME PASSWORD INTERACTIVE-NO[N]
    # Account information is in the input args.
    ($unm, $pnm) = @ARGV[0, 1];
    # A third argument of "N" turns the closing prompt off.
    if (@ARGV == 3 && $ARGV[2] eq "N") {
        $interactive = 1;
    }
} else {
    # username/password
    print " Enter username: ";
    sysread(STDIN, $unm, 50);
    print " Enter password: ";
    sysread(STDIN, $pnm, 50);
}

# Trim all surrounding whitespace (including the newline typed at a prompt).
foreach my $credential ($unm, $pnm) {
    $credential = "" unless defined $credential;
    $credential =~ s/^\s+//;
    $credential =~ s/\s+$//;
}

# MADIS_DATA must already be set.
my $MADIS_DATA = $ENV{'MADIS_DATA'};
if (!defined $MADIS_DATA || $MADIS_DATA eq "") {
    print "MADIS_DATA is not defined, please define it then run again.\n";
    terminate_script(1);
} elsif (! -d $MADIS_DATA) {
    print "MADIS_DATA directory: $MADIS_DATA does not exist, please create ";
    print "it then run again\n";
    terminate_script(1);
}
# The script chdir()s into the data directories, so a relative MADIS_DATA
# would stop resolving after the first chdir.  Work with an absolute path.
$MADIS_DATA = abs_path($MADIS_DATA) || $MADIS_DATA;

################################################################
# MADIS dataset locations under MADIS_DATA
my %datasetpath = (
    "METAR"                      => "point/metar/netcdf",
    "SAO"                        => "point/sao/netcdf",
    "MARITIME"                   => "point/maritime/netcdf",
    "MODERNIZED COOP"            => "LDAD/coop/netCDF",
    "URBANET"                    => "LDAD/urbanet/netCDF",
    "INTEGRATED MESONET"         => "LDAD/mesonet/netCDF",
    "HYDROLOGICAL SURFACE"       => "LDAD/hydro/netCDF",
    "MULTI-AGENCY PROFILER"      => "LDAD/profiler/netCDF",
    "SNOW"                       => "LDAD/snow/netCDF",
    "WISDOM"                     => "LDAD/WISDOM/netCDF",
    "SATELLITE WIND - 3 HOUR"    => "point/HDW/netcdf",
    "SATELLITE WIND - 1 HOUR"    => "point/HDW1h/netcdf",
    "SATELLITE SOUNDING"         => "point/POES/netcdf",
    "SATELLITE RADIANCE"         => "point/satrad/netcdf",
    "RADIOMETER"                 => "point/radiometer/netcdf",
    "RADIOSONDE"                 => "point/raob/netcdf",
    "AUTOMATED AIRCRAFT REPORTS" => "point/acars/netcdf",
    "AUTOMATED AIRCRAFT PROFILES AT AIRPORTS" =>
                                    "point/acarsProfiles/netcdf",
    "NOAA PROFILER NETWORK"      => "point/profiler/netcdf",
    "CRN"                        => "LDAD/crn/netCDF",
    "HCN"                        => "LDAD/hcn/netCDF",
    "NEPP"                       => "LDAD/nepp/netCDF",
    "HFMETAR"                    => "LDAD/hfmetar/netCDF",
);
my @datasetkeys = sort keys %datasetpath;
# Quoted, one-per-line form of the names for the "Known datasets" message.
my @knowndskeys = map { "\"$_\"\n" } @datasetkeys;

################################################################
# read processing parms
open(my $parfh, '<', "ftp.par1.txt")
    or die "Could not open ftp.par1.txt file, please check that the file exists, is readable, and is in get_MADIS_Data_windows.pl directory.\n";
my @pars = <$parfh>;
close($parfh);
chomp(@pars);

################################################################
# Convert start and end times to system time
# Lines 1-3 are skipped.
splice(@pars, 0, 3);
# Line 4 start time in "YYYYMMDD HH" format
my $stime = shift(@pars);
# Line 5 end time in "YYYYMMDD HH" format
my $etime = shift(@pars);
foreach my $timestr ($stime, $etime) {
    $timestr = "" unless defined $timestr;
    $timestr =~ s/\s//g;
}
if ($stime !~ /^\d{10}/ || $etime !~ /^\d{10}/) {
    print "Start and end times (lines 4 and 5 of ftp.par1.txt) must be ";
    print "in \"YYYYMMDD HH\" format.\n";
    terminate_script(1);
}

# Convert start and end time to seconds since 1970 (two-digit year is used).
my $startsec = YMDHMS21970SEC(substr($stime, 2, 2), substr($stime, 4, 2),
                              substr($stime, 6, 2), substr($stime, 8, 2),
                              0, 0);
my $endsec   = YMDHMS21970SEC(substr($etime, 2, 2), substr($etime, 4, 2),
                              substr($etime, 6, 2), substr($etime, 8, 2),
                              0, 0);
my $daysec = 3600 * 24;
# Extend the end time to the last second of its day so the day loops below
# always include the final requested day.
$endsec += $daysec - ($endsec % $daysec) - 1;

################################################################
# Read dataset(s) request
# Lines 6-9 are skipped; line 10 is the first dataset line.
splice(@pars, 0, 4);
my @dspars;
my $sprp = "DEFAULT";
for (my $idx = 0; $idx <= $#pars; $idx++) {
    my $sprpln = uc $pars[$idx];
    $sprpln =~ s/^\s+//;
    my @fields = split(" ", $sprpln);
    next unless @fields;

    if ($fields[0] eq "Y") {
        # Requested dataset: the first known name contained in the line wins.
        my $dsdefines = 0;
        foreach my $dskey (@datasetkeys) {
            if (index($sprpln, $dskey) >= 0) {
                push(@dspars, $dskey);
                $dsdefines = 1;
                last;
            }
        }
        # UNDEFINED CASE
        if ($dsdefines == 0) {
            print "Uncoded dataset: $sprpln\n";
            print "Known datasets:\n@knowndskeys\n";
            terminate_script(1);
        }
    } elsif ($fields[0] eq "FTP") {
        # Special path check.  The "FTP" line itself is a comment line; the
        # directory code is taken from the line that follows it.
        # NOTE(review): the parameter file layout is not visible here --
        # confirm that the code is on the line after the "FTP" line.
        my $codeline = $idx < $#pars ? uc $pars[$idx + 1] : "";
        $codeline =~ s/^\s+//;
        my ($code) = split(" ", $codeline);
        $code = "" unless defined $code;
        # special directory code
        if ($code eq "PUBLIC2") {
            $sprp = "/madisPublic2/data";
        } elsif ($code eq "PUBLIC3") {
            $sprp = "/madisPublic3/data";
        } elsif ($code eq "RESEARCH2") {
            $sprp = "/madisResearch2/data";
        }
        last;
    }
}

################################################################
# Dataset requested check
if (!@dspars) {
    print "No dataset requested in ftp.par1.txt, please request a dataset";
    print " and run this again\n";
    terminate_script(1);
}

################################################################
# Which FTP server
my $ftpserver = "https://madis-data.ncep.noaa.gov";
# The account name is split on "_"; the first and third pieces select the
# server directory.
my @nameparts   = split("_", $unm);
my $accounthead = defined $nameparts[0] ? $nameparts[0] : "";
my $accounttype = defined $nameparts[2] ? lc $nameparts[2] : "";
if ($sprp eq "" || $sprp eq "DEFAULT") {
    $sprp = "/madisPublic/data";
    if ($accounthead eq "anonymous") {
        $sprp = "/madisPublic1/data";
        $anonymous = 1;
    } elsif ($accounttype eq "public") {
        $sprp = "/madisPublic1/data";
        $anonymous = 1;
    } elsif ($accounttype eq "research") {
        $sprp = "/madisResearch/data";
    } elsif ($accounttype eq "noaa") {
        $sprp = "/madisNoaa/data";
    } elsif ($accounttype eq "gov") {
        $sprp = "/madisGov/data";
    }
}
my $PREPATH = $sprp;

################################################################
# Get files for requested data sets
my $numfilesdownloaded = 0;
my $numberofdays       = 0;
foreach my $ds (sort @dspars) {
    my $loc = $datasetpath{$ds};

    # Make sure the local dataset directory exists and is writable.
    CHKDIRSANDFIRST($loc);
    my $localdir = "$MADIS_DATA/$loc";

    # Fetch each day
    for (my $is = $startsec; $is <= $endsec; $is += $daysec) {
        # Sleep a minute after 10 days
        $numberofdays++;
        if ($numberofdays == 10) {
            sleep(60);
            $numberofdays = 0;
        }

        my ($dateloc, $fileday) = DIRFILEYEARMMDD($is);
        print "Ftping data to MADIS_DATA/$loc for day ${fileday}.\n";

        # wget saves into the current directory (--no-directories).
        if (!chdir($localdir)) {
            print "Could not change to local director: $localdir ";
            print "Please create directory and run program again.\n";
            terminate_script(1);
        }

        my $remotedir = "$PREPATH/archive/$dateloc/$loc";
        # The command is run as an argument list (no shell), so credentials
        # containing shell metacharacters are passed through untouched.
        # NOTE: the password is still visible in the process list.
        my @wgetcmd = ('wget');
        if ($anonymous != 1) {
            push(@wgetcmd, "--user=$unm", "--password=$pnm");
        }
        push(@wgetcmd, '--no-check-certificate', '--no-directories',
                       '--recursive', '--level=1', '--accept', '*.gz',
                       '--timeout=600', "${ftpserver}${remotedir}/");

        # TRAP all errors
        eval {
            open(my $wgetfh, '-|', @wgetcmd)
                or die "could not run wget: $!\n";
            my @res = <$wgetfh>;    # drain wget's standard output
            close($wgetfh);
            1;
        };
        # Error check
        if ($@) {
            print "FTP failed. The last error message was: $@";
            $anyerr = 1;
        }
    }

    # un-gzip data
    my @gzfiles = GZDIRLIST($localdir);
    # Only do the days we ftp'ed
    for (my $is = $startsec; $is <= $endsec; $is += $daysec) {
        my ($dateloc, $fileday) = DIRFILEYEARMMDD($is);
        foreach my $gzname (grep { /$fileday/ } @gzfiles) {
            (my $cdfname = $gzname) =~ s/\.gz$//;
            if (gunzip_file("$localdir/$gzname", "$localdir/$cdfname")) {
                $numfilesdownloaded++;
            } else {
                $anyerr = 1;
            }
            # The compressed file is removed whether or not it unpacked.
            unlink("$localdir/$gzname");
        }
    }
}

if ($anyerr == 1) {
    print "Download completed with errors.\n";
} elsif ($numfilesdownloaded > 0) {
    print "Download successfully completed.\n";
} else {
    print "Download completed, but no data found.\n";
}
terminate_script(0);

############################################################
# Purpose: End the script.  When $interactive is 0 the user is first asked
#          to press the return key.
# Params:  exit status (defaults to 0).
sub terminate_script {
    my ($status) = @_;
    $status = 0 unless defined $status;
    if ($interactive == 0) {
        my $key;
        print "Please enter return-key to terminate script:";
        sysread(STDIN, $key, 100);
    }
    exit($status);
}

# Purpose: Decompress one gzip file with "gzip -dc", streaming the output
#          into the target file.  A failed target file is removed.
# Params:  path of the .gz file, path of the file to create.
# Returns: 1 on success, 0 on failure (a message has been printed).
sub gunzip_file {
    my ($gzpath, $outpath) = @_;

    # Check that the input is readable before starting gzip.
    my $in;
    if (!open($in, '<', $gzpath)) {
        print "can't open file to un-gzip:$gzpath $!\n";
        return 0;
    }
    close($in);

    my $gz;
    if (!open($gz, '-|', 'gzip', '-dc', $gzpath)) {
        print "can't open process to gzip: $!\n";
        return 0;
    }

    my $reason = "";
    my $out;
    if (open($out, '>', $outpath)) {
        binmode($gz);
        binmode($out);
        my $buffer;
        while (1) {
            my $got = read($gz, $buffer, 65536);
            if (!defined $got) {
                $reason = "$!";
                last;
            }
            last if $got == 0;
            if (!print {$out} $buffer) {
                $reason = "$!";
                last;
            }
        }
        if (!close($out) && $reason eq "") {
            $reason = "$!";
        }
    } else {
        $reason = "$!";
    }
    # close() on the pipe waits for gzip; a false result with $! == 0 means
    # gzip exited with a non-zero status (left in $?).
    if (!close($gz) && $reason eq "") {
        $reason = $! ? "$!" : "gzip exit status " . ($? >> 8);
    }

    if ($reason ne "") {
        print "Problem uncompressing $outpath with ";
        print "gzip:\n$reason\n";
        unlink($outpath);
        return 0;
    }
    return 1;
}

############################################################
# function YMDHMS21970SEC(yr2,mn,day,hr,min,sec): Return seconds since
# 1970 for the given date and time.
sub YMDHMS21970SEC {
    my ($yr2, $mn, $day, $hr, $min, $sec) = @_;

    # seconds to this year
    my $seconds = YY21970SEC($yr2);
    my $julian  = JDATE($yr2, $mn, $day);

    # seconds to today
    if ($julian > 0) {
        $seconds += ($julian - 1) * 24 * 3600;
    }
    # seconds to today's time
    $seconds += ($hr * 60 * 60) + ($min * 60) + $sec;
    return ($seconds);
}

# Expand a year below 1000 to four digits: 70-999 are offset from 1900,
# 0-69 from 2000.  Larger values are returned unchanged.
sub FULLYEAR {
    my ($yr) = @_;
    return $yr if $yr >= 1000;
    return $yr < 70 ? 2000 + $yr : 1900 + $yr;
}

# Return seconds from 1970 to start of year
sub YY21970SEC {
    my ($yr) = @_;
    my $yy = FULLYEAR($yr);
    my $leapyrs = 0;
    foreach my $pastyear (1970 .. ($yy - 1)) {
        # Asking about December 1st counts every leap year once.
        $leapyrs += LEAPDAY($pastyear, 12, 1);
    }
    my $yrdif = $yy - 1970;
    return ($yrdif * 365 * 86400 + $leapyrs * 86400);
}

# Return 1 if we are after the leap day in a leap year.
sub LEAPDAY {
    my ($year, $month, $day) = @_;

    return (0) if $year % 4;
    # Years that are multiples of 100 are not leap years
    # unless they are multiples of 400.
    return (0) if !($year % 100) && ($year % 400);

    return (0) if $month < 2;
    return (0) if $month == 2 && $day < 29;
    return (1);
}

# Purpose: Get day of the year for date
sub JDATE {
    my ($yr, $mn, $day) = @_;
    # Entry [m] is the length of the month before month m, so summing
    # entries 1..m gives the days that precede month m.
    my @daysbefore = (0, 0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
    $daysbefore[3] = 29 if LEAPDAY(FULLYEAR($yr), 12, 1);

    my $jdays = 0;
    foreach my $month (1 .. int($mn)) {
        $jdays += $daysbefore[$month];
    }
    return ($jdays + $day);
}

# Build directory and file date syntax.
# Params:  seconds since 1970.
# Returns: ("YYYY/MM/DD", "YYYYMMDD") for that time in GMT.
sub DIRFILEYEARMMDD {
    my ($tmsec) = @_;
    my ($mday, $mon, $year) = (gmtime($tmsec))[3, 4, 5];
    $year += 1900;
    $mon++;
    my $dirmdy  = sprintf("%04d/%02d/%02d", $year, $mon, $mday);
    my $filemdy = sprintf("%04d%02d%02d",   $year, $mon, $mday);
    return ($dirmdy, $filemdy);
}

# Build if needed MADIS directories for datasets.
# Params: dataset paths relative to MADIS_DATA.
# Terminates the script when a directory is not writable.
sub CHKDIRSANDFIRST {
    my (@llocs) = @_;
    foreach my $subdir (@llocs) {
        my $path = "$MADIS_DATA/$subdir";
        if (! -d $path) {
            # List form: the path is never interpreted by a shell.
            system('mkdir', '-p', $path);
        }
        if (! -w $path) {
            print "Can not write to directory: $path\nPlease check the ";
            print "directory and its permissions: $path and then run this ";
            print "script again\n";
            terminate_script(1);
        }
    }
}

# Purpose: Get all gzip files in a directory
# Please use this method so we can make sure we are getting the files we
# expect.  This is to take care of missing directories problems: a missing
# or unreadable directory yields an empty list.
sub GZDIRLIST {
    my ($directory) = @_;
    return () if ! -d $directory;
    opendir(my $dirhandle, $directory) or return ();
    my @dirfiles = readdir($dirhandle);
    closedir($dirhandle);
    return grep { /\.gz$/ } @dirfiles;
}

1;