#!/usr/bin/perl
# Scrapes per-day temperature values from the weather-station form at
# http://bub2.meteo.psu.edu/wxstn/wxstn.htm, one request per calendar day
# from 1896-01-01 up to (but not including) today, and writes one
# space-separated line per day to 'tdata.txt':
#
#   <date> <temp 1> <temp 2> <temp 3> <30-yr avg high> <30-yr avg low>
#
# Values that cannot be found on the page, or that the page reports as
# '(N/A)', are written as 99.99 so every line keeps the same column count.

use strict;      # ensures proper variable declarations, etc.
use warnings;    # allows warnings to be issued

use Date::Simple::D8 (':all');
use Time::HiRes qw(sleep);    # built-in sleep() truncates 0.1 to 0 seconds
use WWW::Mechanize;

# Marker written in place of a missing / not-available value.
use constant MISSING => '99.99';

#print "Create a new directory: ";
#my $dir = ;    # NOTE(review): the input expression is missing here (presumably <STDIN>)
#chomp ($dir);
#mkdir "$dir";

# Open files for writing
open my $data_fh, '>', 'tdata.txt' or die "open: $!\n";

# NOTE(review): yearly_stats.txt is created (and truncated) here, but nothing
# in this script writes to it. Kept so the script's side effects are unchanged.
open my $stats_fh, '>', 'yearly_stats.txt' or die "open: $!\n";
print "Scraped data will be written to file 'tdata.txt'\n";

# Create date list: every day from the start date up to yesterday.
my $today = Date::Simple::D8->today();
my $start = Date::Simple::D8->new('18960101');
my $end   = Date::Simple::D8->new("$today");
my @dates;
while ( $start < $end ) {
    push @dates, $start;
    $start = $start->next;
}

# Initiate browsing agent
print "Initiating the browsing agent...\n";
my $url  = "http://bub2.meteo.psu.edu/wxstn/wxstn.htm";
my $mech = WWW::Mechanize->new( keep_alive => 1 );
print "Accessing URL...\n";
$mech->get($url);
print "Collecting data...\n";

# Start the scraping
for my $date (@dates) {
    $mech->submit_form(
        form_number => 1,
        fields      => { dtg => $date },
    );

    # Download the resulting page, text only, and scrape for data
    my $page = $mech->content( format => 'text' );

    # Daily max, min, average (per the original author's note). The value is
    # matched as an optionally negative integer of any length, so single-digit,
    # sub-zero and three-digit readings are captured whole.
    # NOTE(review): this pattern also matches the "30-Year Average ...
    # Temperature" lines; only the first three matches are used, so a daily
    # value that is absent from the page would shift the columns - confirm
    # against the page layout.
    my ( $temp_a, $temp_b, $temp_c ) = ( $page =~ /Temperature\s+:\s+(-?\d+)/g );

    # Daily 30-year max normal
    my ($thirtyyrhi) = $page =~ /30-Year Average High Temperature\s+:\s+(\S*)/;

    # Daily 30-year min normal
    my ($thirtyyrlo) = $page =~ /30-Year Average Low Temperature\s+:\s+(\S*)/;

    # Build the output record, substituting the missing-value marker as needed
    my $hlahdd = join( ' ',
        $date,
        map { value_or_missing($_) }
            ( $temp_a, $temp_b, $temp_c, $thirtyyrhi, $thirtyyrlo ) )
        . "\n";

    # Print the record to screen and to file
    print $hlahdd;
    print {$data_fh} $hlahdd;

    # Pause a tenth of a second between requests, then go back a page
    sleep 0.1;
    $mech->back();
}    # Exit the scraping loop

# Close the written files; buffered write errors only surface at close time.
close $data_fh  or die "close: $!\n";
close $stats_fh or die "close: $!\n";
print "Success!\n";

# Returns the scraped value unchanged, or MISSING when the value was not found
# on the page (undef) or the page reported it as '(N/A)'.
sub value_or_missing {
    my ($value) = @_;
    return MISSING if !defined $value || $value eq '(N/A)';
    return $value;
}