#This was originally for jcwren's programming contest, #but after suggestions from friends, it really doesn't #match the critera anymore, so I didn't feel comfortable #posting in under the [id://43857|original thread], but #I'm still sufficiently happy with it that I wanted to #post it. This is the first time I've used LWP, and the #contest was a great way to learn about a great, versatile module.
#This script reads in CNNi's headlines, the "interactive headlines" (linked to larger write ups), and weather. #As always, I look forward to your comments, criticisms and suggestions. #updated 20000108 -- added some parameters and self_url() trickery. # see lines 101 and 201-237 use strict; use LWP::Simple; use LWP::UserAgent; use CGI::Pretty qw(:all); use Date::Manip; &Date_Init ("TZ=US/Mountain"); my %state_abbr = ( "ALABAMA" => 'AL', "ALASKA" => 'AK', "ARIZONA" => 'AZ', "ARKANSAS" => 'AR', "CALIFORNIA" => 'CA', "COLORADO" => 'CO', "CONNECTICUT" => 'CT', "DELAWARE" => 'DE', "D.C." => 'DC', "FLORIDA" => 'FL', "GEORGIA" => 'GA', "HAWAII" => 'HI', "IDAHO" => 'ID', "ILLINOIS" => 'IL', "INDIANA" => 'IN', "IOWA" => 'IA', "KANSAS" => 'KS', "KENTUCKY" => 'KY', "LOUISIANA" => 'LA', "MAINE" => 'ME', "MARYLAND" => 'MD', "MASSACHUSETTS" => 'MA', "MICHIGAN" => 'MI', "MINNESOTA" => 'MN', "MISSISSIPPI" => 'MS', "MISSOURI" => 'MO', "MONTANA" => 'MT', "NEBRASKA" => 'NE', "NEVADA" => 'NV', "NEW HAMPSHIRE" => 'NH', "NEW JERSEY" => 'NJ', "NEW MEXICO" => 'NM', "NEW YORK" => 'NY', "NORTH CAROLINA" => 'NC', "NORTH DAKOTA" => 'ND', "OHIO" => 'OH', "OKLAHOMA" => 'OK', "OREGON" => 'OR', "PENNSYLVANIA" => 'PA', "PUERTO RICO" => 'PR', "RHODE ISLAND" => 'RI', "SOUTH CAROLINA" => 'SC', "SOUTH DAKOTA" => 'SD', "TENNESSEE" => 'TN', "TEXAS" => 'TX', "UTAH" => 'UT', "VERMONT" => 'VT', "VIRGIN ISLANDS" => 'VI', "VIRGINIA" => 'VA', "WASHINGTON" => 'WA', "WEST VIRGINIA" => 'WV', "WISCONSIN" => 'WI', "WYOMING" => 'WY', ); my $newsurl ="http://headlinenews.cnn.com/QUICKNEWS/virtual/swf.headline.txt"; my $weatherurl ="http://headlinenews.cnn.com/QUICKNEWS/virtual/swf.weather.txt"; my $interurl ="http://headlinenews.cnn.com/QUICKNEWS/virtual/swf.interactive.txt"; my $interactive_ticker; my $news_ticker = retrieve_news(); my $weather_ticker = retrieve_weather(); if (param("showinteractive")!~/no/i){ $interactive_ticker= retrieve_interactive(); } my @state_weather =split /&/, $weather_ticker; my $citystate; my $lowstate; my $lowtemp; my $histate; my $hitemp; my ($URL, @lines, @entry, $content); my ($moddate); my (%headlines); write_header("CNN wrapup"); %headlines = log_news ($news_ticker); write_news( $interactive_ticker, %headlines),"

"; if (param("showweather")!~/no/i){ write_weather(@state_weather); } write_footer(); #################### # # Prints out a nice HTML header. # #################### sub write_header { my $title=shift; print header(); print start_html(-Title => $title, -BGCOLOR=>"#000000",-TEXT=>"#00FF00", -LINK=>"#33FF00", -VLINK=>"00CC00", -ALINK=>"FFFFFF"); print start_form(); # start_multipart_form() if file upload my $das_url =&self_url(); print ""; } #################### # # End of page stuff. # #################### sub write_footer { my $newsdate=get_moddate($newsurl);$newsdate= scalar localtime $newsdate; $newsdate = UnixDate($newsdate, "%m/%d/%Y %H:%M"); my $weatherdate= get_moddate($weatherurl);$weatherdate= scalar localtime $weatherdate ;$weatherdate = UnixDate($weatherdate, "%m/%d/%Y %H:%M"); my $presenttime=ParseDate("now"); print "

Headlines last updated ", $newsdate, "\n | Weather last updated ", $weatherdate, "\n | This page last updated ", UnixDate($presenttime, "%m/%d/%Y %H:%M"),"

", end_form(), end_html() } #################### # # wrapper for LWP->get() call. # #################### sub retrieve_weather { return "&" . get ($weatherurl); } #################### # # wrapper for LWP->get() call. # #################### sub retrieve_news { return get ($newsurl); } #################### # # wrapper for LWP->get() call. # #################### sub retrieve_interactive{ return get ($interurl); } #################### # # prints out the interactive and state headlines in a table # #################### sub write_news{ my ($l_inter, %l_headlines) = @_; my $statectr; my $new_tagstart; my $new_tagend; #start the table. print ''; print ''; # split up the interactive headlines. @lines = split /&intheadline\d*\=/, $l_inter; # get last modified date of the $interurl file. format nicely. $moddate = scalar localtime get_moddate ($interurl); $moddate= UnixDate($moddate, "%m/%d/%Y %H:%M"); # split the headline information URL & headline text & put into table. foreach (@lines) { my $hl_found=0; @entry = split /&inturl\d*\=/; @entry[1] =~ s/^\s//; @entry[1] =~ s/\s\s/ /g; @entry[0] =~ s/interactivecount\=\d*//; if (@entry[0]){ print "'; # sorting headlines on state, then timestamp, then headline text. PRINTHEADLINE: foreach my $thisheadline (sort {($l_headlines{$a}{"State"} cmp $l_headlines{$b}{"State"})or ($l_headlines{$b}{"Timestamp"} cmp $l_headlines{$a}{"Timestamp"}) or ($l_headlines{$a}{"Headline"} cmp $l_headlines{$b}{"Headline"}) } keys %l_headlines) { my $broken_headline ="";# $l_headlines{$thisheadline}{"Headline"}; if (UnixDate ($l_headlines{$thisheadline}{"Timestamp"}, "%m/%d/%Y") eq UnixDate (&ParseDate("Today"),"%m/%d/%Y")) { $new_tagstart='' ;$new_tagend= ''; } else { if (param ("newonly")=~/yes/i) { next PRINTHEADLINE } $new_tagstart='' ;$new_tagend= ''; } if (param("search")!~/^$/) { my $searchin = param("search"); $searchin =~ s/(\?)/\\w/gi; $searchin =~ s/(\*)/\.$1?/gi; #line noise? ha! $searchin = "\\b". $searchin . "\\b"; if (defined(param("searchmethod"))) { if (param("searchmethod")=~/showonly/ ) { if ($l_headlines{$thisheadline}{"Headline"}!~/$searchin/i) {next PRINTHEADLINE} } } $l_headlines{$thisheadline}{"Headline"}=~ s/($searchin)/$1<\/FONT>/i; } if (param("definitions")=~/yes/i){ map {$broken_headline .= "$_ " }split (/ /,$l_headlines{$thisheadline}{"Headline"}); } else { $broken_headline=$l_headlines{$thisheadline}{"Headline"} } # # Can you figure out why the anchor's printed out in the headline cell, rather than # the state or at the beginning of the row? # print "", "

Breaking news

@entry[0]<\/TD><\/TR>\n"; } } # begin next part of table. print '

Headlines from around the country

'; # print out the state HTML anchors in two lines. # note there's no checking to see if one exists or not :( foreach my $thisstate ( sort keys %state_abbr) { $statectr ++; print "", $state_abbr{$thisstate}, " "; ($statectr==26) && print "
"; } # print an HTML anchor to the Weather. print '
Weather
",$new_tagstart,$l_headlines{$thisheadline}{"State"},$new_tagend,"<\/TD>", "",$new_tagstart,$l_headlines{$thisheadline}{"Timestamp"},"<\/TD>",$new_tagend, ""," ",$new_tagstart,$broken_headline,"<\/TD>",$new_tagend, "<\/TR>\n"; } # end table. print "<\/TABLE>"; print &self_url(); } #################### # # put the weather into a table. # # #################### sub write_weather { my %cityinfo; my $ctr=0; # used for columnating. # # split into HoH format # {INDEX}{INFOTYPE}{INFODATA} # e.g. {1}{CITY}{BIRMINGHAM, AL} # {1}{TEMP}{-5} # etc... # foreach my $temp (@_) { $temp=~/([a-zA-z]*)(\d*)\=(.*)/; $cityinfo{$2}{$1}=$3; } # print out the HTML Anchor. print ""; # begin table. print "\n"; print "
Weather from all 50 states<\/TD><\/TR>\n"; print "
City Name<\/TD> Current
Conditions<\/TD>
Current
Temperature<\/TD>", "
City Name<\/TD> Current
Conditions<\/TD>
Current
Temperature<\/TD>", "
City Name<\/TD> Current
Conditions<\/TD>
Current
Temperature<\/TD>\n"; $ctr = 0; foreach my $thiscity (sort { $cityinfo{$a}{"city"}cmp $cityinfo{$b}{"city"}} keys %cityinfo ) { if ($cityinfo{$thiscity}{"weatherurl"} =~m#http://www.cnn.com/WEATHER/(c)(.)/#i) { $citystate=", Canada" } else { # use the second saved part of the regexp to populate the state. or district. # stupid districts. $cityinfo{$thiscity}{"weatherurl" } =~m#http://www.cnn.com/WEATHER/(..)/(.*)/#i; $citystate= ", $2 "; } # most of the time, the picture for a weather condition is just the # condition name, stripped of whitespace, plus '.GIF'. $cityinfo{$thiscity}{"weatherpic"} = $cityinfo{$thiscity}{"condition"}. ".gif"; $cityinfo{$thiscity}{"weatherpic"} =~ s/[ |\/]/./g; # but not always. stupid special exceptions. $cityinfo{$thiscity}{"weatherpic"} =~ s/haze/hazy/g; $cityinfo{$thiscity}{"weatherpic"} =~ s/foggy/fog/g; $cityinfo{$thiscity}{"weatherpic"} =~ s/lgt\.snow\.fog/snow.fog/g; $cityinfo{$thiscity}{"weatherpic"} =~ s/snow\.and\.fog/snow.fog/g; $cityinfo{$thiscity}{"weatherpic"} =~ s/frz\.rain/freezing.rain/g; $cityinfo{$thiscity}{"weatherpic"} =~ s/mist/misty/g; $cityinfo{$thiscity}{"weatherpic"} =~ s/lgt.snowshower/light.snow/g; # three columns to a row. if (!(($ctr-1) % 3)) {print "<\/TR>"} # print out one row of data. $cityinfo{$thiscity}{weatherurl} && print "
", $cityinfo{$thiscity}{"city"} ,"$citystate<\/A><\/TD>", "" , $cityinfo{$thiscity}{"condition"}, "<\/TD>", "<\/TD>", "" , $cityinfo{$thiscity}{"temperature"}, "<\/TD>"; print "\n"; # log information for highest/ lowest temps. if (($cityinfo{$thiscity}{"temperature"} < $lowtemp )or ($lowtemp==undef)){ $lowstate = "". $cityinfo{$thiscity}{"city"} ."$citystate<\/A>"; $lowtemp = $cityinfo{$thiscity}{"temperature"} } if ($cityinfo{$thiscity}{"temperature"} > $hitemp){ $histate = "". $cityinfo{$thiscity}{"city"} ."$citystate<\/A>"; $hitemp = $cityinfo{$thiscity}{"temperature"} ; } $ctr++; } # reaching the end of the table. # print out highest & lowest of the moment. print "
Extremes of the moment!<\/TD><\/TR>\n"; print "
High<\/TD>$histate<\/TD>$hitemp<\/TD>Lo<\/TD>$lowstate<\/TD>$lowtemp<\/TD><\/TR>\n"; print "<\/TABLE>\n"; } #################### # # checks for new news & writes it to a headline file. # #################### sub log_news { my %l_data; my @lines; my $hl_found; # # no headlines? that's OK! # (no die statement) open HL, "headlines.txt"; @lines = ; # headlines.txt format : state\ttimestamp\theadline close HL; # # start by reading all of the existing headlines from "headlines.txt"; # foreach (@lines) { @entry = split /\t/; my $nextkey = scalar keys %l_data; if (@entry[0]) { $l_data{$nextkey}{"State"}=@entry[0]; $l_data{$nextkey}{"Timestamp"}=@entry[1]; $l_data{$nextkey}{"Headline"}=@entry[2]; chomp $l_data{$nextkey}{"Headline"}; } } # # OK, read in the new headlines, and if they don't match any existing ones, # read em into the hash. # @lines = split /&state\d*\=/, shift; $moddate = scalar localtime get_moddate ($newsurl); $moddate= UnixDate($moddate, "%m/%d/%Y %H:%M"); foreach (@lines) { $hl_found=0; @entry = split /&headline\d*\=/; @entry[1] =~ s/&headlineload\=done//; @entry[1]=~s/^\s//; @entry[1]=~s/\s\s/ /g; @entry[0] =~ s/headlinecount\=\d*//; @entry[0]= trim(@entry[0]); my $nextkey = scalar keys %l_data; foreach my $thisheadline (keys %l_data) { if ($l_data{$thisheadline}{"Headline"} eq @entry[1]){$hl_found=1}; last if $hl_found; } if ((not $hl_found) && (@entry[0])){ $l_data{$nextkey}{"State"}=@entry[0]; $l_data{$nextkey}{"Timestamp"}=$moddate ; $l_data{$nextkey}{"Headline"}=@entry[1]; } } # Now that all that's been done, # sort & write all the headlines to "headlines.txt". open HL, ">headlines.txt"; foreach my $thisheadline (sort {($l_data{$a}{"State"} cmp $l_data{$b}{"State"})or ($l_data{$a}{"Timestamp"} cmp $l_data{$b}{"Timestamp"}) or ($l_data{$a}{"Headline"} cmp $l_data{$b}{"Headline"}) } keys %l_data){ print (HL $l_data{$thisheadline}{"State"},"\t"); print (HL $l_data{$thisheadline}{"Timestamp"},"\t"); print (HL $l_data{$thisheadline}{"Headline"},"\n"); } close HL; return %l_data; } #################### # # wrapper for LWP::Useragent code. # returns last modified date of a url. # in : the url to date # out : last modified date, or 0 if unable to determine date # #################### sub get_moddate { my $ans; my $lurl=shift; my $ua = LWP::UserAgent->new(); $ans = $ua->request(HTTP::Request->new("HEAD", $lurl)); if ($ans->is_success) { return $ans->last_modified || 0 ; } else { print STDERR "$lurl: Error [", $ans->code, "] ", $ans->message, "!\n"; } } #################### # # emulates trim function found # in other languages. Thank you, cookbook! :) # in : a string # out : a string *cough* # #################### sub trim { my @out = @_; for (@out) { s/^\s+//; s/\s+$//; } return wantarray ? @out : $out[0]; }