1: #This was originally for jcwren's programming contest, 
   2: #but after suggestions from friends, it really doesn't 
   3: #match the critera anymore, so I didn't feel comfortable
   4: #posting in under the [id://43857|original thread], but
   5: #I'm still sufficiently happy with it that I wanted to 
   6: #post it. This is the first time I've used LWP, and the 
   7: #contest was a great way to learn about a great, versatile module.<br>
   8: 
   9: #This script reads in CNNi's headlines, the "interactive headlines" (linked to larger write ups), and weather.
  10: 
  11: #As always, I look forward to your comments, criticisms and suggestions.
  12: 
  13: #updated 20000108 -- added some parameters and self_url() trickery.
  14: # see lines 101 and 201-237
  15: <code>
  16: use strict;
  17: use LWP::Simple;
  18: use LWP::UserAgent;
  19: use CGI::Pretty qw(:all);
  20: use Date::Manip; &Date_Init ("TZ=US/Mountain");
  21: my %state_abbr = (
  22:     "ALABAMA"               =>      'AL',
  23:     "ALASKA"                =>      'AK',
  24:     "ARIZONA"               =>      'AZ',
  25:     "ARKANSAS"              =>      'AR',
  26:     "CALIFORNIA"            =>      'CA',
  27:     "COLORADO"              =>      'CO',
  28:     "CONNECTICUT"           =>      'CT',
  29:     "DELAWARE"              =>      'DE',
  30:     "D.C."                  =>      'DC',
  31:     "FLORIDA"               =>      'FL',
  32:     "GEORGIA"               =>      'GA',
  33:     "HAWAII"                =>      'HI',
  34:     "IDAHO"                 =>      'ID',
  35:     "ILLINOIS"              =>      'IL',
  36:     "INDIANA"               =>      'IN',
  37:     "IOWA"                  =>      'IA',
  38:     "KANSAS"                =>      'KS',
  39:     "KENTUCKY"              =>      'KY',
  40:     "LOUISIANA"             =>      'LA',
  41:     "MAINE"                 =>      'ME',
  42:     "MARYLAND"              =>      'MD',
  43:     "MASSACHUSETTS"         =>      'MA',
  44:     "MICHIGAN"              =>      'MI',
  45:     "MINNESOTA"             =>      'MN',
  46:     "MISSISSIPPI"           =>      'MS',
  47:     "MISSOURI"              =>      'MO',
  48:     "MONTANA"               =>      'MT',
  49:     "NEBRASKA"              =>      'NE',
  50:     "NEVADA"                =>      'NV',
  51:     "NEW HAMPSHIRE"         =>      'NH',
  52:     "NEW JERSEY"            =>      'NJ',
  53:     "NEW MEXICO"            =>      'NM',
  54:     "NEW YORK"              =>      'NY',
  55:     "NORTH CAROLINA"        =>      'NC',
  56:     "NORTH DAKOTA"          =>      'ND',
  57:     "OHIO"                  =>      'OH',
  58:     "OKLAHOMA"              =>      'OK',
  59:     "OREGON"                =>      'OR',
  60:     "PENNSYLVANIA"          =>      'PA',
  61:     "PUERTO RICO"           =>      'PR',
  62:     "RHODE ISLAND"          =>      'RI',
  63:     "SOUTH CAROLINA"        =>      'SC',
  64:     "SOUTH DAKOTA"          =>      'SD',
  65:     "TENNESSEE"             =>      'TN',
  66:     "TEXAS"                 =>      'TX',
  67:     "UTAH"                  =>      'UT',
  68:     "VERMONT"               =>      'VT',
  69:     "VIRGIN ISLANDS"        =>      'VI',
  70:     "VIRGINIA"              =>      'VA',
  71:     "WASHINGTON"            =>      'WA',
  72:     "WEST VIRGINIA"         =>      'WV',
  73:     "WISCONSIN"             =>      'WI',
  74:     "WYOMING"               =>      'WY',
  75:     );
  76: 
  77: 
  78: 
  79: my $newsurl     ="http://headlinenews.cnn.com/QUICKNEWS/virtual/swf.headline.txt";
  80: my $weatherurl  ="http://headlinenews.cnn.com/QUICKNEWS/virtual/swf.weather.txt";
  81: my $interurl    ="http://headlinenews.cnn.com/QUICKNEWS/virtual/swf.interactive.txt";
  82: my $interactive_ticker;
  83: my $news_ticker = retrieve_news();
  84: my $weather_ticker = retrieve_weather();
  85: if (param("showinteractive")!~/no/i){
  86:     $interactive_ticker= retrieve_interactive();
  87: }
  88: my @state_weather =split /&/, $weather_ticker;
  89: 
  90: my $citystate;
  91: my $lowstate; my $lowtemp;
  92: my $histate; my  $hitemp;
  93: 
  94: my ($URL, @lines, @entry, $content);
  95: my ($moddate);
  96: my (%headlines);
  97: 
  98: write_header("CNN wrapup");
  99: %headlines = log_news ($news_ticker);
 100: write_news( $interactive_ticker, %headlines),"<P>";
 101: if (param("showweather")!~/no/i){
 102:     write_weather(@state_weather);
 103: }
 104: write_footer();
 105: 
 106: ####################
 107: #
 108: #   Prints out a nice HTML header.
 109: #
 110: ####################
 111: sub write_header {
 112:     my $title=shift;
 113:     print header();
 114:     print start_html(-Title => $title, -BGCOLOR=>"#000000",-TEXT=>"#00FF00", -LINK=>"#33FF00", -VLINK=>"00CC00", -ALINK=>"FFFFFF");
 115:     print start_form(); # start_multipart_form() if file upload
 116:     my $das_url =&self_url();
 117:     print "<META HTTP-EQUIV='Refresh' CONTENT='600;URL=$das_url'>";
 118: }
 119: 
 120: ####################
 121: #
 122: #   End of page stuff.
 123: #
 124: ####################
 125: sub write_footer {
 126:     my $newsdate=get_moddate($newsurl);$newsdate= scalar localtime $newsdate; $newsdate = UnixDate($newsdate, "%m/%d/%Y %H:%M");
 127:     my $weatherdate= get_moddate($weatherurl);$weatherdate= scalar localtime $weatherdate ;$weatherdate = UnixDate($weatherdate, "%m/%d/%Y %H:%M");
 128:     my $presenttime=ParseDate("now");
 129:     print "<center><p><A HREF='$newsurl'>Headlines</A> last updated ", $newsdate,
 130:     "\n | <A HREF='$weatherurl'>Weather</A> last updated ", $weatherdate,
 131:     "\n | This page last updated ", UnixDate($presenttime, "%m/%d/%Y %H:%M"),"</CENTER>",
 132:     end_form(), end_html() }
 133: 
 134: ####################
 135: #
 136: #   wrapper for LWP->get() call.
 137: #
 138: ####################
 139: sub retrieve_weather {
 140:     return "&" . get ($weatherurl);
 141: }
 142: 
 143: ####################
 144: #
 145: #   wrapper for LWP->get() call.
 146: #
 147: ####################
 148: sub retrieve_news {
 149:     return get ($newsurl);
 150: }
 151: 
 152: ####################
 153: #
 154: #   wrapper for LWP->get() call.
 155: #
 156: ####################
 157: sub retrieve_interactive{
 158:     return get ($interurl);
 159: }
 160: 
 161: ####################
 162: #
 163: #   prints out the interactive and state headlines in a table
 164: #
 165: ####################
 166: 
 167: sub write_news{
 168:     my ($l_inter, %l_headlines) = @_;
 169:     my $statectr;
 170:     my $new_tagstart;
 171:     my $new_tagend;
 172: 
 173:     #start the table.
 174:     print '<TABLE  align=CENTER BORDER="1">';
 175:     print '<TR><TD ALIGN=CENTER colspan=3 valign=center><h4> Breaking news</h4></TD></TR>';
 176: 
 177:     # split up the interactive headlines.
 178:     @lines = split /&intheadline\d*\=/, $l_inter;
 179: 
 180:     # get last modified date of the $interurl file. format nicely.
 181:     $moddate = scalar localtime get_moddate ($interurl);
 182:     $moddate= UnixDate($moddate, "%m/%d/%Y %H:%M");
 183: 
 184:     # split the headline information URL & headline text & put into table.
 185:     foreach (@lines) {
 186:         my $hl_found=0;
 187:         @entry = split /&inturl\d*\=/;
 188:         @entry[1] =~ s/^\s//;
 189:         @entry[1] =~ s/\s\s/ /g;
 190:         @entry[0] =~ s/interactivecount\=\d*//;
 191:         if (@entry[0]){
 192:             print "<TR><TD colspan =3 align=center><A HREF=http:\/\/headlinenews.cnn.com@entry[1] target=new>@entry[0]</a><\/TD><\/TR>\n";
 193:         }
 194:     }
 195: 
 196: 
 197:     # begin next part of table.
 198:     print '<TR><TD colspan=3 align=center><h4>Headlines from around the country</h4>';
 199: 
 200:     # print out the state HTML anchors in two lines.
 201:     # note there's no checking to see if one exists or not :(
 202:     foreach my $thisstate ( sort keys %state_abbr) {
 203:         $statectr ++;
 204:         print "<A HREF=#",$state_abbr{$thisstate},">", $state_abbr{$thisstate}, "</a>  ";
 205:         ($statectr==26) && print "<BR>";
 206:     }
 207:     # print an HTML anchor to the Weather.
 208:     print '<BR><A HREF=#WEATHER>Weather</A></TD></TR>';
 209: 
 210:     # sorting headlines on state, then timestamp, then headline text.
 211:     PRINTHEADLINE: foreach my $thisheadline (sort {($l_headlines{$a}{"State"} cmp $l_headlines{$b}{"State"})or
 212:                                     ($l_headlines{$b}{"Timestamp"} cmp $l_headlines{$a}{"Timestamp"}) or
 213:                                     ($l_headlines{$a}{"Headline"} cmp $l_headlines{$b}{"Headline"}) } keys %l_headlines) {
 214:         my $broken_headline ="";# $l_headlines{$thisheadline}{"Headline"};
 215: 
 216:         if (UnixDate ($l_headlines{$thisheadline}{"Timestamp"}, "%m/%d/%Y") eq UnixDate (&ParseDate("Today"),"%m/%d/%Y")) {
 217:             $new_tagstart='<FONT COLOR="#FFFFFF">' ;$new_tagend= '</FONT>';
 218:         } else {
 219:             if (param ("newonly")=~/yes/i) {
 220:                 next PRINTHEADLINE
 221:             }
 222:             $new_tagstart='' ;$new_tagend= '';
 223:         }
 224: 
 225:         if (param("search")!~/^$/) {
 226:             my $searchin = param("search");
 227:             $searchin =~ s/(\?)/\\w/gi;
 228:             $searchin =~ s/(\*)/\.$1?/gi;   #line noise? ha!
 229:             $searchin = "\\b". $searchin . "\\b";
 230:             if (defined(param("searchmethod"))) {
 231:                 if (param("searchmethod")=~/showonly/ ) {
 232:                     if ($l_headlines{$thisheadline}{"Headline"}!~/$searchin/i) {next PRINTHEADLINE}
 233:                 }
 234:             }
 235:             $l_headlines{$thisheadline}{"Headline"}=~ s/($searchin)/<FONT COLOR="#FF0000">$1<\/FONT>/i;
 236:         }
 237: 
 238:         if (param("definitions")=~/yes/i){
 239:             map {$broken_headline .= "<A HREF='http://www.m-w.com/cgi-bin/dictionary?va=$_'>$_</A> " }split (/ /,$l_headlines{$thisheadline}{"Headline"});
 240:         } else {
 241:             $broken_headline=$l_headlines{$thisheadline}{"Headline"}
 242:         }
 243: 
 244:         #
 245:         # Can you figure out why the anchor's printed out in the headline cell, rather than
 246:         # the state or at the beginning of the row?
 247:         #
 248:         print "<TR>",
 249:                 "<TD>",$new_tagstart,$l_headlines{$thisheadline}{"State"},$new_tagend,"<\/TD>",
 250:                 "<TD>",$new_tagstart,$l_headlines{$thisheadline}{"Timestamp"},"<\/TD>",$new_tagend,
 251:                 "<TD>","<A NAME='\#",$new_tagstart,$state_abbr{$l_headlines{$thisheadline}{"State"}},"'>  ",$new_tagstart,$broken_headline,"<\/TD>",$new_tagend,
 252:               "<\/TR>\n";
 253:     }
 254:     # end table.
 255:     print "<\/TABLE>";
 256:     print &self_url();
 257: }
 258: ####################
 259: #
 260: #   put the weather into a table.
 261: #
 262: #
 263: ####################
 264: sub write_weather {
 265: my %cityinfo;
 266: my $ctr=0; # used for columnating.
 267:     #
 268:     #   split into  HoH format
 269:     #   {INDEX}{INFOTYPE}{INFODATA}
 270:     #   e.g. {1}{CITY}{BIRMINGHAM, AL}
 271:     #        {1}{TEMP}{-5}
 272:     #        etc...
 273:     #
 274:     foreach my $temp (@_) {
 275:         $temp=~/([a-zA-z]*)(\d*)\=(.*)/;
 276:         $cityinfo{$2}{$1}=$3;
 277:     }
 278: 
 279:     # print out the HTML Anchor.
 280:     print "<A NAME=WEATHER>";
 281: 
 282:     # begin table.
 283:     print "<TABLE align=CENTER border=1>\n";
 284:     print "<TR><TD colspan=12 align=center>Weather from all 50 states<\/TD><\/TR>\n";
 285:     print "<TR><TD>City Name<\/TD><TD  colspan=2> Current<br>Conditions<\/TD><TD>Current<br>Temperature<\/TD>",
 286:               "<TD>City Name<\/TD><TD  colspan=2> Current<br>Conditions<\/TD><TD>Current<br>Temperature<\/TD>",
 287:               "<TD>City Name<\/TD><TD  colspan=2> Current<br>Conditions<\/TD><TD>Current<br>Temperature<\/TD>\n";
 288: 
 289:     $ctr = 0;
 290:     foreach  my $thiscity (sort { $cityinfo{$a}{"city"}cmp $cityinfo{$b}{"city"}} keys  %cityinfo ) {
 291:         if ($cityinfo{$thiscity}{"weatherurl"} =~m#http://www.cnn.com/WEATHER/(c)(.)/#i) {
 292:             $citystate=",  Canada"
 293:         } else {
 294:             # use the second saved part of the regexp to populate the state. or district.
 295:             # stupid districts.
 296:             $cityinfo{$thiscity}{"weatherurl" } =~m#http://www.cnn.com/WEATHER/(..)/(.*)/#i;
 297:             $citystate= ",  $2 ";
 298:         }
 299:         # most of the time, the picture for a weather condition is just the
 300:         # condition name, stripped of whitespace, plus '.GIF'.
 301:         $cityinfo{$thiscity}{"weatherpic"} =  $cityinfo{$thiscity}{"condition"}.  ".gif";
 302:         $cityinfo{$thiscity}{"weatherpic"} =~ s/[  |\/]/./g;
 303: 
 304:         # but not always. stupid special exceptions.
 305:         $cityinfo{$thiscity}{"weatherpic"} =~  s/haze/hazy/g;
 306:         $cityinfo{$thiscity}{"weatherpic"} =~  s/foggy/fog/g;
 307:         $cityinfo{$thiscity}{"weatherpic"} =~  s/lgt\.snow\.fog/snow.fog/g;
 308:         $cityinfo{$thiscity}{"weatherpic"} =~  s/snow\.and\.fog/snow.fog/g;
 309:         $cityinfo{$thiscity}{"weatherpic"} =~  s/frz\.rain/freezing.rain/g;
 310:         $cityinfo{$thiscity}{"weatherpic"} =~  s/mist/misty/g;
 311:         $cityinfo{$thiscity}{"weatherpic"} =~  s/lgt.snowshower/light.snow/g;
 312: 
 313: 
 314:         # three columns to a row.
 315:         if (!(($ctr-1) % 3))  {print "<\/TR>"}
 316: 
 317:         # print out one row of data.
 318:         $cityinfo{$thiscity}{weatherurl}  && print "<TD><A HREF='", $cityinfo{$thiscity}{weatherurl},"' target=new>",  $cityinfo{$thiscity}{"city"}  ,"$citystate<\/A><\/TD>",
 319:                               "<TD ALIGN='justify'>" ,   $cityinfo{$thiscity}{"condition"},
 320:                              "<\/TD><TD><IMG  SRC='http://www.cnn.com/WEATHER/images/9712/conds/" ,  $cityinfo{$thiscity}{"weatherpic"},"'>",
 321:                               "<\/TD>",
 322:                              "<TD  ALIGN='CENTER'>" ,    $cityinfo{$thiscity}{"temperature"},  "<\/TD>";
 323:         print "\n";
 324: 
 325:         # log information for highest/ lowest temps.
 326:         if  (($cityinfo{$thiscity}{"temperature"} < $lowtemp )or  ($lowtemp==undef)){
 327:             $lowstate =  "<A HREF='".  $cityinfo{$thiscity}{weatherurl}."' target=new>".  $cityinfo{$thiscity}{"city"}  ."$citystate<\/A>";
 328:             $lowtemp =  $cityinfo{$thiscity}{"temperature"}
 329:         }
 330:         if  ($cityinfo{$thiscity}{"temperature"} > $hitemp){
 331:             $histate =   "<A HREF='".  $cityinfo{$thiscity}{weatherurl}."' target=new>".  $cityinfo{$thiscity}{"city"}  ."$citystate<\/A>";
 332:             $hitemp =  $cityinfo{$thiscity}{"temperature"} ;
 333:         }
 334:         $ctr++;
 335:     }
 336:     # reaching the end of the table.
 337:     # print out highest & lowest of the moment.
 338:     print  "<TR><TD colspan=12 align=center>Extremes of the moment!<\/TD><\/TR>\n";
 339:     print  "<TR><TD>High<\/TD><TD  colspan=4>$histate<\/TD><TD>$hitemp<\/TD><TD>Lo<\/TD><TD  colspan=4>$lowstate<\/TD><TD>$lowtemp<\/TD><\/TR>\n";
 340:     print  "<\/TABLE>\n";
 341: }
 342: 
 343: ####################
 344: #
 345: #   checks for new news & writes it to a headline file.
 346: #
 347: ####################
 348: sub log_news {
 349:     my %l_data;
 350:     my @lines;
 351:     my $hl_found;
 352:     #
 353:     # no headlines? that's OK!
 354:     # (no die statement)
 355:     open HL, "headlines.txt";
 356:     @lines = <HL> ; # headlines.txt format : state\ttimestamp\theadline
 357:     close HL;
 358: 
 359:     #
 360:     #   start by reading all of the existing headlines from "headlines.txt";
 361:     #
 362:     foreach (@lines) {
 363:         @entry = split /\t/;
 364:         my $nextkey = scalar keys %l_data;
 365:         if (@entry[0]) {
 366:             $l_data{$nextkey}{"State"}=@entry[0];
 367:             $l_data{$nextkey}{"Timestamp"}=@entry[1];
 368:             $l_data{$nextkey}{"Headline"}=@entry[2];
 369:             chomp $l_data{$nextkey}{"Headline"};
 370:         }
 371:     }
 372: 
 373:     #
 374:     #   OK, read in the new headlines, and if they don't match any existing ones,
 375:     #   read em into the hash.
 376:     #
 377:     @lines = split /&state\d*\=/, shift;
 378:     $moddate = scalar localtime get_moddate ($newsurl);
 379:     $moddate= UnixDate($moddate, "%m/%d/%Y %H:%M");
 380: 
 381:     foreach (@lines) {
 382:         $hl_found=0;
 383:         @entry = split /&headline\d*\=/;
 384:         @entry[1] =~ s/&headlineload\=done//;
 385:         @entry[1]=~s/^\s//;
 386:         @entry[1]=~s/\s\s/ /g;
 387:         @entry[0] =~ s/headlinecount\=\d*//;
 388:         @entry[0]= trim(@entry[0]);
 389:         my $nextkey = scalar keys %l_data;
 390:         foreach my $thisheadline (keys %l_data) {
 391:             if ($l_data{$thisheadline}{"Headline"} eq @entry[1]){$hl_found=1};
 392:             last if $hl_found;
 393:         }
 394:         if ((not $hl_found) && (@entry[0])){
 395:             $l_data{$nextkey}{"State"}=@entry[0];
 396:             $l_data{$nextkey}{"Timestamp"}=$moddate ;
 397:             $l_data{$nextkey}{"Headline"}=@entry[1];
 398:         }
 399:     }
 400: 
 401: 
 402:     # Now that all that's been done,
 403:     # sort & write all the headlines to "headlines.txt".
 404:     open HL, ">headlines.txt";
 405:     foreach my $thisheadline (sort {($l_data{$a}{"State"} cmp $l_data{$b}{"State"})or
 406:                                     ($l_data{$a}{"Timestamp"} cmp $l_data{$b}{"Timestamp"}) or
 407:                                     ($l_data{$a}{"Headline"} cmp $l_data{$b}{"Headline"}) } keys %l_data){
 408:         print (HL $l_data{$thisheadline}{"State"},"\t");
 409:         print (HL $l_data{$thisheadline}{"Timestamp"},"\t");
 410:         print  (HL $l_data{$thisheadline}{"Headline"},"\n");
 411:     }
 412:     close HL;
 413:     return %l_data;
 414: }
 415: 
 416: 
 417: ####################
 418: #
 419: #   wrapper for LWP::Useragent code.
 420: #   returns last modified date of a url.
 421: #   in : the url to date
 422: #   out : last modified date, or 0 if unable to determine date
 423: #
 424: ####################
 425: sub get_moddate {
 426:     my $ans;
 427:     my $lurl=shift;
 428:     my $ua = LWP::UserAgent->new();
 429: 
 430:     $ans = $ua->request(HTTP::Request->new("HEAD", $lurl));
 431:     if ($ans->is_success) {
 432:         return $ans->last_modified || 0 ;
 433:     } else {
 434:         print STDERR "$lurl: Error [", $ans->code, "] ", $ans->message, "!\n";
 435:     }
 436: }
 437: 
 438: ####################
 439: #
 440: # emulates trim function found
 441: # in other languages. Thank you, cookbook! :)
 442: # in : a string
 443: # out : a string *cough*
 444: #
 445: ####################
 446: sub trim {
 447: 
 448:     my @out = @_;
 449:     for (@out) {
 450:         s/^\s+//;
 451:         s/\s+$//;
 452:     }
 453:     return wantarray ? @out : $out[0];
 454: }
 455: </code>