0: #This was originally for jcwren's programming contest,
1: #but after suggestions from friends, it really doesn't
2: #match the critera anymore, so I didn't feel comfortable
3: #posting in under the [id://43857|original thread], but
4: #I'm still sufficiently happy with it that I wanted to
5: #post it. This is the first time I've used LWP, and the
6: #contest was a great way to learn about a great, versatile module.<br>
7:
8: #This script reads in CNNi's headlines, the "interactive headlines" (linked to larger write ups), and weather.
9:
10: #As always, I look forward to your comments, criticisms and suggestions.
11:
12: #updated 20000108 -- added some parameters and self_url() trickery.
13: # see lines 101 and 201-237
14: <code>
15: use strict;
16: use LWP::Simple;
17: use LWP::UserAgent;
18: use CGI::Pretty qw(:all);
19: use Date::Manip; &Date_Init ("TZ=US/Mountain");
20: my %state_abbr = (
21: "ALABAMA" => 'AL',
22: "ALASKA" => 'AK',
23: "ARIZONA" => 'AZ',
24: "ARKANSAS" => 'AR',
25: "CALIFORNIA" => 'CA',
26: "COLORADO" => 'CO',
27: "CONNECTICUT" => 'CT',
28: "DELAWARE" => 'DE',
29: "D.C." => 'DC',
30: "FLORIDA" => 'FL',
31: "GEORGIA" => 'GA',
32: "HAWAII" => 'HI',
33: "IDAHO" => 'ID',
34: "ILLINOIS" => 'IL',
35: "INDIANA" => 'IN',
36: "IOWA" => 'IA',
37: "KANSAS" => 'KS',
38: "KENTUCKY" => 'KY',
39: "LOUISIANA" => 'LA',
40: "MAINE" => 'ME',
41: "MARYLAND" => 'MD',
42: "MASSACHUSETTS" => 'MA',
43: "MICHIGAN" => 'MI',
44: "MINNESOTA" => 'MN',
45: "MISSISSIPPI" => 'MS',
46: "MISSOURI" => 'MO',
47: "MONTANA" => 'MT',
48: "NEBRASKA" => 'NE',
49: "NEVADA" => 'NV',
50: "NEW HAMPSHIRE" => 'NH',
51: "NEW JERSEY" => 'NJ',
52: "NEW MEXICO" => 'NM',
53: "NEW YORK" => 'NY',
54: "NORTH CAROLINA" => 'NC',
55: "NORTH DAKOTA" => 'ND',
56: "OHIO" => 'OH',
57: "OKLAHOMA" => 'OK',
58: "OREGON" => 'OR',
59: "PENNSYLVANIA" => 'PA',
60: "PUERTO RICO" => 'PR',
61: "RHODE ISLAND" => 'RI',
62: "SOUTH CAROLINA" => 'SC',
63: "SOUTH DAKOTA" => 'SD',
64: "TENNESSEE" => 'TN',
65: "TEXAS" => 'TX',
66: "UTAH" => 'UT',
67: "VERMONT" => 'VT',
68: "VIRGIN ISLANDS" => 'VI',
69: "VIRGINIA" => 'VA',
70: "WASHINGTON" => 'WA',
71: "WEST VIRGINIA" => 'WV',
72: "WISCONSIN" => 'WI',
73: "WYOMING" => 'WY',
74: );
75:
76:
77:
78: my $newsurl ="http://headlinenews.cnn.com/QUICKNEWS/virtual/swf.headline.txt";
79: my $weatherurl ="http://headlinenews.cnn.com/QUICKNEWS/virtual/swf.weather.txt";
80: my $interurl ="http://headlinenews.cnn.com/QUICKNEWS/virtual/swf.interactive.txt";
81: my $interactive_ticker;
82: my $news_ticker = retrieve_news();
83: my $weather_ticker = retrieve_weather();
84: if (param("showinteractive")!~/no/i){
85: $interactive_ticker= retrieve_interactive();
86: }
87: my @state_weather =split /&/, $weather_ticker;
88:
89: my $citystate;
90: my $lowstate; my $lowtemp;
91: my $histate; my $hitemp;
92:
93: my ($URL, @lines, @entry, $content);
94: my ($moddate);
95: my (%headlines);
96:
97: write_header("CNN wrapup");
98: %headlines = log_news ($news_ticker);
99: write_news( $interactive_ticker, %headlines),"<P>";
100: if (param("showweather")!~/no/i){
101: write_weather(@state_weather);
102: }
103: write_footer();
104:
105: ####################
106: #
107: # Prints out a nice HTML header.
108: #
109: ####################
110: sub write_header {
111: my $title=shift;
112: print header();
113: print start_html(-Title => $title, -BGCOLOR=>"#000000",-TEXT=>"#00FF00", -LINK=>"#33FF00", -VLINK=>"00CC00", -ALINK=>"FFFFFF");
114: print start_form(); # start_multipart_form() if file upload
115: my $das_url =&self_url();
116: print "<META HTTP-EQUIV='Refresh' CONTENT='600;URL=$das_url'>";
117: }
118:
119: ####################
120: #
121: # End of page stuff.
122: #
123: ####################
124: sub write_footer {
125: my $newsdate=get_moddate($newsurl);$newsdate= scalar localtime $newsdate; $newsdate = UnixDate($newsdate, "%m/%d/%Y %H:%M");
126: my $weatherdate= get_moddate($weatherurl);$weatherdate= scalar localtime $weatherdate ;$weatherdate = UnixDate($weatherdate, "%m/%d/%Y %H:%M");
127: my $presenttime=ParseDate("now");
128: print "<center><p><A HREF='$newsurl'>Headlines</A> last updated ", $newsdate,
129: "\n | <A HREF='$weatherurl'>Weather</A> last updated ", $weatherdate,
130: "\n | This page last updated ", UnixDate($presenttime, "%m/%d/%Y %H:%M"),"</CENTER>",
131: end_form(), end_html() }
132:
133: ####################
134: #
135: # wrapper for LWP->get() call.
136: #
137: ####################
138: sub retrieve_weather {
139: return "&" . get ($weatherurl);
140: }
141:
142: ####################
143: #
144: # wrapper for LWP->get() call.
145: #
146: ####################
147: sub retrieve_news {
148: return get ($newsurl);
149: }
150:
151: ####################
152: #
153: # wrapper for LWP->get() call.
154: #
155: ####################
156: sub retrieve_interactive{
157: return get ($interurl);
158: }
159:
160: ####################
161: #
162: # prints out the interactive and state headlines in a table
163: #
164: ####################
165:
166: sub write_news{
167: my ($l_inter, %l_headlines) = @_;
168: my $statectr;
169: my $new_tagstart;
170: my $new_tagend;
171:
172: #start the table.
173: print '<TABLE align=CENTER BORDER="1">';
174: print '<TR><TD ALIGN=CENTER colspan=3 valign=center><h4> Breaking news</h4></TD></TR>';
175:
176: # split up the interactive headlines.
177: @lines = split /&intheadline\d*\=/, $l_inter;
178:
179: # get last modified date of the $interurl file. format nicely.
180: $moddate = scalar localtime get_moddate ($interurl);
181: $moddate= UnixDate($moddate, "%m/%d/%Y %H:%M");
182:
183: # split the headline information URL & headline text & put into table.
184: foreach (@lines) {
185: my $hl_found=0;
186: @entry = split /&inturl\d*\=/;
187: @entry[1] =~ s/^\s//;
188: @entry[1] =~ s/\s\s/ /g;
189: @entry[0] =~ s/interactivecount\=\d*//;
190: if (@entry[0]){
191: print "<TR><TD colspan =3 align=center><A HREF=http:\/\/headlinenews.cnn.com@entry[1] target=new>@entry[0]</a><\/TD><\/TR>\n";
192: }
193: }
194:
195:
196: # begin next part of table.
197: print '<TR><TD colspan=3 align=center><h4>Headlines from around the country</h4>';
198:
199: # print out the state HTML anchors in two lines.
200: # note there's no checking to see if one exists or not :(
201: foreach my $thisstate ( sort keys %state_abbr) {
202: $statectr ++;
203: print "<A HREF=#",$state_abbr{$thisstate},">", $state_abbr{$thisstate}, "</a> ";
204: ($statectr==26) && print "<BR>";
205: }
206: # print an HTML anchor to the Weather.
207: print '<BR><A HREF=#WEATHER>Weather</A></TD></TR>';
208:
209: # sorting headlines on state, then timestamp, then headline text.
210: PRINTHEADLINE: foreach my $thisheadline (sort {($l_headlines{$a}{"State"} cmp $l_headlines{$b}{"State"})or
211: ($l_headlines{$b}{"Timestamp"} cmp $l_headlines{$a}{"Timestamp"}) or
212: ($l_headlines{$a}{"Headline"} cmp $l_headlines{$b}{"Headline"}) } keys %l_headlines) {
213: my $broken_headline ="";# $l_headlines{$thisheadline}{"Headline"};
214:
215: if (UnixDate ($l_headlines{$thisheadline}{"Timestamp"}, "%m/%d/%Y") eq UnixDate (&ParseDate("Today"),"%m/%d/%Y")) {
216: $new_tagstart='<FONT COLOR="#FFFFFF">' ;$new_tagend= '</FONT>';
217: } else {
218: if (param ("newonly")=~/yes/i) {
219: next PRINTHEADLINE
220: }
221: $new_tagstart='' ;$new_tagend= '';
222: }
223:
224: if (param("search")!~/^$/) {
225: my $searchin = param("search");
226: $searchin =~ s/(\?)/\\w/gi;
227: $searchin =~ s/(\*)/\.$1?/gi; #line noise? ha!
228: $searchin = "\\b". $searchin . "\\b";
229: if (defined(param("searchmethod"))) {
230: if (param("searchmethod")=~/showonly/ ) {
231: if ($l_headlines{$thisheadline}{"Headline"}!~/$searchin/i) {next PRINTHEADLINE}
232: }
233: }
234: $l_headlines{$thisheadline}{"Headline"}=~ s/($searchin)/<FONT COLOR="#FF0000">$1<\/FONT>/i;
235: }
236:
237: if (param("definitions")=~/yes/i){
238: map {$broken_headline .= "<A HREF='http://www.m-w.com/cgi-bin/dictionary?va=$_'>$_</A> " }split (/ /,$l_headlines{$thisheadline}{"Headline"});
239: } else {
240: $broken_headline=$l_headlines{$thisheadline}{"Headline"}
241: }
242:
243: #
244: # Can you figure out why the anchor's printed out in the headline cell, rather than
245: # the state or at the beginning of the row?
246: #
247: print "<TR>",
248: "<TD>",$new_tagstart,$l_headlines{$thisheadline}{"State"},$new_tagend,"<\/TD>",
249: "<TD>",$new_tagstart,$l_headlines{$thisheadline}{"Timestamp"},"<\/TD>",$new_tagend,
250: "<TD>","<A NAME='\#",$new_tagstart,$state_abbr{$l_headlines{$thisheadline}{"State"}},"'> ",$new_tagstart,$broken_headline,"<\/TD>",$new_tagend,
251: "<\/TR>\n";
252: }
253: # end table.
254: print "<\/TABLE>";
255: print &self_url();
256: }
257: ####################
258: #
259: # put the weather into a table.
260: #
261: #
262: ####################
263: sub write_weather {
264: my %cityinfo;
265: my $ctr=0; # used for columnating.
266: #
267: # split into HoH format
268: # {INDEX}{INFOTYPE}{INFODATA}
269: # e.g. {1}{CITY}{BIRMINGHAM, AL}
270: # {1}{TEMP}{-5}
271: # etc...
272: #
273: foreach my $temp (@_) {
274: $temp=~/([a-zA-z]*)(\d*)\=(.*)/;
275: $cityinfo{$2}{$1}=$3;
276: }
277:
278: # print out the HTML Anchor.
279: print "<A NAME=WEATHER>";
280:
281: # begin table.
282: print "<TABLE align=CENTER border=1>\n";
283: print "<TR><TD colspan=12 align=center>Weather from all 50 states<\/TD><\/TR>\n";
284: print "<TR><TD>City Name<\/TD><TD colspan=2> Current<br>Conditions<\/TD><TD>Current<br>Temperature<\/TD>",
285: "<TD>City Name<\/TD><TD colspan=2> Current<br>Conditions<\/TD><TD>Current<br>Temperature<\/TD>",
286: "<TD>City Name<\/TD><TD colspan=2> Current<br>Conditions<\/TD><TD>Current<br>Temperature<\/TD>\n";
287:
288: $ctr = 0;
289: foreach my $thiscity (sort { $cityinfo{$a}{"city"}cmp $cityinfo{$b}{"city"}} keys %cityinfo ) {
290: if ($cityinfo{$thiscity}{"weatherurl"} =~m#http://www.cnn.com/WEATHER/(c)(.)/#i) {
291: $citystate=", Canada"
292: } else {
293: # use the second saved part of the regexp to populate the state. or district.
294: # stupid districts.
295: $cityinfo{$thiscity}{"weatherurl" } =~m#http://www.cnn.com/WEATHER/(..)/(.*)/#i;
296: $citystate= ", $2 ";
297: }
298: # most of the time, the picture for a weather condition is just the
299: # condition name, stripped of whitespace, plus '.GIF'.
300: $cityinfo{$thiscity}{"weatherpic"} = $cityinfo{$thiscity}{"condition"}. ".gif";
301: $cityinfo{$thiscity}{"weatherpic"} =~ s/[ |\/]/./g;
302:
303: # but not always. stupid special exceptions.
304: $cityinfo{$thiscity}{"weatherpic"} =~ s/haze/hazy/g;
305: $cityinfo{$thiscity}{"weatherpic"} =~ s/foggy/fog/g;
306: $cityinfo{$thiscity}{"weatherpic"} =~ s/lgt\.snow\.fog/snow.fog/g;
307: $cityinfo{$thiscity}{"weatherpic"} =~ s/snow\.and\.fog/snow.fog/g;
308: $cityinfo{$thiscity}{"weatherpic"} =~ s/frz\.rain/freezing.rain/g;
309: $cityinfo{$thiscity}{"weatherpic"} =~ s/mist/misty/g;
310: $cityinfo{$thiscity}{"weatherpic"} =~ s/lgt.snowshower/light.snow/g;
311:
312:
313: # three columns to a row.
314: if (!(($ctr-1) % 3)) {print "<\/TR>"}
315:
316: # print out one row of data.
317: $cityinfo{$thiscity}{weatherurl} && print "<TD><A HREF='", $cityinfo{$thiscity}{weatherurl},"' target=new>", $cityinfo{$thiscity}{"city"} ,"$citystate<\/A><\/TD>",
318: "<TD ALIGN='justify'>" , $cityinfo{$thiscity}{"condition"},
319: "<\/TD><TD><IMG SRC='http://www.cnn.com/WEATHER/images/9712/conds/" , $cityinfo{$thiscity}{"weatherpic"},"'>",
320: "<\/TD>",
321: "<TD ALIGN='CENTER'>" , $cityinfo{$thiscity}{"temperature"}, "<\/TD>";
322: print "\n";
323:
324: # log information for highest/ lowest temps.
325: if (($cityinfo{$thiscity}{"temperature"} < $lowtemp )or ($lowtemp==undef)){
326: $lowstate = "<A HREF='". $cityinfo{$thiscity}{weatherurl}."' target=new>". $cityinfo{$thiscity}{"city"} ."$citystate<\/A>";
327: $lowtemp = $cityinfo{$thiscity}{"temperature"}
328: }
329: if ($cityinfo{$thiscity}{"temperature"} > $hitemp){
330: $histate = "<A HREF='". $cityinfo{$thiscity}{weatherurl}."' target=new>". $cityinfo{$thiscity}{"city"} ."$citystate<\/A>";
331: $hitemp = $cityinfo{$thiscity}{"temperature"} ;
332: }
333: $ctr++;
334: }
335: # reaching the end of the table.
336: # print out highest & lowest of the moment.
337: print "<TR><TD colspan=12 align=center>Extremes of the moment!<\/TD><\/TR>\n";
338: print "<TR><TD>High<\/TD><TD colspan=4>$histate<\/TD><TD>$hitemp<\/TD><TD>Lo<\/TD><TD colspan=4>$lowstate<\/TD><TD>$lowtemp<\/TD><\/TR>\n";
339: print "<\/TABLE>\n";
340: }
341:
342: ####################
343: #
344: # checks for new news & writes it to a headline file.
345: #
346: ####################
347: sub log_news {
348: my %l_data;
349: my @lines;
350: my $hl_found;
351: #
352: # no headlines? that's OK!
353: # (no die statement)
354: open HL, "headlines.txt";
355: @lines = <HL> ; # headlines.txt format : state\ttimestamp\theadline
356: close HL;
357:
358: #
359: # start by reading all of the existing headlines from "headlines.txt";
360: #
361: foreach (@lines) {
362: @entry = split /\t/;
363: my $nextkey = scalar keys %l_data;
364: if (@entry[0]) {
365: $l_data{$nextkey}{"State"}=@entry[0];
366: $l_data{$nextkey}{"Timestamp"}=@entry[1];
367: $l_data{$nextkey}{"Headline"}=@entry[2];
368: chomp $l_data{$nextkey}{"Headline"};
369: }
370: }
371:
372: #
373: # OK, read in the new headlines, and if they don't match any existing ones,
374: # read em into the hash.
375: #
376: @lines = split /&state\d*\=/, shift;
377: $moddate = scalar localtime get_moddate ($newsurl);
378: $moddate= UnixDate($moddate, "%m/%d/%Y %H:%M");
379:
380: foreach (@lines) {
381: $hl_found=0;
382: @entry = split /&headline\d*\=/;
383: @entry[1] =~ s/&headlineload\=done//;
384: @entry[1]=~s/^\s//;
385: @entry[1]=~s/\s\s/ /g;
386: @entry[0] =~ s/headlinecount\=\d*//;
387: @entry[0]= trim(@entry[0]);
388: my $nextkey = scalar keys %l_data;
389: foreach my $thisheadline (keys %l_data) {
390: if ($l_data{$thisheadline}{"Headline"} eq @entry[1]){$hl_found=1};
391: last if $hl_found;
392: }
393: if ((not $hl_found) && (@entry[0])){
394: $l_data{$nextkey}{"State"}=@entry[0];
395: $l_data{$nextkey}{"Timestamp"}=$moddate ;
396: $l_data{$nextkey}{"Headline"}=@entry[1];
397: }
398: }
399:
400:
401: # Now that all that's been done,
402: # sort & write all the headlines to "headlines.txt".
403: open HL, ">headlines.txt";
404: foreach my $thisheadline (sort {($l_data{$a}{"State"} cmp $l_data{$b}{"State"})or
405: ($l_data{$a}{"Timestamp"} cmp $l_data{$b}{"Timestamp"}) or
406: ($l_data{$a}{"Headline"} cmp $l_data{$b}{"Headline"}) } keys %l_data){
407: print (HL $l_data{$thisheadline}{"State"},"\t");
408: print (HL $l_data{$thisheadline}{"Timestamp"},"\t");
409: print (HL $l_data{$thisheadline}{"Headline"},"\n");
410: }
411: close HL;
412: return %l_data;
413: }
414:
415:
416: ####################
417: #
418: # wrapper for LWP::Useragent code.
419: # returns last modified date of a url.
420: # in : the url to date
421: # out : last modified date, or 0 if unable to determine date
422: #
423: ####################
424: sub get_moddate {
425: my $ans;
426: my $lurl=shift;
427: my $ua = LWP::UserAgent->new();
428:
429: $ans = $ua->request(HTTP::Request->new("HEAD", $lurl));
430: if ($ans->is_success) {
431: return $ans->last_modified || 0 ;
432: } else {
433: print STDERR "$lurl: Error [", $ans->code, "] ", $ans->message, "!\n";
434: }
435: }
436:
437: ####################
438: #
439: # emulates trim function found
440: # in other languages. Thank you, cookbook! :)
441: # in : a string
442: # out : a string *cough*
443: #
444: ####################
445: sub trim {
446:
447: my @out = @_;
448: for (@out) {
449: s/^\s+//;
450: s/\s+$//;
451: }
452: return wantarray ? @out : $out[0];
453: }
454: </code> In reply to CNNi slurper by boo_radley
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |