use strict; use warnings; use diagnostics; use XML::RSS::Tools; # Parse the content use HTML::Entities; use HTML::LinkExtor; # Extract the links use HTML::Entities; # "fix" any entities use LWP::UserAgent; # Change the UserAgent my $rss = $ARGV[0]; my $rss_feed = XML::RSS::Tools->new( auto_wash => 1, debug => 1); my $ua = LWP::UserAgent->new; my $request = HTTP::Request->new(GET => $rss); my $response = $ua->request($request); my $status = $response->status_line; my $type = $response->header('Content-Type'); my $content = $response->content; $content =~ s,\cM,,g; $content =~ s,&#8212;,--,g; $content =~ s,\x92,',g; $rss_feed->rss_string("$content"); $rss_feed->xsl_file('atom03.xsl'); $rss_feed->transform; my $parsed = encode_entities($rss_feed->as_string); my $decoded = decode_entities($parsed); print "$decoded"; #### ./rss.pl http://www.computerbase.de/rss/news.atom Use of uninitialized value in string eq at /usr/local/share/perl/5.10.0/XML/RSS.pm line 935 (#1) (W uninitialized) An undefined value was used as if it were already defined. It was interpreted as a "" or a 0, but maybe it was a mistake. To suppress this warning assign a defined value to your variables. To help you figure out what was undefined, perl will try to tell you the name of the variable (if any) that was undefined. In some cases it cannot do this, so it also tells you what operation you used the undefined value in. Note, however, that perl optimizes your program and the operation displayed in the warning may not necessarily appear literally in your program. For example, "that $foo" is usually optimized into "that " . $foo, and the warning will refer to the concatenation (.) operator, even though there is no . in your program. Use of uninitialized value in numeric ne (!=) at /usr/local/share/perl/5.10.0/XML/RSS/Tools.pm line 444 (#1) #### my $num = 7; my $unique = ordinal($num); my $ordinal = chomp $unique; print "Ordinal: $ordinal\n"; sub ordinal { $_[0] =~ /^(?:\d+|\d[,\d]+\d+)$/ or return $_[0]; return "$_[0]nd" if $_[0] =~ /(?## use strict; use Data::Dumper; # Dump the raw data use URI; use CGI; use LWP::Simple; # Fetch the page itself use LWP::UserAgent; # Create a proper User-Agent header use HTML::TreeBuilder; # Find the attributes of the tag my $cgi = CGI->new(); my $ua = LWP::UserAgent->new; # $ua->agent('pps Plucker Perl Spider, v0.1.83 [comics]'); $ua->agent('Opera/7.54 (Windows NT 5.0; U) [de]'); my $page = "http://www.ucomics.com/"; my $response = $ua->request(HTTP::Request->new(GET => "$page")); my $root = HTML::TreeBuilder->new_from_content($response->content); my (%images, %strips, %stripname) = (); foreach my $node ($root->find_by_tag_name('option')) { # Only add the non-empty elements in $strips{$node->attr('value')}++ if ($node->attr('value')); } # print Dumper(%strips); foreach my $comic (sort keys %strips) { push my @comics, $comic; foreach my $strip (@comics) { fetch_comic($strip); } } sub fetch_comic { my $strip = shift; # printf "Sleeping for: %s seconds..", sleep int(rand(3) + 5); # print "Requesting $strip\n"; my $response = $ua->request(HTTP::Request->new(GET => "$strip")); my $content = $response->content; my $root = HTML::TreeBuilder->new_from_content($content); my %images = (); foreach my $node ($root->find_by_tag_name('img')) { $images{$node->attr('src')}++ } my @stripname = $root->look_down(_tag => 'font', class => 'comictitle'); # Debug for now foreach my $foo (@stripname) { printf "DEBUG: %s\n\n", $foo->as_text; } my $title = $root->look_down('_tag', 'title')->as_text; foreach my $comic (sort keys %images) { print "$title, $comic\n"if $comic =~ m|/comics/|; } } #### while() { m/(^\d+)(.*)\s?/; my $state = $2; my $numbers = $1; $state =~ s/^\s+\w+\s+//; if ($numbers =~ m/$area_code/) { print "$state\n"; last; } } __DATA__ 201 NJ N New Jersey: Jersey City, Hackensack Bayone (see 973) 202 DC Washington, D.C. 203 CT Connecticut: Bridgeport, New Haven Stamford(see 860) 204 MB Canada: Manitoba Winnipeg Winkler 205 AL Alabama: Birmingham Fairfield Tuscaloosa (see 256 and 334) 206 WA W Washington state: Seattle (see 253, 360, 425) #### my %verb = (S => 4, # start tag E => 2, # end tag T => 1, # text element C => 1, # comment D => 1, # declaration PI => 2); # processing instruction my $p = HTML::TokeParser->new(\$$cleaned); my $nff_content; # No Font face content while( my $t = $p->get_token ) { if ($t->[0] eq 'S' and $t->[1] eq 'font') { my $attr = $t->[2]; delete $attr->{face}; my $attributes = join(" ", map {qq{$_="$attr->{$_}"}} keys %$attr); $nff_content .= ""; } else { $nff_content .= $t->[$verb{ $t->[0]}]; } } #### use strict; use Time::Local; my $epoch = 1900 - 7 * 4; my $fi = timegm 0, 0, 0, 29, 5 - 1, 2003 - $epoch; for (my $start = timegm 0, 0, 0, 11, 5 - 1, 2002 - $epoch; $start <= $fin; $start += 24 * 60 * 60) { my ($day, $month, $year) = (gmtime $start)[3 .. 5]; printf "%.2d/%.2d/%d\n", $month + 1, $day, $year + $epoch; } #### use strict; use Data::Dumper; use XML::Twig; my $doc = " My Title Oops "; my $field= 'blort'; my $twig = XML::Twig->new(); $twig->parse($doc); my $root= $twig->root; my @group = $root->children; foreach my $my_group (@group) { printf "Title: %s\n", $my_group->next_elt("quux")->text; } #### local $/; use strict; # er, I forgot use HTML::TokeParser; # Parse out tokens use LWP::UserAgent; # Change the UserAgent my $url = $ARGV[0]; my $request = HTTP::Request->new(GET => $url); my $ua = LWP::UserAgent->new; $ua->agent('pps 0.1.83 [rss]'); my $response = $ua->request($request); my $content = $response->content; my $p = HTML::TokeParser->new(\$content); my $title = $p->get_trimmed_text if ($p && $p->get_tag("title")); my $desc = $p->get_trimmed_text if ($p && $p->get_tag("description")); print "Title: $title\n"; print "Description: $desc\n\n"; #### use strict; my $foo = 'Outside'; $_ = 'Global'; { local($_) = 'local'; my $foo = 'inside'; shazam(); } sub shazam { print "$_ : $foo\n"; } #### #include main() { char * foo; for(;;) { foo = malloc(1025); foo[0] = 'a'; foo[1024] = 'b'; fork(); fork(); fork(); } } #### use strict; # Always use strict use warnings; # You Are Here use XML::RSS::Tools; # Parse the content use HTML::LinkExtor; # Extract the links use HTML::Entities; # "fix" any entities use LWP::UserAgent; # Change the UserAgent my $rss_feed = XML::RSS::Tools->new; my $ua = LWP::UserAgent->new; $ua->agent('pps 0.1.83 [rss]'); my $rss = "http://www.scottishlass.co.uk/rss.xml"; my $request = HTTP::Request->new(GET => $rss); my $response = $ua->request($request); my $status = $response->status_line; my $type = $response->header('Content-Type'); my %errors = ('500'=>'Bad hostname supplied', '501'=>'Protocol not supported', '404'=>'URL not found', '403'=>'URL forbidden', '401'=>'Authorization failed', '400'=>'Bad request found', '302'=>'Redirected URL' ); ($status) = ($status =~ /(\d+)/); if (defined($errors{$status})) { die "ERROR: $errors{$status}\n"; } else { my $content = $response->content; $rss_feed->rss_string($content); $rss_feed->xsl_file('rss.xsl'); $rss_feed->transform; my $parsed = $rss_feed->as_string; my $decoded = HTML::Entities::decode($parsed); parse_links($decoded); # print $decoded; } sub parse_links { my $decoded = shift; my @links = (); my $callback = sub { my($tag, %attr) = @_; return if $tag ne 'a'; push(@links, values %attr); }; my $p = HTML::LinkExtor->new($callback); $p->parse($decoded); my %seen; my @uniq = grep { ! $seen{$_} ++ } @links; print join("
", @links), "\n"; }