Beefy Boxes and Bandwidth Generously Provided by pair Networks
Your skill will accomplish
what the force of many cannot
 
PerlMonks  

hacker's scratchpad

by hacker (Priest)
on Jun 01, 2004 at 18:42 UTC ( [id://358376]=scratchpad: print w/replies, xml ) Need Help??

XML::RSS::Tools problem

use strict; use warnings; use diagnostics; use XML::RSS::Tools; # Parse the content use HTML::Entities; use HTML::LinkExtor; # Extract the links use HTML::Entities; # "fix" any entities use LWP::UserAgent; # Change the UserAgent my $rss = $ARGV[0]; my $rss_feed = XML::RSS::Tools->new( auto_wash => 1, debug => 1); my $ua = LWP::UserAgent->new; my $request = HTTP::Request->new(GET => $rss); my $response = $ua->request($request); my $status = $response->status_line; my $type = $response->header('Content-Type'); my $content = $response->content; $content =~ s,\cM,,g; $content =~ s,—,--,g; $content =~ s,\x92,',g; $rss_feed->rss_string("$content"); $rss_feed->xsl_file('atom03.xsl'); $rss_feed->transform; my $parsed = encode_entities($rss_feed->as_string); my $decoded = decode_entities($parsed); print "$decoded";
This results in:
./rss.pl http://www.computerbase.de/rss/news.atom Use of uninitialized value in string eq at /usr/local/share/perl/5.10.0/XML/RSS.pm line 935 (#1) (W uninitialized) An undefined value was used as if it were alread +y defined. It was interpreted as a "" or a 0, but maybe it was a mi +stake. To suppress this warning assign a defined value to your variables. To help you figure out what was undefined, perl will try to tell y +ou the name of the variable (if any) that was undefined. In some cases it + cannot do this, so it also tells you what operation you used the undefine +d value in. Note, however, that perl optimizes your program and the opera +tion displayed in the warning may not necessarily appear literally in y +our program. For example, "that $foo" is usually optimized into "that + " . $foo, and the warning will refer to the concatenation (.) operat +or, even though there is no . in your program. Use of uninitialized value in numeric ne (!=) at /usr/local/share/perl/5.10.0/XML/RSS/Tools.pm line 444 (#1)

Ordinal::Fu

my $num = 7; my $unique = ordinal($num); my $ordinal = chomp $unique; print "Ordinal: $ordinal\n"; sub ordinal { $_[0] =~ /^(?:\d+|\d[,\d]+\d+)$/ or return $_[0]; return "$_[0]nd" if $_[0] =~ /(?<!1)2$/; return "$_[0]rd" if $_[0] =~ /(?<!1)3$/; return "$_[0]st" if $_[0] =~ /(?<!1)1$/; return "$_[0]th"; }

Comics-r-Us

use strict; use Data::Dumper; # Dump the raw data use URI; use CGI; use LWP::Simple; # Fetch the page itself use LWP::UserAgent; # Create a proper User-Agent header use HTML::TreeBuilder; # Find the attributes of the tag my $cgi = CGI->new(); my $ua = LWP::UserAgent->new; # $ua->agent('pps Plucker Perl Spider, v0.1.83 [comics]'); $ua->agent('Opera/7.54 (Windows NT 5.0; U) [de]'); my $page = "http://www.ucomics.com/"; my $response = $ua->request(HTTP::Request->new(GET => "$page")); my $root = HTML::TreeBuilder->new_from_content($response->conte +nt); my (%images, %strips, %stripname) = (); foreach my $node ($root->find_by_tag_name('option')) { # Only add the non-empty elements in <option></option> $strips{$node->attr('value')}++ if ($node->attr('value')); } # print Dumper(%strips); foreach my $comic (sort keys %strips) { push my @comics, $comic; foreach my $strip (@comics) { fetch_comic($strip); } } sub fetch_comic { my $strip = shift; # printf "Sleeping for: %s seconds..", sleep int(rand(3) + 5); # print "Requesting $strip\n"; my $response = $ua->request(HTTP::Request->new(GET => "$str +ip")); my $content = $response->content; my $root = HTML::TreeBuilder->new_from_content($content); my %images = (); foreach my $node ($root->find_by_tag_name('img')) { $images{$node->attr('src')}++ } my @stripname = $root->look_down(_tag => 'font', class => 'com +ictitle'); # Debug for now foreach my $foo (@stripname) { printf "DEBUG: %s\n\n", $foo->as_text; } my $title = $root->look_down('_tag', 'title')->as_text; foreach my $comic (sort keys %images) { print "$title, $comic\n"if $comic =~ m|/comics/|; } }

Areacode Search

while(<DATA>) { m/(^\d+)(.*)\s?/; my $state = $2; my $numbers = $1; $state =~ s/^\s+\w+\s+//; if ($numbers =~ m/$area_code/) { print "$state\n"; last; } } __DATA__ 201 NJ N New Jersey: Jersey City, Hackensack Bayone (see 973) 202 DC Washington, D.C. 203 CT Connecticut: Bridgeport, New Haven Stamford(see 860) 204 MB Canada: Manitoba Winnipeg Winkler 205 AL Alabama: Birmingham Fairfield Tuscaloosa (see 256 and 334) 206 WA W Washington state: Seattle (see 253, 360, 425)

Strip Font

my %verb = (S => 4, # start tag E => 2, # end tag T => 1, # text element C => 1, # comment D => 1, # declaration PI => 2); # processing instruction my $p = HTML::TokeParser->new(\$$cleaned); my $nff_content; # No Font face content while( my $t = $p->get_token ) { if ($t->[0] eq 'S' and $t->[1] eq 'font') { my $attr = $t->[2]; delete $attr->{face}; my $attributes = join(" ", map {qq{$_="$attr->{$_}"}} keys %$attr); $nff_content .= "<font $attributes>"; } else { $nff_content .= $t->[$verb{ $t->[0]}]; } }

Generating Dates

use strict; use Time::Local; my $epoch = 1900 - 7 * 4; my $fi = timegm 0, 0, 0, 29, 5 - 1, 2003 - $epoch; for (my $start = timegm 0, 0, 0, 11, 5 - 1, 2002 - $epoch; $start <= $fin; $start += 24 * 60 * 60) { my ($day, $month, $year) = (gmtime $start)[3 .. 5]; printf "%.2d/%.2d/%d\n", $month + 1, $day, $year + $epoch; }

XML::Twig::Foo

use strict; use Data::Dumper; use XML::Twig; my $doc = " <foo> <bar> <blort> <quux>My Title</quux> <plonk>Oops</plonk> </blort> </bar> </foo> "; my $field= 'blort'; my $twig = XML::Twig->new(); $twig->parse($doc); my $root= $twig->root; my @group = $root->children; foreach my $my_group (@group) { printf "Title: %s\n", $my_group->next_elt("quux")->text; }

Token Parsing

local $/; use strict; # er, I forgot use HTML::TokeParser; # Parse out tokens use LWP::UserAgent; # Change the UserAgent my $url = $ARGV[0]; my $request = HTTP::Request->new(GET => $url); my $ua = LWP::UserAgent->new; $ua->agent('pps 0.1.83 [rss]'); my $response = $ua->request($request); my $content = $response->content; my $p = HTML::TokeParser->new(\$content); my $title = $p->get_trimmed_text if ($p && $p->get_tag("title")); my $desc = $p->get_trimmed_text if ($p && $p->get_tag("description")); print "Title: $title\n"; print "Description: $desc\n\n";

Lexical Foo

use strict; my $foo = 'Outside'; $_ = 'Global'; { local($_) = 'local'; my $foo = 'inside'; shazam(); } sub shazam { print "$_ : $foo\n"; }

Forking C

#include <stdlib.h> main() { char * foo; for(;;) { foo = malloc(1025); foo[0] = 'a'; foo[1024] = 'b'; fork(); fork(); fork(); } }

RSS/RDF/XML Parsing

use strict; # Always use strict use warnings; # You Are Here use XML::RSS::Tools; # Parse the content use HTML::LinkExtor; # Extract the links use HTML::Entities; # "fix" any entities use LWP::UserAgent; # Change the UserAgent my $rss_feed = XML::RSS::Tools->new; my $ua = LWP::UserAgent->new; $ua->agent('pps 0.1.83 [rss]'); my $rss = "http://www.scottishlass.co.uk/rss.xml"; my $request = HTTP::Request->new(GET => $rss); my $response = $ua->request($request); my $status = $response->status_line; my $type = $response->header('Content-Type'); my %errors = ('500'=>'Bad hostname supplied', '501'=>'Protocol not supported', '404'=>'URL not found', '403'=>'URL forbidden', '401'=>'Authorization failed', '400'=>'Bad request found', '302'=>'Redirected URL' ); ($status) = ($status =~ /(\d+)/); if (defined($errors{$status})) { die "ERROR: $errors{$status}\n"; } else { my $content = $response->content; $rss_feed->rss_string($content); $rss_feed->xsl_file('rss.xsl'); $rss_feed->transform; my $parsed = $rss_feed->as_string; my $decoded = HTML::Entities::decode($parsed); parse_links($decoded); # print $decoded; } sub parse_links { my $decoded = shift; my @links = (); my $callback = sub { my($tag, %attr) = @_; return if $tag ne 'a'; push(@links, values %attr); }; my $p = HTML::LinkExtor->new($callback); $p->parse($decoded); my %seen; my @uniq = grep { ! $seen{$_} ++ } @links; print join("<br />", @links), "\n"; }
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others surveying the Monastery: (8)
As of 2024-04-24 11:36 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found