scratchpad
hacker
<h3>XML::RSS::Tools problem</h3><code>
use strict;
use warnings;
use diagnostics;
use XML::RSS::Tools; # Parse the content
use HTML::Entities;
use HTML::LinkExtor; # Extract the links
use HTML::Entities; # "fix" any entities
use LWP::UserAgent; # Change the UserAgent
my $rss = $ARGV[0];
my $rss_feed = XML::RSS::Tools->new(
auto_wash => 1,
debug => 1);
my $ua = LWP::UserAgent->new;
my $request = HTTP::Request->new(GET => $rss);
my $response = $ua->request($request);
my $status = $response->status_line;
my $type = $response->header('Content-Type');
my $content = $response->content;
$content =~ s,\cM,,g;
$content =~ s,&#8212;,--,g;
$content =~ s,\x92,',g;
$rss_feed->rss_string("$content");
$rss_feed->xsl_file('atom03.xsl');
$rss_feed->transform;
my $parsed = encode_entities($rss_feed->as_string);
my $decoded = decode_entities($parsed);
print "$decoded";</code>
This results in:
<code>
./rss.pl http://www.computerbase.de/rss/news.atom
Use of uninitialized value in string eq at
/usr/local/share/perl/5.10.0/XML/RSS.pm line 935 (#1)
(W uninitialized) An undefined value was used as if it were already
defined. It was interpreted as a "" or a 0, but maybe it was a mistake.
To suppress this warning assign a defined value to your variables.
To help you figure out what was undefined, perl will try to tell you the
name of the variable (if any) that was undefined. In some cases it cannot
do this, so it also tells you what operation you used the undefined value
in. Note, however, that perl optimizes your program and the operation
displayed in the warning may not necessarily appear literally in your
program. For example, "that $foo" is usually optimized into "that "
. $foo, and the warning will refer to the concatenation (.) operator,
even though there is no . in your program.
Use of uninitialized value in numeric ne (!=) at
/usr/local/share/perl/5.10.0/XML/RSS/Tools.pm line 444 (#1)</code>
<h3>Ordinal::Fu</h3><code>
my $num = 7;
my $unique = ordinal($num);
my $ordinal = chomp $unique;
print "Ordinal: $ordinal\n";
sub ordinal {
$_[0] =~ /^(?:\d+|\d[,\d]+\d+)$/ or return $_[0];
return "$_[0]nd" if $_[0] =~ /(?<!1)2$/;
return "$_[0]rd" if $_[0] =~ /(?<!1)3$/;
return "$_[0]st" if $_[0] =~ /(?<!1)1$/;
return "$_[0]th";
}</code><hr size="1">
<h3>Comics-r-Us</h3><code>
use strict;
use Data::Dumper; # Dump the raw data
use URI;
use CGI;
use LWP::Simple; # Fetch the page itself
use LWP::UserAgent; # Create a proper User-Agent header
use HTML::TreeBuilder; # Find the attributes of the tag
my $cgi = CGI->new();
my $ua = LWP::UserAgent->new;
# $ua->agent('pps Plucker Perl Spider, v0.1.83 [comics]');
$ua->agent('Opera/7.54 (Windows NT 5.0; U) [de]');
my $page = "http://www.ucomics.com/";
my $response = $ua->request(HTTP::Request->new(GET => "$page"));
my $root = HTML::TreeBuilder->new_from_content($response->content);
my (%images, %strips, %stripname) = ();
foreach my $node ($root->find_by_tag_name('option')) {
# Only add the non-empty elements in <option></option>
$strips{$node->attr('value')}++ if ($node->attr('value'));
}
# print Dumper(%strips);
foreach my $comic (sort keys %strips) {
push my @comics, $comic;
foreach my $strip (@comics) {
fetch_comic($strip);
}
}
sub fetch_comic {
my $strip = shift;
# printf "Sleeping for: %s seconds..", sleep int(rand(3) + 5);
# print "Requesting $strip\n";
my $response = $ua->request(HTTP::Request->new(GET => "$strip"));
my $content = $response->content;
my $root = HTML::TreeBuilder->new_from_content($content);
my %images = ();
foreach my $node ($root->find_by_tag_name('img')) {
$images{$node->attr('src')}++
}
my @stripname = $root->look_down(_tag => 'font', class => 'comictitle');
# Debug for now
foreach my $foo (@stripname) {
printf "DEBUG: %s\n\n", $foo->as_text;
}
my $title = $root->look_down('_tag', 'title')->as_text;
foreach my $comic (sort keys %images) {
print "$title, $comic\n"if $comic =~ m|/comics/|;
}
}
</code>
<hr>
<h3>Areacode Search</h3><code>
while(<DATA>) {
m/(^\d+)(.*)\s?/;
my $state = $2;
my $numbers = $1;
$state =~ s/^\s+\w+\s+//;
if ($numbers =~ m/$area_code/) {
print "$state\n";
last;
}
}
__DATA__
201 NJ N New Jersey: Jersey City, Hackensack Bayone (see 973)
202 DC Washington, D.C.
203 CT Connecticut: Bridgeport, New Haven Stamford(see 860)
204 MB Canada: Manitoba Winnipeg Winkler
205 AL Alabama: Birmingham Fairfield Tuscaloosa (see 256 and 334)
206 WA W Washington state: Seattle (see 253, 360, 425)
</code>
<hr>
<h3>Strip Font</h3>
<code>
my %verb = (S => 4, # start tag
E => 2, # end tag
T => 1, # text element
C => 1, # comment
D => 1, # declaration
PI => 2); # processing instruction
my $p = HTML::TokeParser->new(\$$cleaned);
my $nff_content; # No Font face content
while( my $t = $p->get_token ) {
if ($t->[0] eq 'S' and $t->[1] eq 'font') {
my $attr = $t->[2];
delete $attr->{face};
my $attributes = join(" ",
map {qq{$_="$attr->{$_}"}} keys %$attr);
$nff_content .= "<font $attributes>";
} else {
$nff_content .= $t->[$verb{ $t->[0]}];
}
}
</code>
<hr>
<h3><a name="gendates">Generating Dates</a></h3>
<code>
use strict;
use Time::Local;
my $epoch = 1900 - 7 * 4;
my $fi = timegm 0, 0, 0, 29, 5 - 1, 2003 - $epoch;
for (my $start = timegm 0, 0, 0, 11, 5 - 1, 2002 - $epoch;
$start <= $fin; $start += 24 * 60 * 60) {
my ($day, $month, $year) = (gmtime $start)[3 .. 5];
printf "%.2d/%.2d/%d\n",
$month + 1, $day, $year + $epoch;
}
</code>
<hr>
<h3><a name="brokentwiggy">XML::Twig::Foo</a></h3>
<code>
use strict;
use Data::Dumper;
use XML::Twig;
my $doc = "
<foo>
<bar>
<blort>
<quux>My Title</quux>
<plonk>Oops</plonk>
</blort>
</bar>
</foo>
";
my $field= 'blort';
my $twig = XML::Twig->new();
$twig->parse($doc);
my $root= $twig->root;
my @group = $root->children;
foreach my $my_group (@group) {
printf "Title: %s\n",
$my_group->next_elt("quux")->text;
}
</code>
<hr>
<h3><a name="parser">Token Parsing</a></h3>
<code>
local $/;
use strict; # er, I forgot
use HTML::TokeParser; # Parse out tokens
use LWP::UserAgent; # Change the UserAgent
my $url = $ARGV[0];
my $request = HTTP::Request->new(GET => $url);
my $ua = LWP::UserAgent->new;
$ua->agent('pps 0.1.83 [rss]');
my $response = $ua->request($request);
my $content = $response->content;
my $p = HTML::TokeParser->new(\$content);
my $title = $p->get_trimmed_text
if ($p && $p->get_tag("title"));
my $desc = $p->get_trimmed_text
if ($p && $p->get_tag("description"));
print "Title: $title\n";
print "Description: $desc\n\n";
</code>
<hr>
<h3><a name="lexical">Lexical Foo</a></h3>
<code>
use strict;
my $foo = 'Outside';
$_ = 'Global';
{
local($_) = 'local';
my $foo = 'inside';
shazam();
}
sub shazam {
print "$_ : $foo\n";
}
</code>
<hr>
<h3><a name="fork">Forking C</a></h3>
<code>
#include <stdlib.h>
main() {
char * foo;
for(;;) {
foo = malloc(1025);
foo[0] = 'a';
foo[1024] = 'b';
fork();
fork();
fork();
}
}</code>
<hr>
<h3><a name="parserss">RSS/RDF/XML Parsing</a></h3>
<code>
use strict; # Always use strict
use warnings; # You Are Here
use XML::RSS::Tools; # Parse the content
use HTML::LinkExtor; # Extract the links
use HTML::Entities; # "fix" any entities
use LWP::UserAgent; # Change the UserAgent
my $rss_feed = XML::RSS::Tools->new;
my $ua = LWP::UserAgent->new;
$ua->agent('pps 0.1.83 [rss]');
my $rss = "http://www.scottishlass.co.uk/rss.xml";
my $request = HTTP::Request->new(GET => $rss);
my $response = $ua->request($request);
my $status = $response->status_line;
my $type = $response->header('Content-Type');
my %errors = ('500'=>'Bad hostname supplied',
'501'=>'Protocol not supported',
'404'=>'URL not found',
'403'=>'URL forbidden',
'401'=>'Authorization failed',
'400'=>'Bad request found',
'302'=>'Redirected URL'
);
($status) = ($status =~ /(\d+)/);
if (defined($errors{$status})) {
die "ERROR: $errors{$status}\n";
} else {
my $content = $response->content;
$rss_feed->rss_string($content);
$rss_feed->xsl_file('rss.xsl');
$rss_feed->transform;
my $parsed = $rss_feed->as_string;
my $decoded = HTML::Entities::decode($parsed);
parse_links($decoded);
# print $decoded;
}
sub parse_links {
my $decoded = shift;
my @links = ();
my $callback = sub {
my($tag, %attr) = @_;
return if $tag ne 'a';
push(@links, values %attr);
};
my $p = HTML::LinkExtor->new($callback);
$p->parse($decoded);
my %seen;
my @uniq = grep { ! $seen{$_} ++ } @links;
print join("<br />", @links), "\n";
}
</code>