I'm trying to learn
Go, and one wonderful online resource I found is the
http://senseis.xmp.net wiki. In particular, I found its
beginner exercises section very interesting. I wanted to print these exercises, including the solutions, so that I could try to solve them offline. So I wrote this script, which downloads all exercises and solutions, extracts the content (removing headers, footers and navigation bars) and puts it all into one big HTML page to be printed.
#!/usr/bin/perl
# Downloads all beginner exercises and solutions from senseis.xmp.net
# wiki (see http://senseis.xmp.net/?BeginnerExercises); extracts wiki
# topic content and puts it in one big HTML page to make it easy to
# print all of them as one document
#
# Usage: download.pl > output.html
#
use strict;
use warnings;
use LWP::Simple;
use URI;
use XML::LibXML;
# Shared parser instance; extract_wiki_topic_content() reads it as well.
my $PARSER = XML::LibXML->new();

# <body> element of the combined document; every topic is appended to it.
my $output = create_output_node();

# Walk through every topic name handed out by next_topic().
while (my $topic = next_topic()) {
    # Progress goes to STDERR so that STDOUT carries only the HTML.
    print STDERR "$topic\n\n";

    my $url         = "http://senseis.xmp.net/?$topic";
    my @topic_nodes = extract_wiki_topic_content($url);
    for my $topic_node (@topic_nodes) {
        links_rel_to_abs($topic_node, $url);
    }
    add_wiki_topic_to_node($output, $topic, @topic_nodes);
}

# Serialize the whole document that owns the <body> element.
print $output->ownerDocument->toString;
# Iterator over the wiki topic names to download.
#
# Successive calls return "BeginnerExercise1" .. "BeginnerExercise149",
# then "BeginnerExercise1Solution" .. "BeginnerExercise149Solution".
# Once both passes are finished every further call returns nothing
# (undef in scalar context), which ends the caller's while loop.
#
# The counters are lexicals private to this block.  It is a BEGIN block
# so that their initial values are assigned at compile time: the main
# loop earlier in the file calls next_topic() before execution would
# ever reach a plain block placed here, and would otherwise run on
# uninitialised state.
BEGIN {
    my $last_exercise = 149;    # number of the last exercise to fetch
    my $i             = 0;      # number handed out by the previous call
    my $show_solution = 0;      # false: exercise pass, true: solution pass

    sub next_topic {
        $i++;
        if ($i > $last_exercise) {
            # The solution pass is over as well: sequence exhausted.
            return if $show_solution;

            # The exercise pass is over: restart the numbering for the
            # solution pass.
            $show_solution = 1;
            $i             = 1;
        }
        return 'BeginnerExercise' . $i . ($show_solution ? 'Solution' : '');
    }
}
# Builds the skeleton of the combined HTML document:
#
#   <html>
#     <head><title>TITLE</title></head>
#     <body><h1>TITLE</h1></body>
#   </html>
#
# Returns the <body> element.  The caller appends topics to it and reaches
# the enclosing document again through ->ownerDocument.
sub create_output_node {
    # Kept on one line: a line break inside the literal would end up in
    # both the <title> and the <h1> text.
    my $title_text = 'Beginner Exercises and Solutions (senseis.xmp.net)';

    my $dom  = XML::LibXML::Document->new();
    my $root = XML::LibXML::Element->new('html');
    $dom->setDocumentElement($root);

    # <title> belongs inside <head>, not directly under <html>.
    my $head = XML::LibXML::Element->new('head');
    $root->appendChild($head);

    my $title = XML::LibXML::Element->new('title');
    $title->appendText($title_text);
    $head->appendChild($title);

    my $body = XML::LibXML::Element->new('body');
    $root->appendChild($body);

    my $h1 = XML::LibXML::Element->new('h1');
    $h1->appendText($title_text);
    $body->appendChild($h1);

    return $body;
}
# Downloads the page at $url and returns the list of nodes selected by
# the XPath expression below.
#
# The expression looks inside every <td valign="top" rowspan="2"> and
# picks the child elements that have at least one <hr noshade> sibling
# before them and at least one after them, i.e. whatever sits between
# those rules.  Presumably that is the topic body between the page
# header and footer -- verify against the live page layout.
#
# Dies with a message naming the URL when the download fails or yields
# no content, instead of handing an undefined value to the HTML parser.
sub extract_wiki_topic_content {
    my $url = shift;

    # LWP::Simple::get() returns undef when the request fails.
    my $html = get($url);
    die "Failed to download $url"
        unless defined $html and length $html;

    my $dom = $PARSER->parse_html_string($html);
    return $dom->findnodes(<<'XPATH');
//td[@valign="top" and @rowspan = 2]
/*[count(preceding-sibling::hr[@noshade]) > 0 and
count(following-sibling::hr[@noshade]) > 0]
XPATH
}
# Rewrites every href and src attribute found on $node or on any of its
# descendants so that it holds an absolute URL.
#
#   $node     - node whose subtree is processed in place
#   $base_url - URL of the page the node came from; relative references
#               are resolved against it
sub links_rel_to_abs {
    my ($node, $base_url) = @_;

    # Each XPath literal is kept on one line: a line break inside it
    # would change the expression.
    my @links = ($node->findnodes('.//@href'), $node->findnodes('.//@src'));

    for my $link (@links) {
        my $absolute = URI->new_abs($link->value, $base_url);

        # Store a plain string rather than the URI object itself.
        $link->setValue($absolute->as_string);
    }
    return;
}
# Appends one wiki topic to the output document.
#
#   $node        - element that receives the new content
#   $title       - topic name, used as the text of an <h2> heading
#   @topic_nodes - nodes making up the topic, appended after the heading
#                  in the order given
sub add_wiki_topic_to_node {
    my ($node, $title, @topic_nodes) = @_;

    my $heading = XML::LibXML::Element->new('h2');
    $heading->appendText($title);

    # Heading first, then the topic content.
    for my $child ($heading, @topic_nodes) {
        $node->appendChild($child);
    }
}
--
Ilya Martynov, ilya@iponweb.net
CTO IPonWEB (UK) Ltd
Quality Perl Programming and Unix Support
UK managed @ offshore prices - http://www.iponweb.net
Personal website - http://martynov.org