#!/usr/bin/perl
#
# List O'Reilly books grouped by cover animal, using XSH (XML::XSH).
#
# The XSH program in the heredoc below:
#   1. Opens http://www.oreilly.com/animals.html as an HTML document
#      (id "animals"), with parser recovery turned on.
#   2. For column index 1 and then 2, selects every innermost table whose
#      first row has "Book Title" in that column, and walks all rows after
#      the header row.  For each row the last cell is taken as the cover
#      animal and the second-to-last cell as the subject; both are pushed
#      into the Perl hash %cover (animal => list of subjects).
#   3. Creates a new document "t1" with a <root> element and, for every
#      animal in sorted order, appends a <cover> element holding one
#      <animal> child and one <book> child per (sorted) subject.
#   4. Lists the resulting document with "ls /".
#
# NOTE(review): the script performs a live HTTP fetch and depends on the
# page layout described above -- confirm both still hold before relying
# on the output.
# NOTE(review): in XSH, "$__" is presumably the XSH-side spelling of
# Perl's "$_" inside "foreach { perl-expression }" loops -- verify against
# the XML::XSH documentation for the installed version.

use strict;
use warnings;

use XML::XSH;

# The heredoc is single-quoted on purpose: "$__", "$cover", "%cover" etc.
# must reach XSH verbatim instead of being interpolated by Perl.
xsh <<'END_XSH';
recovering 1; # for broken entity recovery (a frequent HTML problem)
quiet; # avoid tracing of open
open HTML animals = "http://www.oreilly.com/animals.html";
foreach {1..2} {
  foreach //table[not(.//table) and contains(tr[1]/td[$__], "Book Title") ]/tr[position() > 1] {
    # pwd;
    $cover = string(td[last()]);
    $subject = string(td[last() - 1]);
    eval { push @{$cover{$cover}}, $subject; }
  }
}
create t1 root;
foreach {sort keys %cover} {
  ## print "animal $__";
  insert element cover into /root;
  cd /root/cover[last()];
  insert element animal into .;
  insert text $__ into animal;
  foreach {sort @{$cover{$__}}} {
    ## print " book $__";
    insert element book into .;
    insert text $__ into book[last()];
  }
}
quiet; # avoid final message from ls
ls /;
END_XSH
In reply to Screen-scraping using XSH - O'Reilly Animal lister by merlyn
| For: | Use: | ||
| & | &amp; | ||
| < | &lt; | ||
| > | &gt; | ||
| [ | &#91; | ||
| ] | &#93; |