#!/usr/local/bin/perl
#
# Fetch the PDBsum ligands page for PDB code 2j6p, follow every link whose
# anchor text matches /EPE\s/, and on each linked page download the
# non-PDF files whose href contains "ligplotNN_NN".
#
# Output (STDOUT): the URL of each followed page, then one tab-indented
# line per ligplot href found on it, and finally "done".
# Failed downloads are reported on STDERR; only a failure to fetch the
# initial page is fatal.

use strict;
use warnings;
use LWP::Simple;
use HTML::TreeBuilder;

# All hrefs scraped from the pages are appended to this prefix, so they are
# expected to be site-absolute paths (starting with "/") -- TODO confirm.
my $site_root = q{http://www.ebi.ac.uk};

my $start_url = "http://www.ebi.ac.uk/thornton-srv/databases/cgi-bin/pdbsum/GetPage.pl?pdbcode=2j6p&template=ligands.html&l=1.1";

# fetch_links($url)
#   Downloads $url and returns a reference to an array of
#   { href => ..., text => ... } hashes, one per <a> element that carries
#   an href attribute.  Anchors without an href are skipped so callers
#   never have to match against undef.
#   Returns undef (empty list in list context) when the download fails.
sub fetch_links {
    my ($url) = @_;

    # LWP::Simple::get returns undef on any failure.
    my $content = get($url);
    return if !defined $content;

    my $tree = HTML::TreeBuilder->new;
    $tree->parse_content($content);

    my @links;
    for my $anchor ( $tree->look_down( _tag => q{a} ) ) {
        my $href = $anchor->attr(q{href});
        next if !defined $href;
        push @links, { href => $href, text => $anchor->as_text };
    }

    # Explicitly free the parse tree once the needed data is extracted.
    $tree->delete;

    return \@links;
}

# Step 1: collect the links on the start page whose text mentions "EPE".
my $start_links = fetch_links($start_url)
    or die "Failed to fetch $start_url\n";

my @ligand_hrefs = map { $_->{href} }
                   grep { $_->{text} =~ /EPE\s/ } @{$start_links};

# Step 2: visit each of those pages and download its ligplot files.
for my $ligand_href (@ligand_hrefs) {
    my $page_url = join( q{}, $site_root, $ligand_href );
    print qq{$page_url\n};

    my $page_links = fetch_links($page_url);
    if ( !defined $page_links ) {
        warn "Failed to fetch $page_url\n";
        next;
    }

    for my $link ( @{$page_links} ) {
        my $href = $link->{href};

        next if $href !~ /ligplot\d\d_\d\d/;    # e.g. ligplot04_01
        next if $href =~ /pdf/;

        my $plot_url = join( q{}, $site_root, $href );
        print qq{\t$href\n};

        my $plot = get($plot_url);
        if ( !defined $plot ) {
            warn "Failed to fetch $plot_url\n";
            next;
        }

        #print $plot;
        # do something with the postscript file
    }
}

print qq{done\n};