use strict;
use warnings;

use LWP::Simple;
use HTML::TreeBuilder;

# Scrape the PDBsum ligand index page for entry 2j6p, follow every link whose
# text mentions the EPE ligand, and download each LigPlot PostScript file that
# the linked pages reference.

# Host that scraped hrefs are appended to. The hrefs are used as-is, so they
# are expected to be site-relative (start with "/") -- TODO confirm against
# the live pages.
my $base_url = 'http://www.ebi.ac.uk';

my $url3 = "http://www.ebi.ac.uk/thornton-srv/databases/cgi-bin/pdbsum/GetPage.pl?pdbcode=2j6p&template=ligands.html&l=1.1";

my $content = get($url3) or die "$url3: get failed";

my $p = HTML::TreeBuilder->new;
$p->parse_content($content);

# Collect the href of every anchor whose visible text contains "EPE" followed
# by whitespace.
my @href;
for my $anchor ( $p->look_down( _tag => q{a} ) ) {
    my $txt = $anchor->as_text;
    next unless $txt =~ /EPE\s/;
    print $txt, qq{\n};

    my $href = $anchor->attr(q{href});
    next unless defined $href;    # an anchor without href has nothing to follow
    print $href, qq{\n};
    chomp $href;
    push @href, $href;
}
$p->delete;

# Each collected link leads to a ligand page; that page in turn links to the
# LigPlot output (a PostScript file), which process_content() downloads.
for my $param (@href) {
    print "processing param: \"$param\"\n";
    my $url = "$base_url$param";
    my $content = get($url) or die "$url: get failed";
    process_content($content);
}

# process_content($content [, $pdb])
#
# Parse one ligand page and download every LigPlot file it links to.
#
#   $content  HTML text of the ligand page.
#   $pdb      Optional prefix for the output file names; defaults to
#             'testpdb'.
#
# Every anchor whose href matches /(ligplot\d\d_\d\d)/ is mirrored from
# $base_url . href into "<pdb>.<id>.ps" in the current directory, where <id>
# is the captured "ligplotNN_NN" part. Each distinct href is downloaded once
# per call. Dies if a download fails. Returns nothing.
sub process_content {
    my ( $content, $pdb ) = @_;
    $pdb = 'testpdb' unless defined $pdb;

    # Use a tree private to this call rather than the file-level $p.
    my $tree = HTML::TreeBuilder->new;
    $tree->parse_content($content);

    my %seen;
    for my $anchor ( $tree->look_down( _tag => q{a} ) ) {
        print "anchor: " . $anchor->as_HTML() . "\n";

        my $href = $anchor->attr(q{href});
        next unless $href;
        print "found href: $href\n";

        next unless $href =~ /(ligplot\d\d_\d\d)/;
        my $id = $1;
        print $href, qq{\n};

        # A page may link to the same plot more than once; fetch it once.
        next if $seen{$href}++;

        my $url  = "$base_url$href";
        my $file = "$pdb.$id.ps";
        print "mirror $url to $file\n";

        # mirror() returns the HTTP status code. 304 (not modified) means the
        # local copy is already current, which is not an error.
        my $rc = mirror( $url, $file );
        unless ( is_success($rc) or $rc == RC_NOT_MODIFIED ) {
            die "$url: mirror failed (HTTP $rc)";
        }
    }

    # Free the tree only after the loop: the anchors belong to it.
    $tree->delete;
    return;
}

####
# Diff that accompanied this script. It appears to be diff output against an
# earlier revision, so its line numbers refer to that earlier layout and not
# to this file. The header of the first hunk was not present in the pasted
# text.
#
# < my $content = get($url3);
# ---
# > my $content = get($url3) or die "$url3: get failed";
# 32c32,34
# < my $content = get("http://www.ebi.ac.uk$param");
# ---
# > print "processing param: \"$param\"\n";
# > my $url = "http://www.ebi.ac.uk$param";
# > my $content = get($url) or die "$url: get failed";
# 42a45
# > print "anchor: " . $anchor0->as_HTML() . "\n";
# 43a47,48
# > next unless($href0);
# > print "found href: $href0\n";
# 45c50,51
# < if ( $href0 =~ /ligplot\d\d_\d\d'/ ) {
# ---
# > if ( $href0 =~ /(ligplot\d\d_\d\d)/ ) {
# > my $id = $1;
# 49,51c55,59
# < $content = get("http://www.ebi.ac.uk$param0");
# < my $content = shift;
# < print my $param0;
# ---
# > my $url = "http://www.ebi.ac.uk$param0";
# > print "getting $url\n";
# > $content = get($url) or die "$url: get failed";
# > # my $content = shift;
# > # print my $param0;
# 53a62
# > my $pdb = 'testpdb';
# 55c64
# < ( [ "http://www.ebi.ac.uk$param0", "$pdb.$param0.pl" ], );
# ---
# > ( [ "http://www.ebi.ac.uk$param0", "$pdb.$id.ps" ], );
# 56a66
# > print "mirror $duplet->[0] to $duplet->[1]\n";