What I need is to go to a site, search for something, grab every href for each hit, then go to the next page of results and do the same until there's no more pages.
So, let's say I enter "Harry Potter" as my search term at www.mywebsite.com... The page that is returned to me has a form which includes what page I'm looking at as well as how many pages are left. There are 20 results per page. I need to grab each result and then go onto the next page by posting the form.
Does that make sense?
Here's what I've got. I can't even get it to go to page 2.
#!/usr/bin/perl -w use strict; use LWP::Simple; use WWW::Mechanize; use HTML::Form; my $url = 'http://www.ncbi.nlm.nih.gov/sites/entrez?term=rnr2&cmd=Sear +ch&db=nuccore&QueryKey=17'; my $browser = WWW::Mechanize->new; my $site = $browser->get($url); die( "Can't get $url -- ", $site->status_line ) unless $site->is_success; $browser->form('EntrezForm'); foreach my $item($browser->form('EntrezForm')){ my $nextPage = ""; my $maxPage = ""; my $field=""; my $fieldValue = ""; print "\n"."-----NewPage-----"."\n"; while( my ($k, $v) = each %$item ) { if ($k eq "action"){ my $action = $v; print "\n\n"."ACTION: ".$action."\n"; } if ($k eq "method"){ my $method = $v; print "\n\n"."METHOD: ".$method."\n"; } if ($k eq "attr") { print "\n\n"."ATTRIBUTES"."\n"; while( my ($k, $v) = each %$v ) { print "key: $k, value: $v.\n"; } } if ($k eq "inputs"){ print "\n\n"."INPUTS"."\n"; my @newarray = @$v; foreach my $thisItem(@newarray){ while (my($key, $value) = each %$thisItem){ if ( (($key eq "name") && ($value eq "EntrezSystem2. +PEntrez.Nuccore.Sequence_ResultsPanel.Pager.PageNumber"))|| (($key eq "name") && ($value eq "EntrezSystem2. +PEntrez.Nuccore.Sequence_ResultsPanel.Pager.MaxPage")) ) { $field = $value; if ($field =~ m/PageNumber/){$nextPage=($field +Value+1);$browser->set_fields("$field" => "$nextPage",);} if ($field =~ m/MaxPage/){$maxPage=$fieldValue +;} print $field." => ".$fieldValue."\n"; } if ($key eq "value"){ $fieldValue = $value; } } } } } #parse HTML to get <a>links</a> of each organism hit #save links to file for use after this big loop if ($nextPage <= $maxPage) { $browser->submit(); print "submit"; $browser->content; $browser->form('EntrezForm'); } }
In reply to post, return, parse, repeat by ShayShay
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |