# Poster's lead-in: "I get results" (from the script below).
#
# Walks the USGBC "registered projects" listing: loads the search page,
# presses the Search button, then steps through the result pages, printing
# each page's raw HTML followed by its tag-stripped text.
use strict;
use warnings;
use WWW::Mechanize;
use Time::Local;
use POSIX 'strftime';
use HTML::Strip;
#use Whitespace;

# The original strings contained " +" fragments ("...List.as +px",
# "_ctl29 +$_ctl"); presumably these were line-wrap markers added by the
# forum's code display rather than part of the values, so they are removed.
my $url = 'http://www.usgbc.org/LEED/Project/RegisteredProjectList.aspx'
        . '?CMSPageID=243&CategoryID=19&';

# Pager links are named <prefix><page number>. Single quotes keep the
# literal '$' characters from being interpolated.
my $pager_prefix = 'dgRegProjList$_ctl29$_ctl';

# Number of result pages to walk (hard-coded in the original loop bound).
my $last_page = 187;

# autocheck => 1 makes Mechanize die by itself on any failed request; the
# explicit checks below are kept so the messages survive if that changes.
my $mech = WWW::Mechanize->new(autocheck => 1);
my $hs   = HTML::Strip->new();
$mech->agent_alias('Linux Mozilla');

$mech->get($url) or die "Page $url can't be reached";
print "Made it past the url test";

# List the values offered by the rating-system drop-down on the first form.
my ($form) = $mech->forms();
die "No form found on $url" unless $form;
my $menu = $form->find_input("lstLeedRating")
    or die "No lstLeedRating input found on $url";
print for $menu->possible_values();

# Page 1 comes from pressing the real Search button.
report_page($mech, $hs, $mech->click_button(name => "btnSearch"));

# Pages 2..N are reached through pager *links*, not buttons, so
# click_button() cannot find them ("No clickable input with name ...").
# They are followed by posting the form back with the link as event target.
for my $page_number (2 .. $last_page) {
    report_page($mech, $hs, post_back($mech, $pager_prefix . $page_number));
}

# report_page($mech, $hs, $response)
#   Prints the raw body of $response, then the tag-stripped text of the
#   page currently held by $mech. Dies (after printing the status line to
#   STDERR) when the response was not successful.
sub report_page {
    my ($mech, $hs, $response) = @_;

    if ($response->is_success) {
        print $response->content;
    }
    else {
        print STDERR $response->status_line, "\n";
        die "Page with those fields not found!";
    }

    my $clean_text = $hs->parse($mech->content);
    $hs->eof;    # reset the stripper so the next page starts clean
    print $clean_text;
    return;
}

# post_back($mech, $target)
#   Submits the current form with the hidden __EVENTTARGET field set to
#   $target and __EVENTARGUMENT cleared, without clicking any button.
#   Returns the response from the submit. Dies if the current page has no
#   form or lacks either hidden field.
#
#   NOTE(review): presumably the page's __doPostBack() script does exactly
#   this; some ASP.NET versions also rewrite '$' to ':' in the target
#   before posting -- confirm against the page source and adjust $target
#   if the server ignores the request.
sub post_back {
    my ($mech, $target) = @_;

    my $form = $mech->current_form
        or die "No form on the current page to post back";

    my %value_for = (
        '__EVENTTARGET'   => $target,
        '__EVENTARGUMENT' => '',
    );

    for my $name (sort keys %value_for) {
        my $input = $form->find_input($name)
            or die "Hidden field $name not found; cannot post back to $target";
        $input->readonly(0);    # hidden inputs start out read-only
        $input->value($value_for{$name});
    }

    return $mech->submit();
}
But the output I'm looking for is each business with its info separated by commas, and at the end of each page the script should advance to the next page. With a loop that I thought would work, I get "No clickable input with name dgRegProjList$_ctl29_ctl2 ...", which is the name of the link on the page. Thanks again for your help.
entire page #Then the part I want.... Project Name Owner City State Country LEED Rating System #business under, and I was hoping to get rid of the white space.
In reply to Re^2: Web Spider problem
by bauer1sc
in thread Web Spider problem
by bauer1sc
| For: | Use: |
|------|------|
| & | &amp;amp; |
| < | &amp;lt; |
| > | &amp;gt; |
| [ | &amp;#91; |
| ] | &amp;#93; |