# Fetches the USGBC LEED registered-project list, triggers the search form,
# posts back one __EVENTTARGET value, strips the HTML from the resulting page
# and writes the trimmed, whitespace-normalised text to output.txt.
use strict;
use warnings;
use WWW::Mechanize;
use Time::Local;
use POSIX 'strftime';
use HTML::Strip;

my $url = 'http://www.usgbc.org/LEED/Project/RegisteredProjectList.aspx?CMSPageID=243&CategoryID=19&';

my $dirname  = "C:\\Program Files\\Perl Express\\Scripts\\";
my $fname    = "output.txt";
my $out_path = $dirname . $fname;

# Open the output first so an unwritable destination fails before any
# network traffic. Three-arg open with a lexical handle avoids mode
# injection through the file name; low-precedence "or" binds to open().
open(my $out, '>', $out_path) or die "Cant open file $out_path: $!";

# Initialization to strip HTML from the document.
my $hs = HTML::Strip->new();

# autocheck => 1 makes every failed request die, so get() and
# click_button() need no explicit success test.
my $mech = WWW::Mechanize->new(autocheck => 1);
$mech->agent_alias('Linux Mozilla');
$mech->get($url);
print "Made it past the url test\n";

# NOTE(review): the original looked up the "lstLeedRating" input but never
# changed its value, so the form is submitted with the page's defaults.
# TODO confirm whether selecting every rating option was intended.

# Clicks the btnSearch button.
$mech->click_button(name => "btnSearch");

# NOTE(review): presumably an ASP.NET postback target for a pager link in
# the results grid -- confirm against the live page.
my $response = $mech->submit_form(
    with_fields => {
        '__EVENTTARGET' => 'dgRegProjectList:_ctl29:_ctl7',
    },
);

if ($response->is_success) {
    print $response->content;
}
else {
    print STDERR $response->status_line, "\n";
    die "Page with those fields not found!";
}

my $clean_text = $hs->parse($mech->content);
$hs->eof;

# Drop everything up to and including the last occurrence of the marker
# (greedy match with /s), then everything from the "2  3  4" text onwards.
$clean_text =~ s/.*multiple LEED Rating Systems//s;
$clean_text =~ s/2  3  4.*//s;

# Trim each line and collapse internal runs of whitespace to one space.
my @lines = split /\n/, $clean_text;
for my $line (@lines) {
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    $line =~ s/\s+/ /g;
}

print {$out} join("\n", @lines);

# Buffered write errors only surface at close, so check it.
close($out) or die "Cant close file $out_path: $!";