Sometimes I just like to take out the bigger hammer, or the chainsaw if you like.
This saves the returned data to files in Data::Dumper form that you can view with an editor, and it can reuse parts so you don't have to start at the beginning each time. But I used the big hammer rather than HTML::Form too.

use strict;
use warnings;
use LWP;
use URI;
use HTML::Entities qw/decode_entities/;
use Data::Dumper;

# Shared user agent used by getter() and poster().
# The site's reply varied with the agent string, so a browser-like one is sent.
my $ua = LWP::UserAgent->new(
    keep_alive => 1,
    agent      => "Mozilla/5.0 (Windows NT 5.1; rv:51.0) Gecko/20100101 Firefox/51.0",
    # Bigger-hammer options, left disabled:
    # ssl_opts => { verify_hostname => 0,
    #               SSL_verify_mode => 'SSL_VERIFY_NONE',
    #             },
    # timeout  => $timeout,
);

# Optional: reuse Firefox's cookies (left disabled).
# use HTTP::Cookies::Mozilla;
# $cookie_jar_obj = HTTP::Cookies::Mozilla->new(file=>$cookie_file);
# $ua->cookie_jar( $cookie_jar_obj );

my $response;
my $response2;

# Set to true to reload a previously saved response instead of hitting the
# network again. Reusing the second response implies reusing the first.
my $reuse1 = 0;
my $reuse2 = 0;
$reuse1 = 1 if $reuse2;

# Stage 1: fetch (or reload) the verification page.
if ($reuse1) {
    $response = unsaver('mbrverify.txt');
}
else {
    $response = getter('http://www.appaloosa.com/web/mbrverify.aspx');
    saver('mbrverify.txt', $response);
}
#print Dumper($response);

# Stage 2: fill in the first form on that page and submit it back.
if ($reuse2) {
    $response2 = unsaver('mbrverify2.txt');
}
else {
    my $res = form_parser1($response);
    # print Dumper($res);
    my $form = $res->[0]
        or die "No <form> found in the first response\n";
    my $mynumber = 0;    # id number
    $form->{pairs}{txtMemNumA} = $mynumber;
    # print Dumper($res);
    $response2 = backer($response, $form);
    saver('mbrverify2.txt', $response2);
}
#print Dumper($response2);
exit;

# getter($uri, $args)
#   Issue a GET request through the shared user agent.
#   $uri  - URL string or URI object.
#   $args - optional array ref of key/value pairs; when given it replaces the
#           query string of $uri.
#   Returns the response object; dies with the status line on failure.
sub getter {
    my $uri  = shift;
    my $args = shift;    # array ref of args

    my $page = URI->new($uri);
    $page->query_form(@$args) if $args;

    my $req      = HTTP::Request->new(GET => $page);
    my $response = $ua->request($req);
    die $response->status_line unless $response->is_success;
    return $response;
}    # getter

# poster($uri, $vars)
#   Issue a form POST through the shared user agent.
#   $uri  - URL string or URI object.
#   $vars - array ref of key/value pairs sent as the form body.
#   Returns the response object; dies with the status line on failure.
sub poster {
    my $uri  = shift;
    my $vars = shift;    # array ref of args

    my $page = URI->new($uri);
    # Lexical on purpose: the script also has a file-level $response that
    # must not be overwritten from in here.
    my $response = $ua->post($page, $vars);
    die $response->status_line unless $response->is_success;
    return $response;
}    # poster

# _attr($tag, $attr)
#   Pull one double-quoted attribute out of a raw tag string.
#   Returns the entity-decoded value, '' for an empty attribute, or undef
#   when the attribute is absent. Single-quoted and unquoted attributes are
#   not recognised.
sub _attr {
    my ($tag, $attr) = @_;
    # Leading \s keeps e.g. name= from matching inside data-name=.
    my ($raw) = $tag =~ m!\s\Q$attr\E\s*=\s*"([^"]*)"!i;
    return defined $raw ? decode_entities($raw) : undef;
}

# form_parser1($response)
#   Regex-based ("big hammer") form extraction from a response's decoded
#   content. Returns an array ref with one hash ref per <form>:
#     method   - value of the form's method attribute (undef if absent)
#     action   - value of the form's action attribute (undef if absent)
#     pairs    - hash ref: input name => input value (undef if no value attr)
#     infields - array ref of input names in document order
#   Only <input> elements are collected; inputs without a name are skipped.
sub form_parser1 {
    my $response = shift;

    # Entities are decoded per attribute (see _attr) rather than over the
    # whole page, so an encoded quote or angle bracket inside a value cannot
    # break the tag matching below.
    my $page  = $response->decoded_content;
    my @forms = $page =~ m!(<form\b.+?</form[^>]*>)!gis;

    my $res = [];
    for my $form (@forms) {
        my ($fid) = $form =~ m!(<form\b[^>]*>)!is;

        my $inhash   = {};
        my $infields = [];
        my $fh       = {
            method   => _attr($fid, 'method'),
            action   => _attr($fid, 'action'),
            pairs    => $inhash,
            infields => $infields,
        };

        # Matches both <input ...> and <input ... />.
        for my $input ($form =~ m!(<input\b[^>]*>)!gis) {
            my $name = _attr($input, 'name');
            next unless defined $name;
            $inhash->{$name} = _attr($input, 'value');
            push @$infields, $name;
        }    # input

        push @$res, $fh;
    }    # form
    return $res;
}    # parser

# backer($responseto, $form)
#   Submit a parsed form back to the server. Only handles POST and GET.
#   $responseto - the response the form came from; supplies the base URL.
#   $form       - one hash ref as produced by form_parser1().
#   Returns the response object from poster() or getter().
sub backer {
    my $responseto = shift;
    my $form       = shift;

    # $responseto->base is avoided: on a response reloaded by unsaver() it
    # gave: Can't locate object method "scheme" via package "URI::http".
    # So the request URI string is read straight out of the object's
    # internals instead.
    my $base = ${ $responseto->{_request}{_uri} };

    # An absent action resolves to the page's own URL.
    my $action = defined $form->{action} ? $form->{action} : '';
    my $page   = URI->new_abs($action, $base);

    my $hash = $form->{pairs};
    my $vars = [ map { ($_, $hash->{$_}) } @{ $form->{infields} } ];

    # An absent method attribute is treated as GET.
    my $method = defined $form->{method} ? uc $form->{method} : 'GET';
    return $method eq 'POST'
        ? poster($page, $vars)
        : getter($page, $vars);
}    # backer

# saver($fn, $response)
#   Dump $response to file $fn as UTF-8 Data::Dumper text that can be read
#   in an editor and reloaded with unsaver(). Dies if the file cannot be
#   opened or closed.
sub saver {
    my $fn       = shift;
    my $response = shift;

    open(my $rep, '>:encoding(UTF-8)', $fn)
        or die "Cannot open $fn for writing: $!";
    local $Data::Dumper::Deepcopy = 1;
    local $Data::Dumper::Purity   = 1;
    local $Data::Dumper::Sortkeys = 1;
    local $Data::Dumper::Indent   = 1;
    print $rep Dumper($response);
    close $rep or die "Cannot close $fn: $!";
    return;
}    # saver

# unsaver($fn)
#   Reload a structure written by saver() and return it. Dies if the file
#   cannot be read or its contents fail to evaluate.
#   NOTE(review): this string-evals the file, i.e. runs whatever Perl code
#   it contains. Only use it on files this script wrote itself.
sub unsaver {
    my $fn = shift;

    open(my $rep, '<:encoding(UTF-8)', $fn)
        or die "Cannot open $fn for reading: $!";
    my $stuff = do { local $/; <$rep> };
    close $rep;

    # Dumper output assigns to $VAR1 and, with Purity set, may append
    # fix-up statements afterwards; returning $VAR1 itself (not the value
    # of the last evaluated statement) always yields the top-level object.
    my $VAR1;
    {
        no strict 'vars';
        eval $stuff;
    }
    die "Cannot reload $fn: $@" if $@;
    return $VAR1;
}    # unsaver
I left in, as comments, some code I might otherwise have used. If you were logged in via Firefox, using its cookies may let you skip the verification stage. What I got back changed based on my user agent. If the site were HTTPS and failed the CA check, my bigger-hammer method would be to bypass certificate checking.
In reply to Re: Scraping an ASP form I don't have any control over
by huck
in thread Scraping an ASP form I don't have any control over
by wveagle81
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |