Baz has asked for the wisdom of the Perl Monks concerning the following question:
#!/usr/bin/perl
#
# Search the BT directory-enquiries site for a surname within the "BT"
# postcode area, tally the BT<nn> postcode districts found on the first
# results page, request the second page of results, and dump all three raw
# HTTP responses to save.html for inspection.
#
# Flow:
#   1. GET the home page and scrape BV_SessionID / BV_EngineID from it.
#   2. POST the search form with those IDs and count "pcd=BT<nn>" matches.
#   3. POST again with start_id => 50 to ask for the next page.
#   4. Write the three responses to $output_file.
use strict;
use warnings;

use URI;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Headers;
use HTTP::Response;
use HTTP::Cookies;
use HTTP::Request::Common qw(GET POST);

my $cookie_file = "cookies.txt";
my $output_file = "save.html";

my $cookie_jar = HTTP::Cookies->new(
    file     => $cookie_file,
    autosave => 1,
);

my $url_home   = "http://www.bt.co.uk/directory-enquiries/dq_home.jsp";
my $url_search = "http://www.bt.co.uk/directory-enquiries/dq_locationfinder.jsp";

# Hand the jar to the user agent so cookies are extracted from every
# response and added to every request automatically. Previously the jar was
# filled and applied by hand, and the second-page request went out with no
# cookies at all.
my $ua = LWP::UserAgent->new( cookie_jar => $cookie_jar );

# Scrape the BV_SessionID and BV_EngineID values from a response.
#
# Takes an HTTP::Response and searches its full text (headers and body) for
# "BV_SessionID=<value>&" and "BV_EngineID=<value>&".
# Returns the list ($session_id, $engine_id); dies if either is missing.
sub extract_session_ids {
    my ($response) = @_;

    my $text = $response->as_string;

    my ($session_id) = $text =~ /BV_SessionID=([^&]+)&/
        or die "Can't find a session ID!\n";
    my ($engine_id) = $text =~ /BV_EngineID=([^&]+)&/
        or die "Can't find an engine ID!\n";

    return ( $session_id, $engine_id );
}

###################### Get a session ID first
my $res1 = $ua->request( GET $url_home );
die $res1->as_string . "\n" if $res1->is_error;

my ( $sessID, $engID ) = extract_session_ids($res1);

#print STDERR "Got session ID $sessID\n";
#print STDERR "Got engine ID $engID\n";

# Show and persist the cookie jar's state.
print "Cookies: ", $cookie_jar->as_string, "\n";
$cookie_jar->save($cookie_file);

###################### Start Searching
# The IDs are passed through unmodified: POST with an array ref of form
# fields URL-encodes every value itself, so replacing '@' with '%40' by hand
# would put '%2540' on the wire.
my $request = POST $url_search, [
    QRY          => 'res',
    BV_SessionID => $sessID,
    BV_EngineID  => $engID,
    new_search   => 'true',
    NAM          => 'Griffin',
    GIV          => '',
    LOC          => '',
    STR          => '',
    PCD          => 'BT',
    limit        => '50',
    CallingPage  => 'Homepage',
];

my $res2 = $ua->request($request);

# Check for failure before trying to parse the page.
die $res2->as_string . "\n" if $res2->is_error;

###################### How many BT** on this page
my $content = $res2->content;

if ( $content =~ /(\d+) of (\d+)/ ) {
    print $1, " of ", $2, "\n";
}

my %count;
while ( $content =~ /pcd\=BT(\d+)/g ) {
    $count{$1}++;
}

# Numeric sort so that district 2 is listed before district 10.
foreach my $district ( sort { $a <=> $b } keys %count ) {
    print $district, ": ", $count{$district}, "\n";
}

###################### Reveal Second Page Results
( $sessID, $engID ) = extract_session_ids($res2);

print STDERR "Got session ID $sessID\n";
print STDERR "Got engine ID $engID\n";

# NOTE(review): the session and engine IDs are deliberately left out of this
# request, as in the original; the session now travels via the cookie jar.
# Re-enable the two fields below if the server turns out to require them.
$request = POST $url_search, [
    QRY => 'res',
    # BV_SessionID => $sessID,
    # BV_EngineID  => $engID,
    NAM         => 'Griffin',
    lci         => '0',
    PCD         => 'BT',
    start_id    => '50',
    CallingPage => 'Homepage',
];

my $res3 = $ua->request($request);

# Do not die here: the dump below is most useful when this request failed.
warn "Second-page request failed: ", $res3->status_line, "\n"
    if $res3->is_error;

################### Print 3 pages to http://baz.perlmonk.org/save.html
open my $log, '>', $output_file
    or die "Can't open $output_file for writing: $!\n";
print {$log} $res1->as_string, $res2->as_string, $res3->as_string;
close $log
    or die "Can't close $output_file: $!\n";
|
|---|