#!/usr/bin/perl -w
#
# Searches the BT directory-enquiries site for a surname and saves every
# page of results into a single log file.
#
# Flow:
#   1. Fetch the home page; it carries the engine and session ids as
#      hidden form fields.
#   2. Submit the search with those ids to obtain the first results page.
#   3. Read "Page X of Y" from that page and fetch the remaining pages.
#
# Every response body is appended to $log_file; cookies persist in
# $cookie_file between runs.

use strict;
use Data::Dumper;
use HTML::TokeParser;
use URI;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Headers;
use HTTP::Response;
use HTTP::Cookies;
use HTML::LinkExtor;
use HTTP::Request::Common qw(GET POST);

my $name      = "Griffin";
my $url_home  = "http://www.bt.co.uk/directory-enquiries/dq_home.jsp";
my $page_size = 50;             # sent as "limit" and used to step start_id
my $log_file  = "save.html";

my $cookie_file = "cookies.txt";
my $cookie_jar  = HTTP::Cookies->new(
    file           => $cookie_file,
    autosave       => 1,
    # Keep cookies the server asks to be discarded. The original author
    # flagged this as essential -- presumably the site's session cookies
    # are discard-flagged; confirm before removing.
    ignore_discard => 1,
);

my $ua = LWP::UserAgent->new();
$ua->agent( "Mozilla/8.0(${^O};retmaspod)" );
$ua->cookie_jar( $cookie_jar );

open my $log_fh, '>', $log_file
    or die "Cannot open $log_file for writing: $!";

###########################################################
## Get Main Search Page - this page contains the engine and session ids

my %FORMOLA;                    # hidden form fields, filled by ParseIt()

my $home_html = fetch_page($url_home);
ParseIt( \$home_html );

for my $id_field (qw(BV_EngineID BV_SessionID)) {
    warn "Hidden field $id_field not found on $url_home\n"
        unless defined $FORMOLA{$id_field};
}

###########################################################
## Get 1st Page of Results

my $search_url = URI->new($url_home);
$search_url->query_form(
    BV_EngineID  => $FORMOLA{BV_EngineID},
    BV_SessionID => $FORMOLA{BV_SessionID},
    QRY          => "res",
    new_search   => "true",
    NAM          => $name,
    PCD          => "BT",
    limit        => $page_size,
    CallingPage  => "Homepage",
    STR          => "",
    LOC          => "",
    GIV          => "",
);
warn Dumper( { $search_url->query_form } );

my $first_html = fetch_page($search_url);

## Find how many pages this search has by matching "Page 1 of N".
## The count is copied into a lexical straight away so the loop below does
## not depend on the regex capture variables still being intact.
my $total_pages = 1;
if ( $first_html =~ /Page (\d+) of (\d+)/ ) {
    $total_pages = $2;
    print "\nPages: $total_pages";
}

###########################################################
## Get subsequent pages for this name search

for my $page_index ( 1 .. $total_pages - 1 ) {

    # NOTE(review): offsets step by $page_size, following the original's
    # (previously unused) "$i*50" computation. The old hard-coded URL used
    # start_id=25, so confirm the page size the site really uses.
    my $start_id = $page_index * $page_size;

    # query_form() escapes $name; the bare "Homepage" token the site
    # expects is then prepended to the already-escaped query string.
    my $page_url = URI->new($url_home);
    $page_url->query_form(
        start_id => $start_id,
        lci      => 0,
        QRY      => "res",
        NAM      => $name,
        PCD      => "BT",
    );
    $page_url->query( 'Homepage&' . $page_url->query );

    print "\n$page_url";
    fetch_page($page_url);
}

close $log_fh or die "Cannot close $log_file: $!";

# fetch_page($url)
#
# GETs $url with the shared user agent, appends the response body to the
# log file and returns the body. A non-success HTTP status is reported
# with warn() but is not fatal; the body is still logged and returned.
sub fetch_page {
    my ($url) = @_;

    my $res  = $ua->request( GET($url) );
    my $body = $res->content();

    print {$log_fh} $body;
    warn "Request for $url failed: " . $res->status_line . "\n"
        unless $res->is_success;

    return $body;
}

# ParseIt(\$html)
#
# Scans the HTML document referenced by the argument and records every
# <input type="hidden"> field in the file-level %FORMOLA hash, keyed by
# the field's name attribute. Inputs without a name are skipped.
sub ParseIt {
    my ($html_ref) = @_;

    my $parser = HTML::TokeParser->new($html_ref)
        or die "Cannot create HTML parser: $!";

    while ( my $token = $parser->get_token() ) {
        # Start-tag tokens have the form [ "S", $tag, \%attr, ... ].
        my ( $type, $tag, $attr ) = @{$token};
        next unless $type eq 'S' && $tag eq 'input';

        # The type attribute may be absent and its value is
        # case-insensitive in HTML.
        my $input_type = defined $attr->{type} ? lc $attr->{type} : '';
        next unless $input_type eq 'hidden';
        next unless defined $attr->{name};

        $FORMOLA{ $attr->{name} } = $attr->{value};
    }

    return;
}