#!c:/perl/bin/perl -w
#
# Count Factiva headlines that mention each executive (CEO) in a list, for
# four different search queries, and append one "name end-date count" record
# per executive to a separate output file for each query.
#
# Usage: perl this_script.pl EXEC_LIST_FILE
#
# NOTE(review): the code that opened the CEO list was missing from the
# original paste (only the comment "open CEO list and create array for the
# list" survived).  Reading the list from a file named on the command line
# is a reconstruction -- confirm against the real input source.

# set up
use strict;
use warnings;

use WWW::Mechanize;
use HTML::TokeParser;

# Pages visited, in this order, to reach the Factiva search form.
# (The original URL contained a mis-decoded "&curren" entity; the query
# parameter is "currentalpha".)
my $RESOURCE_LIST_URL =
    'http://public.kenan-flagler.unc.edu/applications/resources/user/search.cfm?searchtype=alphasearch&currentalpha=F';
my $FACTIVA_LOGIN_URL  = 'http://global.factiva.com/en/sess/login.asp?';
my $FACTIVA_SEARCH_URL = 'http://global.factiva.com/sb/default.aspx?NAPC=S&fcpil=en';

# Query fragments shared by the search terms.
my $SOURCES  = '(rst=(BW OR FB OR FORTU OR NYTF OR TIMAG OR J OR EC OR FTFT))';
my $PROBLEMS = '(litigat* OR investigat* OR bankrupt* OR merge* OR fraud)';

my $mech = WWW::Mechanize->new( cookie_jar => {} );
$mech->agent_alias('Windows IE 6');

# Extract the search window and the executive's name from one list line.
#
# Takes:   the text of one line of the executive list.
# Returns: a hash ref with keys name, lname, dateb and datee, or nothing
#          when the line has no recognisable date or name.
#
# NOTE(review): assumes each line holds a date written month, day, 4-digit
# year (separated by "-", " ", "/" or ".") and a "First Last" name -- TODO
# confirm against the real list file.
sub parse_exec_line {
    my ($line) = @_;

    my ( $month, $day, $year ) =
        $line =~ m{(\d\d)[- /.](\d\d)[- /.](\d\d\d\d)}
        or return;

    # Letters only: \w would also match the digits of the date.
    my ( $first, $last ) = $line =~ m{([[:alpha:]]+)\s+([[:alpha:]]+)}
        or return;

    # The window starts on the first day of the month after the end date's
    # month, one year earlier.  December rolls over to January of the end
    # date's own year instead of producing month 13.
    my $begin_month = $month + 1;
    my $begin_year  = $year - 1;
    if ( $begin_month > 12 ) {
        $begin_month = 1;
        $begin_year++;
    }

    return {
        name  => "$first $last",
        lname => $last,
        dateb => sprintf( '%02d/01/%d', $begin_month, $begin_year ),
        datee => "$month/$day/$year",
    };
}

# Build the four queries for one executive.
#
# Takes:   the hash ref produced by parse_exec_line().
# Returns: a list of [ search term, output file ] pairs.
sub build_searches {
    my ($exec) = @_;

    my $who    = "(($exec->{name}) AND (CEO OR Chief OR Executive))";
    my $near   = "($exec->{lname})/n20/";
    my $window = "(date from $exec->{dateb} to $exec->{datee})";

    return (
        [
            "$SOURCES AND $who AND $near(optimis* OR confident OR confidence) NOT $PROBLEMS AND $window",
            'c:/scripts/datao.txt',
        ],
        [
            "$SOURCES AND $who AND $near(reliable OR cautious OR conservative OR practical OR frugal OR steady) NOT $PROBLEMS AND $window",
            'c:/scripts/datano.txt',
        ],
        [ "$SOURCES AND $who AND $PROBLEMS AND $window", 'c:/scripts/dataprob.txt' ],
        [ "$SOURCES AND $who AND $window",               'c:/scripts/datatot.txt' ],
    );
}

# Walk through the pages that lead to the Factiva search form.
#
# Each link has to be visited because it forwards to the next page and
# checks for authorization (found by trial and error, per the original
# author).  Returns the content of the final page.
sub open_search_page {
    $mech->get($RESOURCE_LIST_URL);

    # follow() is deprecated in WWW::Mechanize; follow_link() replaces it.
    $mech->follow_link( text_regex => qr/Factiva/ );
    $mech->submit();
    $mech->get($FACTIVA_LOGIN_URL);
    $mech->get($FACTIVA_SEARCH_URL);

    return $mech->content();
}

# Run one search and append its headline count to a data file.
#
# Takes:   $searchterm - the query text to submit
#          $datafile   - path of the file the record is appended to
#          $name       - executive name written to the record
#          $datee      - end date written to the record
# Returns: the headline count ('0' when no count is found on the page).
# Dies when the search form is missing or the data file cannot be written.
sub search {
    my ( $searchterm, $datafile, $name, $datee ) = @_;

    #
    # This is where the original author had the problem: because the page
    # is built with JavaScript, the Mechanize module does not recognize the
    # form.  Some way to work around this, or another approach, is needed.
    #
    $mech->form_name('PageBaseForm')
        or die "Search form 'PageBaseForm' not found on the current page\n";
    $mech->select( 'dr',  '_Unspecified' );
    $mech->select( 'sfd', '' );
    $mech->tick( 'istensfn_bool', 'True' );
    $mech->tick( 'ister_bool',    'True' );
    $mech->tick( 'isteo_bool',    'True' );

    # NOTE(review): "Run Search" looks like the label of the submit button
    # rather than the name of the query text field -- verify the field name
    # against the page source.
    $mech->field( 'Run Search', $searchterm );
    $mech->submit();

    # If the form can be submitted, the count is read from the result page.
    # The quote characters around the dash are optional, so a plain
    # "Headlines 1 - 100 of 345" is accepted as well as the quoted form.
    my $results = $mech->content();
    my $count =
        $results =~ m/Headlines\s*\d+\s*'?-'?\s*\d+\s*of\s*(\d+)/i ? $1 : '0';

    # Print the output to a file, one record per line.
    open my $out, '>>', $datafile
        or die "Cannot open $datafile for appending: $!\n";
    print {$out} "$name $datee $count\n"
        or die "Cannot write to $datafile: $!\n";

    # Close the file when this search is done.
    close $out
        or die "Cannot close $datafile: $!\n";

    return $count;
}

# Open the CEO list and create the array for the list.
my $exec_list_file = shift @ARGV;
die "usage: $0 EXEC_LIST_FILE\n" unless defined $exec_list_file;

open my $list_fh, '<', $exec_list_file
    or die "Cannot open $exec_list_file: $!\n";
my @exec = <$list_fh>;
close $list_fh;

foreach my $line (@exec) {
    chomp $line;

    my $exec = parse_exec_line($line);
    if ( !$exec ) {
        warn "Skipping unrecognised line: $line\n";
        next;
    }

    # Get to the search page, then check that it is the right page.
    my $results = open_search_page();
    print $results;

    # Run the search sub routine for every search term and output file.
    # The terms are built here, after the name and dates are known.
    foreach my $job ( build_searches($exec) ) {
        my ( $searchterm, $datafile ) = @{$job};
        search( $searchterm, $datafile, $exec->{name}, $exec->{datee} );
    }
}

#### Original author's closing note: "Here is a small part of the code where
#### I think I should be getting what I need, but you probably are all
#### familiar with this type of code (while I am not)..."
#### (The page source that followed this note is not part of this file.)