#!/usr/bin/perl
#
# Drives a running Firefox (through WWW::Mechanize::Firefox) to search the
# LexisNexis form at $search_url for one document id per calendar day of
# $year, in the form "CR-YYYY-MMDD", and saves every hit to "CRYYYYMMDD.pdf"
# in the current directory.  Progress is printed to STDOUT; a date whose
# navigation fails is reported on STDERR and the crawl moves on.

use strict;
use warnings;
use WWW::Mechanize::Firefox;

my $id       = "DELETED";
my $password = "DELETED";

# Search form, reached through the Harvard proxy.
my $search_url = "http://web.lexis-nexis.com.ezp-prod1.hul.harvard.edu/congcomp/form/cong/s_pubbasic.html?_m=62485f04b0083ffbe44503686c0779a2&wchp=dGLzVtb-zSkSA&_md5=9885e06fb7c73a073134e39a0198b6b7";

# Argument for $mech->reload(): a true value asks the browser not to serve
# the page from its cache.
# NOTE(review): the original passed an undeclared $bypass_cache, which does
# not compile under strict; true is assumed to be the intent -- confirm.
my $bypass_cache = 1;

#Activate Agent
my $mech = WWW::Mechanize::Firefox->new(
    activate => 1,
    bufsize  => 1_000_000_000,
);

#Define set of search terms
my $year = "1951";

for my $month_number (1 .. 12) {
    my $month = sprintf "%02d", $month_number;

    # Only iterate over days that exist, so no request is spent on ids such
    # as CR-1951-0230.
    for my $day_number (1 .. days_in_month($year, $month_number)) {
        my $day      = sprintf "%02d", $day_number;
        my $search   = "CR-$year-$month$day";
        my $filename = "CR$year$month$day.pdf";

        print "Searching for document: $search\n";

        # Mechanize dies when a link or form is missing; contain that to
        # the current date instead of aborting the whole run.
        my $found = eval { fetch_document($search, $filename) };
        if (!defined $found) {
            warn "CR-$year-$month$day failed: $@\n";
            next;
        }

        if ($found) {
            print "CR-$year-$month$day downloaded\n\n";
        }
        else {
            print "CR-$year-$month$day not found\n\n";
        }
        #sleep 2;
    }
}

# Returns the number of days in month $month_of_year (1-12) of $for_year,
# using the Gregorian leap-year rule.
sub days_in_month {
    my ($for_year, $month_of_year) = @_;

    my @days_per_month = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
    my $is_leap_year =
        ($for_year % 4 == 0 && $for_year % 100 != 0) || $for_year % 400 == 0;

    return 29 if $month_of_year == 2 && $is_leap_year;
    return $days_per_month[ $month_of_year - 1 ];
}

# Loads the search form into the browser, logging in first when the proxy
# answers with its PIN login page.  Dies if the login page is still shown
# after the credentials were submitted.
sub open_search_form {
    #Go To Lexis Nexis
    $mech->get($search_url);

    if ($mech->content =~ /\bHarvard University PIN Login\b/) {
        $mech->form_number(1);
        $mech->field("__authen_id",       $id);
        $mech->field("__authen_password", $password);
        $mech->submit();
        $mech->follow_link(n => 6);    #follows link to content

        # Request the form again so the caller always starts from the search
        # page.  The original stopped here, which silently skipped the date
        # being processed whenever a login was required.
        $mech->get($search_url);
        die "still on the login page after submitting credentials\n"
            if $mech->content =~ /\bHarvard University PIN Login\b/;
    }
    else {
        $mech->reload($bypass_cache);
    }

    return;
}

# Searches for document id $search and, if there are results, walks the
# result links to the PDF and saves it as $filename.
# Returns 1 when a file was requested for download, 0 when the site reported
# "No Documents Found".  Dies (via Mechanize) when navigation fails.
sub fetch_document {
    my ($search, $filename) = @_;

    open_search_form();

    #Fill in Search Form
    $mech->form_number(1);
    $mech->field("thes1", $search);
    $mech->click({ xpath => '/html/body/table/tbody/tr/td[2]/div/div/form/div[2]/div/div[2]/p[2]/a' });

    #Check Whether any Results
    return 0 if $mech->content =~ /\bNo Documents Found\b/;

    #If find results negotiate way to PDF file
    # The link positions are tied to the current page layout of the site.
    $mech->follow_link(n => 9);
    $mech->follow_link(n => 10);
    $mech->follow_link(n => 18, synchronize => 0);

    #Download PDF To Disk
    my $file = $mech->uri();
    $mech->get($file, ':content_file' => $filename, synchronize => 0);

    return 1;
}