#!/usr/bin/perl
use WWW::Mechanize;
#!/usr/bin/perl -w
use strict;
use Win32::OLE qw(in with);
use Win32::OLE::Const 'Microsoft Excel';
$Win32::OLE::Warn = 3; # die on errors.
+..
# get already active Excel application or open new
my $Excel = Win32::OLE->GetActiveObject('Excel.Application')
|| Win32::OLE->new('Excel.Application', 'Quit');
# open Excel file
my $Book = $Excel->Workbooks->Open("C:/Documents and Settings/rto5u/My
+ Documents/CV.xls");
# select worksheet number 1 (you can also select a worksheet by name)
my $Sheet = $Book->Worksheets(1);
foreach my $row (2..4)
{
foreach my $col (1..1)
{
# skip empty cells
next unless defined $Sheet->Cells($row,$col)->{'Value'};
my $URL = 'http://scholar.google.com/advanced_scholar_search';
my $FORM_NAME = 'f';
#print "Author Name: ";
#chomp ($AUTHOR = <>);
my $AUTHOR = "MD Li";
#print "Paper Title: ";
#chomp ($TITLE = <>);
my $TITLE = $Sheet->Cells($row,$col)->{'Value'};
print "$TITLE";
#my $TITLE = "Region-specific transcriptional response to chro
+nic nicotine in rat brain";
my $mech = WWW::Mechanize->new(stack_depth=>10);
$mech->get($URL) || die ("Could not connect to $URL.\n");
my $res = $mech->submit_form(
form_name => $FORM_NAME,
fields => {
'num' => 100,
'as_epq' => $TITLE,
'as_occt' => 'title',
'as_sauthors' => $AUTHOR,
'as_allsubj' => 'all',
},
);
while ($res && $res->is_success()){
my $content = $res->content;
#print $content;
while ($content =~ /<p class=g>(.*?)<\/font>\s\s\s/gs){
my $section = $1;
my $title = "";
my $citedby = 0;
# get title
$title = getTitle($section);
$title =~ s/<.*?>//g;
$title =~ s/…/\.\.\./g;
# get citedby #
$citedby = getCitedBy($section);
if ($citedby){
print "\"$title\"\nCited by: $citedby\n\n";
}
}
$res = $mech->follow_link( text_regex => qr/Next/i);
}
}
}
$Book->Close;
######################################################################
+#######
sub getTitle($){
my ($section) = @_;
my $title;
if ($section =~ /<span class="w">.*?<a href.*?>(.*?)<\/a><\/span>/
+s){ # papers with a link
$title = $1;
}elsif ($section =~ / (.*?)<font size=-1>/s){ # pa
+pers w/o a link
$title = $1;
}else{
$title = $1;
}
return $title;
}
#---------------------------------------------------------------------
+-------
sub getCitedBy($){
my ($section) = @_;
my $citedby;
if ($section =~ />Cited by (\d+)</s){
$citedby = $1;
}
return $citedby;
}
#---------------------------------------------------------------------
+-------
The two programs work separately. I tried to put the fetch.pl program within the for loop that goes through the paper titles in the excel spreadsheet...I tried trouble shooting the best I could, but the problem consistently turns about to be, "Can't call method "url" on an undefined value at C:/strawberry/perl/site/lib/WWW/Mechanize.pm line 707" Again I apologize for my lack of familiarity with the customs of this forum. I'm not necessarily new to programming, but I am extremely new to Perl syntax |