#!/usr/bin/perl
#
# Looks up paper titles from an Excel worksheet on Google Scholar.
#
# For each non-empty cell in rows 2..4, column 1 of the first worksheet, the
# script submits the Scholar advanced-search form (exact phrase in title,
# fixed author), walks every result page via its "Next" link, and prints each
# hit's title together with its "Cited by" count.

use strict;
use warnings;

use WWW::Mechanize;
use Win32::OLE qw(in with);
use Win32::OLE::Const 'Microsoft Excel';

$Win32::OLE::Warn = 3;    # die on errors...

# getCitedBy is defined (with a prototype) after its call site; declaring it
# up front makes the prototype known before the call is compiled.
sub getCitedBy($);

# Search settings; identical for every spreadsheet row, so set once.
my $URL       = 'http://scholar.google.com/advanced_scholar_search';
my $FORM_NAME = 'f';

#print "Author Name: ";
#chomp ($AUTHOR = <>);
my $AUTHOR = "MD Li";

# get already active Excel application or open new
my $Excel = Win32::OLE->GetActiveObject('Excel.Application')
    || Win32::OLE->new('Excel.Application', 'Quit');

# open Excel file
my $Book = $Excel->Workbooks->Open("C:/Documents and Settings/rto5u/My Documents/CV.xls");

# select worksheet number 1 (you can also select a worksheet by name)
my $Sheet = $Book->Worksheets(1);

foreach my $row (2..4) {
    foreach my $col (1..1) {
        #print "Paper Title: ";
        #chomp ($TITLE = <>);
        #my $TITLE = "Region-specific transcriptional response to chronic nicotine in rat brain";
        my $TITLE = $Sheet->Cells($row,$col)->{'Value'};

        # skip empty cells
        next unless defined $TITLE;

        print "Author Name: $AUTHOR\n";
        print "Paper Title: $TITLE\n";

        my $mech = WWW::Mechanize->new(stack_depth=>10);
        $mech->get($URL) or die "Could not connect to $URL.\n";

        my $res = $mech->submit_form(
            form_name => $FORM_NAME,
            fields    => {
                'num'         => 100,
                'as_epq'      => $TITLE,
                'as_occt'     => 'title',
                'as_sauthors' => $AUTHOR,
                'as_allsubj'  => 'all',
            },
        );

        # Walk the result pages until there is no further "Next" link.
        while ($res && $res->is_success()) {
            my $content = $res->content;
            #print $content;

            # NOTE(review): the start of this pattern is just two newlines,
            # which looks like HTML markup was stripped from the source at
            # some point -- confirm against the live page markup.
            while ($content =~ /\n\n(.*?)<\/font>\s\s\s/gs) {
                my $section = $1;
                my $citedby = 0;

                # get title
                my $title = getTitle($section);
                $title =~ s/<.*?>//g;
                $title =~ s/…/\.\.\./g;

                # get citedby
                # NOTE(review): in the collapsed source this call sat behind
                # a '#'. It is live here because otherwise $citedby stays 0
                # and the print below can never run -- confirm intent.
                $citedby = getCitedBy($section);

                if ($citedby) {
                    print "\"$title\"\nCited by: $citedby\n\n";
                }
            }

            # follow_link() throws under Mechanize's default autocheck when
            # no link matches, which would abort the run on the last result
            # page. Look the link up first and stop cleanly when it is absent.
            my $next = $mech->find_link(text_regex => qr/Next/i);
            last unless $next;
            $res = $mech->get($next->url_abs);
        }
    }
}

$Book->Close;

#############################################################################
# getTitle($section)
#
# Extracts the paper title from one search-result section.
#
#   $section - text of a single result, as captured by the page-level loop.
#
# Returns the captured title text; when neither pattern matches, returns
# $section itself so the caller always gets a defined string.
#
# The former "($)" prototype was dropped: the only call is compiled before
# this definition, so the prototype was never applied to it.
sub getTitle {
    my ($section) = @_;

    # papers with a link
    if ($section =~ /.*?(.*?)<\/a><\/span>/s) {
        return $1;
    }

    # papers w/o a link
    # NOTE(review): a lazy (.*?) at the very end of a pattern can only
    # capture the empty string, so this branch yields '' whenever the section
    # contains a space. The pattern looks like it lost its surrounding
    # markup -- confirm.
    if ($section =~ / (.*?)/s) {
        return $1;
    }

    # Neither pattern matched. Reading $1 here would only reflect whatever
    # match last succeeded in the caller, so fall back to the section text
    # explicitly.
    return $section;
}

#----------------------------------------------------------------------------
sub getCitedBy($){ my ($section) = @_; my $citedby; if ($section =~ />Cited by (\d+)