I haven't been able to get VROOM working. I manually copied the .pm files to c:\perl\site\lib\vroom\search\google.pm and c:\perl\site\lib\vroom\vroom.pm, but Perl still says that vroom\search\google cannot be found, and it lists the above directories as the places where the module should be found. The package I am using is this:
package VROOM::Search::Google;
use strict;
use VROOM::Search qw(escape_query unescape_sequence);
use Time::HiRes qw(gettimeofday);
@VROOM::Search::Google::ISA = qw(VROOM::Search);
#
# prepare_request(QUERY, PARAMS)
#
# Build the first Google search request for QUERY and store it in
# $self->{request}.
#
#   QUERY  - the search string; it is passed through escape_query() here.
#   PARAMS - optional hash ref of extra query-string parameters.
#            'baseurl' overrides the default http://www.google.com and is
#            never sent as a parameter. 'hl' defaults to 'en'; pass
#            { hl => 'es' } to send hl=es instead. Missing defaults are
#            written back into the caller's hash.
#
# Also records the start time in $self->{initime} / $self->{endtime} and
# the effective base URL in $self->{baseurl}.
#
# Returns the newly created HTTP::Request object.
#
sub prepare_request
{
    my $self   = shift;
    my $query  = escape_query(shift);
    my $params = shift;
    my $uri    = 'http://www.google.com';

    $params->{baseurl} = $uri unless defined $params->{baseurl};
    $params->{hl}      = 'en' unless defined $params->{hl};
    $self->{baseurl} = $uri = $params->{baseurl};

    $uri .= '/search?q='.$query;

    # Iterate over sorted keys rather than each(): keys() resets the hash
    # iterator (each() would resume wherever a caller left it and could
    # skip parameters), and sorting makes the generated URL reproducible.
    foreach my $name (sort keys %$params) {
        # Exact comparison: only the 'baseurl' key itself is internal.
        next if $name eq 'baseurl';
        # NOTE(review): values are appended verbatim, so callers must pass
        # values that are already URL-safe.
        $uri .= '&'.$name.'='.$params->{$name};
    }

    # Both timestamps start out as the same moment (and the same array ref).
    $self->{initime} = $self->{endtime} = [gettimeofday];

    return $self->{request} = HTTP::Request->new(GET => $uri);
}
#
# store_results(RESPONSE)
#
# Parse one Google result page and record every hit found on it.
#
#   RESPONSE - the HTTP response object for the page; only its code() and
#              content() methods are used.
#
# For each hit a VROOM::Search::Result is created, handed to
# $self->add_result() and, when $self->{pool} is set, inserted into the
# pool as well. If a "Next" link is found, $self->{request} is re-pointed
# at the next page; otherwise $self->{request} is set to undef.
#
# Returns the number of hits parsed from this page, or undef when the
# response code is not 200.
#
sub store_results
{
    my $self = shift;
    my $res  = shift;

    $self->{endtime} = [gettimeofday];
    if ($res->code != 200) {
        $self->{request} = undef;
        # Kept as an explicit undef so existing callers see the same value.
        return undef;
    }

    #
    # Google doesn't return Content-Length,
    # so ($res->headers)->content_length will be zero. We're forced to
    # use Perl function - length.
    #
    my $content = $res->content;
    $self->{fetch}++;
    $self->{pgsize} += length($content);

    #
    # If we reach here, HTTP response is OK. Proceed to parse the html
    # document for search results
    #
    # Parser states: waiting for the hit-count line, reading result
    # entries, next-page link found.
    my ($HIT, $ENTRY, $NEXT) = (0, 1, 2);
    my $rank   = $self->count;
    my $hits   = 0;
    my $wish   = $HIT;
    my $result = undef;

    # A named loop variable is used instead of $_ so that the method calls
    # made inside the loop cannot clobber the chunk being examined.
    foreach my $chunk (split(/(<p>|\n|<\/div>)/i, $content)) {
        next if $chunk =~ /^$/;    # short circuit for blank lines
        last if $wish == $NEXT;

        if ($self->count == $self->maximum) {
            $self->{request} = undef;
            return $hits;
        }

        #
        # Ah,found some results. Get approximate results and wish to
        # see the title/url of the first result.
        #
        if ($wish == $HIT && $chunk =~ /Results.*?of.*?([0-9,]+).*?\./i) {
            my $count = $1;
            $self->approximate($count);
            $wish = $ENTRY;
        }
        #
        # Extract the url/title and wish to have abstract text
        #
        elsif ($wish == $ENTRY &&
               $chunk =~ /^<a href=(.*?)>(.*?)<\/a><br><font.*?>(.*?)$/i) {
            my $url      = $1;
            my $title    = $2;
            my $abstract = $3;

            # Decode %XX escapes, then drop the leading "http://" and a
            # trailing "/", "/index.htm" or "/index.html". The dots are
            # escaped so that only a literal "." matches.
            $url =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
            $url =~ s{(?:^http://|/(?:index\.html?)*$)}{}g;
            $title =~ s/<.*?>//g;

            $result = VROOM::Search::Result->new;
            $result->url($url);
            $result->title(unescape_sequence($title));
            $result->text(unescape_sequence($abstract));
            $result->rank(++$rank);
            $result->engine('Google');
            $self->add_result($result);
            $self->{pool}->insert($result) if $self->{pool};
            $hits++;
        }
        #
        # Extract the url for the next page
        #
        elsif ($wish == $ENTRY &&
               $chunk =~ /<td nowrap><a href=(.*?)>.*?<span.*?>Next<\/span><\/a>/i) {
            $self->{request}->uri($self->{baseurl}.$1);
            $wish = $NEXT;
        }
    }

    #
    # This is important. It signals the search agent not to fetch more
    # pages.
    #
    $self->{request} = undef if $wish != $NEXT;
    return $hits;
}
1;
__END__
My Perl code is this:
#! Perl\bin\perl -w
# Run a Google search through VROOM::Search::Google, print every result URL
# to STDOUT and also write it to sample1.txt.
use strict;
use VROOM::Search::Google;

# Three-argument open with a lexical handle instead of a bareword handle.
open my $out_fh, '>', 'sample1.txt' or die "$!";

my $oSearch = VROOM::Search::Google->new();

# The query literal is kept on one line: the forum's line wrapping had
# inserted a newline and a "+" inside the quoted string.
# NOTE(review): prepare_request() in VROOM::Search::Google also calls
# escape_query() on its query argument - confirm that native_query() does
# not cause the query to be escaped twice.
my $sQuery = VROOM::Search::Google::escape_query('"telefonos" "mundial"');

# NOTE(review): prepare_request() takes a params hash ref after the query
# and only defaults hl to 'en' when it is not supplied. Presumably
# native_query($sQuery, { hl => 'es' }) forwards that hash - verify against
# VROOM::Search::native_query.
$oSearch->native_query($sQuery);

while ( my $oResult = $oSearch->next_result() ) {
    print "Adding: ", $oResult->url, "\n";
    print {$out_fh} $oResult->url, "\n";
}

# Buffered write errors only surface at close, so check it.
close $out_fh or die "$!";

print ref($oSearch);
I still have not been able to figure out how to change the language parameter to hl=es in this case instead of en. Any help will be appreciated.