in reply to Re^3: searching via www::search on alltheweb
in thread searching via www::search on alltheweb

I haven't been able to get VROOM working. Although I have manually copied and pasted the .pm files into the appropriate locations, c:\perl\site\lib\vroom\search\google.pm and c:\perl\site\lib\vroom\vroom.pm, Perl still says that vroom\search\google cannot be found, and it lists the above directories as the places where it should be found. The package I am using is this:
package VROOM::Search::Google;

#
# Google backend for VROOM::Search.
#
# prepare_request builds the HTTP request for a query; store_results
# parses one fetched result page, records every hit it finds and, when a
# "Next" link is present, points the pending request at the next page.
#

use strict;
use warnings;

use VROOM::Search qw(escape_query unescape_sequence);
use Time::HiRes qw(gettimeofday);

our @ISA = qw(VROOM::Search);

#
# prepare_request($query, $params)
#
#   $query  - raw query string; it is passed through escape_query here.
#   $params - optional hash ref of extra URL parameters. 'baseurl'
#             (default 'http://www.google.com') selects the host and is
#             not sent as a parameter; 'hl' defaults to 'en'. The
#             defaults are written into the caller's hash.
#
# Stores the base URL, the start/end timestamps and the new request on
# $self, and returns the request object.
#
sub prepare_request {
    my ($self, $raw_query, $params) = @_;

    my $query = escape_query($raw_query);

    $params = {} unless defined $params;
    $params->{baseurl} = 'http://www.google.com'
        unless defined $params->{baseurl};
    $params->{hl} = 'en' unless defined $params->{hl};

    $self->{baseurl} = $params->{baseurl};
    my $uri = $self->{baseurl} . '/search?q=' . $query;

    # Sorted keys keep the generated URL stable from run to run.
    # NOTE(review): names and values are appended verbatim, without any
    # URL escaping - confirm callers only pass URL-safe values.
    for my $name (sort keys %$params) {
        # Exact comparison: only the 'baseurl' pseudo-parameter is
        # withheld, not every name that happens to contain that text.
        next if $name eq 'baseurl';
        $uri .= '&' . $name . '=' . $params->{$name};
    }

    $self->{initime} = $self->{endtime} = [gettimeofday];
    $self->{request} = HTTP::Request->new(GET => $uri);

    return $self->{request};
}

#
# store_results($res)
#
#   $res - the HTTP response for the current request.
#
# Returns the number of hits added from this page, or undef when the
# response code is not 200. $self->{request} is left pointing at the
# next page when a "Next" link was seen and is cleared otherwise.
#
sub store_results {
    my ($self, $res) = @_;

    $self->{endtime} = [gettimeofday];

    if ($res->code != 200) {
        $self->{request} = undef;
        return undef;
    }

    #
    # Google doesn't return Content-Length,
    # so ($res->headers)->content_length will be zero. We're forced to
    # use Perl function - length.
    #
    my $content = $res->content;
    $self->{fetch}++;
    $self->{pgsize} += length($content);

    #
    # If we reach here, HTTP response is OK. Proceed to parse the html
    # document for search results
    #
    my ($HIT, $ENTRY, $NEXT) = (0, 1, 2);   # what the parser wants next
    my $rank = $self->count;
    my $hits = 0;
    my $wish = $HIT;

    foreach my $chunk (split(/(<p>|\n|<\/div>)/i, $content)) {
        next if $chunk =~ /^$/;     # short circuit for blank lines
        last if $wish == $NEXT;

        if ($self->count == $self->maximum) {
            $self->{request} = undef;
            return $hits;
        }

        #
        # Ah, found some results. Get approximate results and wish to
        # see the title/url of the first result.
        #
        if ($wish == $HIT && $chunk =~ /Results.*?of.*?([0-9,]+).*?\./i) {
            # NOTE(review): the captured count keeps its thousands
            # separators (e.g. "1,230") - confirm approximate() accepts
            # that form.
            $self->approximate($1);
            $wish = $ENTRY;
        }
        #
        # Extract the url/title and the abstract text
        #
        elsif ($wish == $ENTRY
            && $chunk =~ /^<a href=(.*?)>(.*?)<\/a><br><font.*?>(.*?)$/i)
        {
            my ($url, $title, $abstract) = ($1, $2, $3);

            # Decode %XX sequences, then drop the leading scheme and any
            # trailing slash or /index.htm(l).
            $url =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
            $url =~ s{^http://|/(?:index\.html?)*$}{}g;
            $title =~ s/<.*?>//g;

            my $result = VROOM::Search::Result->new;
            $result->url($url);
            $result->title(unescape_sequence($title));
            $result->text(unescape_sequence($abstract));
            $result->rank(++$rank);
            $result->engine('Google');

            $self->add_result($result);
            $self->{pool}->insert($result) if $self->{pool};
            $hits++;
        }
        #
        # Extract the url for the next page
        #
        elsif ($wish == $ENTRY
            && $chunk =~ /<td nowrap><a href=(.*?)>.*?<span.*?>Next<\/span><\/a>/i)
        {
            $self->{request}->uri($self->{baseurl} . $1);
            $wish = $NEXT;
        }
    }

    #
    # This is important. It signals the search agent not to fetch more
    # pages.
    #
    $self->{request} = undef if $wish != $NEXT;

    return $hits;
}

1;

__END__
My Perl code is this:
#! Perl\bin\perl -w
#
# Runs a Google search through VROOM::Search::Google, prints every result
# URL to STDOUT and also writes it to sample1.txt.
#
use strict;
use warnings;

use VROOM::Search::Google;

my $out_file = 'sample1.txt';
open my $out, '>', $out_file
    or die "Cannot open $out_file for writing: $!";

my $oSearch = VROOM::Search::Google->new();

# NOTE(review): VROOM::Search::Google::prepare_request runs escape_query
# on the query it receives; if native_query hands this string to it
# unchanged, the query is escaped twice - confirm against VROOM::Search.
my $sQuery = VROOM::Search::Google::escape_query('"telefonos" "mundial"');

# NOTE(review): prepare_request only defaults 'hl' to 'en' when its
# parameter hash has no 'hl' entry. Presumably a hash ref such as
# { hl => 'es' } given as a second argument to native_query reaches it and
# selects Spanish - confirm against VROOM::Search before relying on it.
$oSearch->native_query($sQuery);

while (my $oResult = $oSearch->next_result()) {
    print "Adding: ", $oResult->url, "\n";
    print {$out} $oResult->url, "\n";
}

# Buffered write errors only surface at close, so check it.
close $out or die "Cannot close $out_file: $!";

print ref($oSearch);
I still have not been able to figure out how to change the language setting to hl=es instead of hl=en in this case. Any help will be appreciated.