use strict;
use warnings;

use URI;
use URI::QueryParam;
use Web::Magic 0.008;
use XML::LibXML 1.94;
# Fetch the Google results page for the query "kittens", select the anchors
# that sit directly inside elements with class "r", and print each one as
# "link text <url>".

# Text returned by XML::LibXML nodes is decoded character data. Give STDOUT
# an encoding layer so non-ASCII titles are written as UTF-8 instead of
# triggering "Wide character in print" warnings.
binmode STDOUT, ':encoding(UTF-8)';

Web::Magic
    ->new('http://www.google.co.uk/search', q => 'kittens')
    ->assert_success
    ->assert_content_type('text/html')
    ->make_absolute_urls
    ->findnodes('//*[@class="r"]/*[local-name()="a"]')
    ->foreach(sub {
        # The node list's foreach hands each node over in $_; give it a name
        # so it cannot be clobbered by anything called below.
        my $anchor = $_;
        my $google_munged_url = URI->new($anchor->{href});

        # A link whose "sa" query parameter is "U" is treated as a wrapper
        # whose "q" parameter carries the real destination. Either parameter
        # can be absent, so check definedness before comparing or using the
        # value; otherwise fall back to the link exactly as found.
        my $redirect_flag   = $google_munged_url->query_param('sa');
        my $redirect_target = $google_munged_url->query_param('q');
        my $is_wrapped      = defined $redirect_flag
            && $redirect_flag eq 'U'
            && defined $redirect_target;

        my $fixed_url = $is_wrapped ? $redirect_target : $google_munged_url;

        printf "%s <%s>\n", $anchor->textContent, $fixed_url;
    });
Obviously you need to make sure that whatever scraping you're doing is allowed by the search engine's terms of service.
# Signature one-liner, meant to be run from a shell (-E enables `say`).
# "Monkey say"->Monkey::do invokes Monkey::do with that string as its only
# argument. The sub prints each element of a two-item list: the argument
# itself, then its own fully qualified name (taken from caller(0)) with the
# first "::" replaced by a space. Resulting output:
#   Monkey say
#   Monkey do
perl -E'sub Monkey::do{say$_,for@_,do{($monkey=[caller(0)]->[3])=~s{::}{ }and$monkey}}"Monkey say"->Monkey::do'
|