I changed the code to use SQLite (and fixed a few problems along the way, such as the one that would have let me serve you SQL if I knew you were crawling my site :)), and it works just fine here. What do you mean by "jumbled" contents, anyway?
Here's what works for me:
#!/usr/bin/perl
# Strictures come first so they cover every statement in the script.
use strict;
use warnings;
use MIME::Base64;
use Encode qw(encode);
use DBI;
use DBI qw(:sql_types);
require HTTP::Request;
require HTTP::Response;
use HTTP::Async;
use List::MoreUtils;

# HTTP::Async timeout is broken by default. Check the CPAN page for how
# to fix it; it's in the bugs page.
my $async = HTTP::Async->new( timeout => 60, slots => 100 );    # I'm on a terribly slow line

# Record our process id in ~/pidfile (the script will run as a user).
# Three-argument open with a lexical handle keeps characters in $ENV{HOME}
# from being interpreted as part of the open mode.
my $pidfile = "$ENV{HOME}/pidfile";
open my $pid_fh, '>', $pidfile or die "Can't open $pidfile: $!";
print {$pid_fh} $$;
# Buffered write errors only surface at close, so check it.
close $pid_fh or die "Can't write $pidfile: $!";
# Definition of variables.
my $db       = "databasename";    # SQLite database file
my $host     = "localhost";       # not used by the SQLite connection below
my $user     = "username";        # not used by the SQLite connection below
my $password = "password";        # not used by the SQLite connection below
my $verbose_logging = -1;         # progress messages are printed when >= 0

# Connect and prepare once: the loop below never ends, so reconnecting on
# every pass would pile up database handles that are never released.
my $dbh = DBI->connect( "DBI:SQLite:$db", '', '' )
    or die "Can't connect to database: $DBI::errstr\n";

my $select_queue = $dbh->prepare(
    "SELECT `page_url`, `url2`, `idx`, `date_time`, `userid` FROM `queue` LIMIT 100"
) or die "Can't prepare queue query: " . $dbh->errstr . "\n";

my $insert_result = $dbh->prepare(
    "INSERT INTO cheker (`active`,`page_url`,`url2`,`date_time`,`userid`,`html_source`)
     VALUES (?,?,?,?,?,?);"
) or die "Can't prepare insert: " . $dbh->errstr . "\n";

my $delete_queued = $dbh->prepare("DELETE FROM queue WHERE `idx` = ?")
    or die "Can't prepare delete: " . $dbh->errstr . "\n";

# Main loop: fetch up to 100 queued URLs, download them in parallel, store
# each page (base64 of its UTF-8 bytes) in `cheker` together with the number
# of times url2 occurs in it, then remove the entry from `queue`.
while (1) {
    sleep rand 3;

    # Maps the request id handed out by HTTP::Async to the queue row that
    # caused the request. The ids keep growing for the lifetime of $async,
    # so "$id - 1" is only a valid array index during the very first batch.
    my %row_for;

    if ( !$select_queue->execute ) {
        warn "Problem in retrieving results", $select_queue->errstr, "\n";
        next;
    }
    while ( my @row = $select_queue->fetchrow_array ) {
        my $request_id = $async->add( HTTP::Request->new( GET => $row[0] ) );
        $row_for{$request_id} = [@row];
    }
    warn "Problem in retrieving results", $select_queue->errstr, "\n"
        if $select_queue->err;

    while ( $async->not_empty ) {
        my ( $response, $id ) = $async->wait_for_next_response;
        next if !defined $response;

        print $async->info;

        my $row = delete $row_for{$id};
        if ( !$row ) {
            warn "Got a response for unknown request id $id\n";
            next;
        }
        my ( $page_url, $url2, $idx, $date_time, $userid ) = @{$row};

        # decoded_content can be undef (e.g. unsupported encoding).
        my $content = $response->decoded_content;
        $content = '' if !defined $content;

        # Count case-insensitive literal occurrences of url2 in the page.
        # \Q..\E keeps '.', '?' and friends in the URL from acting as regex
        # operators. An empty url2 is skipped: an empty pattern does not
        # mean "no match" in Perl and would produce a bogus count.
        my $urlcount = 0;
        if ( defined $url2 && length $url2 ) {
            $urlcount++ while $content =~ m/\Q$url2\E/gi;
        }

        my $encoded = encode_base64( encode( "UTF-8", $content ) );

        if ( !$insert_result->execute(
                $urlcount, $page_url, $url2, $date_time, $userid, $encoded ) )
        {
            # Report the failing statement's own error, and keep the queue
            # row so the URL is retried on a later pass instead of lost.
            warn "Problem inserting into cheker: ", $insert_result->errstr, "\n";
            next;
        }
        print "Inserted record into checker\n" if $verbose_logging >= 0;

        if ( $delete_queued->execute($idx) ) {
            print "Deleted row from queue\n" if $verbose_logging >= 0;
        }
        else {
            warn "Problem deleting from queue: ", $delete_queued->errstr, "\n";
        }
    }
}
SQLite setup (enter these statements at the prompt after running `sqlite3 databasename`):
-- Results table: one row per fetched page; html_source holds the base64 text.
CREATE TABLE cheker (active tinyint, page_url varchar, url2 varchar, date_time datetime, userid integer, html_source text);
-- Work queue read by the script. Rows are deleted by idx once processed,
-- so every queued row needs its own idx value.
CREATE TABLE queue (page_url varchar, url2 varchar, idx integer, date_time datetime, userid integer);
insert into queue values ('http://google.com/', '', 1, date(), 1);
insert into queue values ('http://gmx.com/', '', 2, date(), 1);
insert into queue values ('http://twitter.com/', '', 3, date(), 1);
To test:
sqlite3 -list databasename "select html_source from cheker"|base64 -d|less