#!/usr/bin/perl
#
# Queue checker daemon.
#
# Repeatedly pulls up to 100 rows from the `queue` table, fetches each row's
# `page_url` in parallel via HTTP::Async, counts how many times `url2` occurs
# in the fetched page, stores the count plus the Base64-encoded page source in
# the `cheker` table, and finally removes the processed row from `queue`.
#
# The process id is written to $HOME/pidfile at start-up.

use strict;
use warnings;

use MIME::Base64;
use Encode qw(encode);
use DBI;
use DBI qw(:sql_types);
require HTTP::Request;
require HTTP::Response;
use HTTP::Async;
use List::MoreUtils;

# Original author's note: HTTP::Async's timeout is broken by default. Check
# the module's CPAN bug tracker for how to fix it.
my $async = HTTP::Async->new( timeout => 60, slots => 100 );    # I'm on a terribly slow line

# Record our pid so the process can be found later (will run as a user).
my $pidfile = "$ENV{HOME}/pidfile";
open my $pid_fh, '>', $pidfile or die "Can't open $pidfile: $!";
print {$pid_fh} $$;
close $pid_fh or warn "Can't close $pidfile: $!";

# Definition of variables.
my $db              = "databasename";
my $host            = "localhost";
my $user            = "username";
my $password        = "password";
my $verbose_logging = -1;    # >= 0 enables the per-row progress messages

while (1) {
    sleep rand 3;

    my $dbh = DBI->connect( "DBI:SQLite:$db", '', '' )
        or die "Can't connect to database: $DBI::errstr\n";

    # Load the next batch of work.
    my $select_sth = $dbh->prepare(
        "SELECT `page_url`, `url2`, `idx`, `date_time`, `userid` FROM `queue` LIMIT 100"
    ) or die "Can't prepare queue query: ", $dbh->errstr, "\n";
    $select_sth->execute();

    my @queue_rows;
    while ( my @row = $select_sth->fetchrow_array ) {
        push @queue_rows, [@row];
    }
    warn "Problem in retrieving results", $select_sth->errstr, "\n"
        if $select_sth->err;

    # Queue one GET per row and remember which row each request id belongs
    # to. HTTP::Async ids keep increasing for the lifetime of $async, so they
    # cannot be turned into an index of the current batch by arithmetic.
    my %row_for_id;
    for my $row (@queue_rows) {
        # List context matters: add() returns the list of assigned ids.
        my ($request_id) = $async->add( HTTP::Request->new( GET => $row->[0] ) );
        $row_for_id{$request_id} = $row;
    }

    if (%row_for_id) {

        # Prepare the write statements once per batch, not once per response.
        # NOTE(review): the table is spelled `cheker` while the log message
        # says "checker" -- left unchanged; confirm against the schema.
        my $insert_sth = $dbh->prepare(
            "INSERT INTO cheker (`active`,`page_url`,`url2`,`date_time`,`userid`,`html_source`) VALUES (?,?,?,?,?,?);"
        ) or die "Can't prepare insert: ", $dbh->errstr, "\n";
        my $delete_sth = $dbh->prepare("DELETE FROM queue WHERE `idx` = ?")
            or die "Can't prepare delete: ", $dbh->errstr, "\n";

        while ( $async->not_empty ) {
            my ( $response, $id ) = $async->wait_for_next_response
                or next;

            print $async->info;

            my $row = delete $row_for_id{$id};
            if ( !$row ) {
                warn "Received response for unknown request id $id\n";
                next;
            }
            my ( $page_url, $url2, $idx, $date_time, $userid ) = @{$row};

            # decoded_content may be undef when the body cannot be decoded.
            my $content = $response->decoded_content;
            $content = '' unless defined $content;

            # Count literal, case-insensitive occurrences of url2. \Q...\E
            # stops URL characters such as '.' and '?' from acting as regex
            # metacharacters; the guard avoids Perl's "empty pattern reuses
            # the last successful pattern" behaviour.
            my $urlcount = 0;
            if ( defined $url2 && length $url2 ) {
                $urlcount++ while $content =~ m/\Q$url2\E/gi;
            }

            $content = encode_base64( encode( "UTF-8", $content ) );

            $insert_sth->execute( $urlcount, $page_url, $url2, $date_time,
                $userid, $content );
            warn "Problem inserting into cheker: ", $insert_sth->errstr, "\n"
                if $insert_sth->err;
            print "Inserted record into checker\n" if ( $verbose_logging >= 0 );

            # NOTE(review): as in the original, the queue row is removed even
            # when the insert above failed.
            $delete_sth->execute($idx);
            print "Deleted row from queue\n" if ( $verbose_logging >= 0 );
            warn "Problem deleting from queue: ", $delete_sth->errstr, "\n"
                if $delete_sth->err;
        }
    }

    # Release the statement handle and connection before the next cycle.
    $select_sth->finish;
    $dbh->disconnect;
}