#! perl -slw
use strict;
use threads;
use threads::shared;
use Thread::Queue;

## Number of worker threads fetching URLs concurrently.
use constant NTHREADS => 20;

## NOTE(review): connectdb, dosql, checkhtml, inserthtml, isforeignlanguage
## and resolve_charset are not defined in this file, and LWP::UserAgent /
## HTTP::Request are not loaded here -- presumably all are provided
## elsewhere; confirm before running standalone.

## Each element is an array ref; the worker unpacks its fields as
## ( id, url, title, excerpt ).
my @itemstoget = (); ## Get the items from somewhere?

## Work queue: one NUL-joined record per item, followed by one undef
## per worker so that every worker sees an end-of-work marker.
my $Qwork = Thread::Queue->new;
$Qwork->enqueue( join chr(0), @{ $_ } ) for @itemstoget;
$Qwork->enqueue( (undef) x NTHREADS );

my $Qresults = Thread::Queue->new;

## Count of workers that have not finished yet. It is preset to NTHREADS
## *before* any thread is spawned so the main loop can never observe 0
## while workers are still starting up; workers only ever decrement it.
my $running : shared = NTHREADS;

threads->new( \&thread, $Qwork, $Qresults )->detach for 1 .. NTHREADS;

my $dbh = connectdb( 'blogdb' );

## Drain results until every worker has finished and the queue is empty.
## A worker enqueues its last result before decrementing $running, so once
## $running is 0 the pending check sees everything that was produced.
while ( $running or $Qresults->pending ) {
    sleep( 1 ), next unless $Qresults->pending;

    ## LIMIT 3 keeps NUL bytes inside the HTML intact and preserves an
    ## empty trailing $html field instead of leaving it undef.
    my ( $id, $url, $html ) = split chr(0), $Qresults->dequeue, 3;

    if ( $html ne 'FAILED' ) {
        # if the html meets the criteria for at least one client that
        # claims it, extract the text
        if ( checkhtml( $dbh, $id, $html ) ) {
            inserthtml( $dbh, $id, $html );
            print "OK $id ", substr( $url, 0, 50 ), "\n";
        }
        else {
            print "SKIP $url\n";
            ## NOTE(review): $id is interpolated into the SQL text; if
            ## dosql supports placeholders, prefer them -- confirm.
            dosql( $dbh, "update blogitems set getattempts=999 where id=$id" );
        }
    }
    else {
        print "FOREIGN or FAILED $url\n";
        dosql( $dbh, "update blogitems set getattempts=999 where id=$id" );
    }
}

$dbh->disconnect;
exit;

## Worker thread body.
##   $Qwork    - Thread::Queue of NUL-joined "id, url, title, excerpt"
##               records; an undef item means "no more work".
##   $Qresults - Thread::Queue receiving NUL-joined "id, url, html"
##               records, where html is the literal 'FAILED' when the
##               fetch failed or isforeignlanguage() returned true.
## Decrements the shared $running counter exactly once on exit, even if
## the fetch loop dies, so the main loop cannot wait forever on a dead
## worker.
sub thread {
    my ( $Qwork, $Qresults ) = @_;

    my $completed = eval {
        my $user_agent = LWP::UserAgent->new;
        $user_agent->timeout( 30 );

        ## Test definedness, not truth: only the undef sentinel ends
        ## the loop, never a false-looking record such as "0".
        while ( defined( my $item = $Qwork->dequeue ) ) {
            ## LIMIT 4 keeps any NUL bytes inside the excerpt.
            my ( $id, $url, $title, $excerpt ) = split chr(0), $item, 4;

            my $request  = HTTP::Request->new( 'GET', $url );
            my $response = $user_agent->request( $request );

            my $html =
                (      $response->is_success
                    && !isforeignlanguage( $response, $title, $excerpt, $url ) )
                ? resolve_charset( $response->content )
                : 'FAILED';

            $Qresults->enqueue( join chr(0), $id, $url, $html );
        }
        1;
    };
    warn 'Worker thread ', threads->tid, " aborted: $@" unless $completed;

    { lock $running; --$running }
    return;
}