hhheng has asked for the wisdom of the Perl Monks concerning the following question:

I want the script to post data to more than 1000 sites and grab the posting results. However, it always stops after around 100 sites with the following script:
# This is just a rough listing of my code, not exactly what it is;
# I'm just listing it here to show my problem.
my $counter = 0;
my $sth;
my %form;

# There are more than 1000 URLs in the free_dirs table.
my $sql = "SELECT name,url,sub_url,pr,link_type,dir_type,cb_name,cb_value FROM free_dirs";
$sth = $dbh->prepare($sql);
$sth->execute();

while (my $db_row = $sth->fetchrow_hashref()) {
    my ($name, $url, $sub_url, $pr, $link_type, $dir_type, $cb_name, $cb_value)
        = @{$db_row}{qw/name url sub_url pr link_type dir_type cb_name cb_value/};

    # Time out after 30 seconds if fetching a site's content does not succeed.
    my $timeout = 30;
    %form = ();
    $counter++;

    # Get the content of the URL.
    my $page_content = grab_page_content($sub_url, $timeout);
    if ($page_content) {
        my ($cat_name, $cat_id);
        if ($dir_type eq 'PHPLD_0') {
            ($cat_name, $cat_id) = grab_cat_from_submit_page($page_content);
        } else {
            $page_content = grab_page_content($url, $timeout);
            ($cat_name, $cat_id) = grab_cat($url, $page_content);
        }
        if ($cat_name && $cat_id) {
            # Add data to %form here for posting.
            $SIG{ALRM} = sub { print "timeout" };
            my $res;
            eval {
                alarm($timeout);
                $res = $ua->post($sub_url, \%form);
                alarm(0);
            };
            $page_content = $res->content;
            # Print out the post result based on $page_content.
        }
    }
}

sub grab_cat_from_submit_page {
    # Grab cat name and id here.
    # This sub does not call grab_page_content.
}

sub grab_cat {
    my ($url, $page_content) = @_;
    my $timeout = 30;
    # Grab cat name and id here.
    # This sub calls grab_page_content from 1 up to a maximum of 10 times.
    # I think this is what causes the problem.
}

sub grab_page_content {
    my ($url, $timeout) = @_;
    my $res;
    eval {
        local $SIG{ALRM} = sub { print "timeout"; };
        alarm($timeout);
        $res = $ua->get($url);
        alarm(0);
    };
    my $page_content;
    $page_content = $res->content if $res && $res->is_success;
    return $page_content;
}
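For reference, my understanding of the eval/alarm idiom from perldoc -f alarm, adapted to an LWP call, is below; the handler there dies rather than prints, so the eval can actually catch the timeout ($ua and $url here are placeholders, not my real values):

use strict;
use warnings;
use LWP::UserAgent;

my $ua      = LWP::UserAgent->new;
my $url     = 'http://example.com/';   # placeholder
my $timeout = 30;

my $res;
eval {
    local $SIG{ALRM} = sub { die "alarm\n" };   # NB: \n required
    alarm($timeout);
    $res = $ua->get($url);
    alarm(0);
};
alarm(0);                                       # clear any pending alarm
if ($@) {
    die $@ unless $@ eq "alarm\n";              # propagate unexpected errors
    warn "request for $url timed out\n";
}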
I tried using Sys::SigAction in grab_page_content, as shown below, but I still have the same problem.
sub grab_page_content {
    my ($url, $method, $timeout) = @_;
    use Sys::SigAction qw( timeout_call );
    our $res = undef;
    if ( timeout_call( 5, sub { $res = $ua->get($url); } ) ) {
        $res = HTTP::Response->new(408);   # 408 is the HTTP request timeout status
    }
    my $page_content;
    $page_content = $res->content if $res->is_success;
    # For PHPLD directories, on a script error the page outputs no HTML tags.
    $page_content = 0 if !$page_content || $page_content !~ m{<html}si;
    return $page_content;
}
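I know LWP::UserAgent also has a built-in timeout option; a minimal sketch of that approach is below ($url is a placeholder; as I understand it, that timeout applies per connect/read rather than to the whole request):

use strict;
use warnings;
use LWP::UserAgent;

my $ua  = LWP::UserAgent->new(timeout => 30);   # seconds per connect/read
my $url = 'http://example.com/';                # placeholder

my $res = $ua->get($url);
if ($res->is_success) {
    my $page_content = $res->content;
    # process $page_content here ...
}
else {
    warn "fetch of $url failed: " . $res->status_line . "\n";
}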
Can anybody suggest a solution?
