use strict;
use warnings;
use WWW::Mechanize;
use Storable qw(lock_store lock_retrieve);

my %urls;        # hash containing every URL seen so far (key = URL)
my @unique_urls; # queue of URLs still to be crawled
my $base = "http://www.somedomain.com";
my $mech = WWW::Mechanize->new;
$mech->get($base);

#### Seed %urls and the crawl queue from the start page
%urls = my_own_sub($mech->links); # my own sub that extracts links from the page; keys are the links
@unique_urls = keys %urls;
lock_store \%urls, 'url_ref';
lock_store \@unique_urls, 'unique_ref';

my @child_pids;
for (my $i = 0; $i < 10; $i++) {
    my $pid = fork();
    die "Couldn't fork: $!" unless defined $pid;
    push @child_pids, $pid;

    unless ($pid) {    # child process
        my $url_ref    = lock_retrieve('url_ref');
        my $unique_ref = lock_retrieve('unique_ref');
        print "Number of urls: ", scalar(keys %$url_ref),
              " number of unique urls: ", scalar(@$unique_ref), "\n";

        my $cnt = 0;
        while ($cnt++ < 100 && (my $u = shift @$unique_ref)) { # each child processes at most 100 urls
            $mech->get($u);
            my %links = my_own_sub($mech->links);
            foreach my $link (sort keys %links) {
                next if exists $url_ref->{$link};   # skip links we have already seen
                push @$unique_ref, $link;
                $url_ref->{$link} = 1;
            }
        }
        lock_store $url_ref,    'url_ref';
        lock_store $unique_ref, 'unique_ref';
        sleep(1);
        exit(0);
    }
}

waitpid($_, 0) foreach @child_pids;

my $url_ref    = lock_retrieve('url_ref');
my $unique_ref = lock_retrieve('unique_ref');
print $_, "\n" foreach (sort keys %$url_ref);
print "Number of links left to be crawled: ", scalar(@$unique_ref), "\n";
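For completeness, my_own_sub is referenced above but not shown. A minimal sketch, assuming it simply filters the WWW::Mechanize::Link objects from $mech->links down to absolute http(s) URLs and returns them as hash keys, could look like this (not the original implementation):

# Illustrative sketch only: the real my_own_sub is not shown in the code above.
# Assumption: it takes the link objects from $mech->links and returns a hash
# whose keys are absolute http(s) URLs.
sub my_own_sub {
    my @links = @_;
    my %found;
    foreach my $link (@links) {
        my $url = $link->url_abs->as_string;   # absolute URL as a plain string
        next unless $url =~ m{^https?://}i;    # ignore mailto:, javascript:, etc.
        $found{$url} = 1;
    }
    return %found;
}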