Something like this might work for you:
#! perl -slw
use strict;
use threads;
use Thread::Queue;
use LWP::Simple;

## (2 to 4) * $noOfCores,
## depending upon your bandwidth, server response times,
## and how hard you feel you should hit them!
my $noOfThreads = 10;

my $firstURL = 'http://www.example.com/thePage.htm';

sub retrieveInfo {
    my( $content ) = @_;
    my $info = parseContent( $content );
    ## do something with the info
    return;
}

sub listParse {
    my( $url, $Qout ) = @_;

    ## Get the first page
    my $content = get $url;

    ## Find the links and push them onto the queue.
    ## (Your pattern goes in place of ...; it should capture
    ## each link into $1.)
    while( $content =~ m[...]g ) {
        $Qout->enqueue( $1 );
    }

    ## Push 1 undef per thread to terminate their loops
    $Qout->enqueue( (undef) x $noOfThreads );
}

sub getHTML {
    my( $Qin ) = @_;

    ## Read a link
    while( my $link = $Qin->dequeue ) {
        ## Fetch the content
        my $content = get $link;

        ## And process it
        retrieveInfo( $content );
    }
}

## Create the queue
my $Qlinks = Thread::Queue->new;

## Start the threads
my @threads = map {
    threads->create( \&getHTML, $Qlinks );
} 1 .. $noOfThreads;

## Fetch and parse the first page; queue the links
listParse( $firstURL, $Qlinks );

## Join the threads
$_->join for @threads;
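For the elided pattern in listParse, something along these lines might do, a minimal sketch only, assuming the links you want are absolute, double-quoted hrefs (adjust to match your actual page):

    ## Hypothetical link-matching pattern -- illustrative, not from
    ## the original post. Captures each double-quoted absolute href
    ## into $1, which the loop body then enqueues.
    while( $content =~ m[<a\s+[^>]*href="(http://[^"]+)"]ig ) {
        $Qout->enqueue( $1 );
    }

For anything beyond a quick one-off, a real HTML parser (e.g. HTML::LinkExtor) is more robust than a regex.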