use strict;
use warnings;
use threads;
use Thread::Queue;
use LWP::UserAgent;
use HTTP::Request;

# queue is the URL queue the downloaders read from
# filename names a file holding one URL per line
# workers is the number of downloader threads, so we know how many end-of-data markers to queue
sub ReadURLS {
    my ($queue,$filename,$workers) = @_;
    # Use three-arg open for security reasons, and die on errors so we don't spew nonsense or crash worse later.
    open my $urlFile,'<',$filename or die "ReadURLS: can't open $filename: $!";
    chomp(my @urls = <$urlFile>); # strip the trailing newlines so the URLs are clean
    close $urlFile;
    # Place each line into the queue, followed by one undef per downloader to signal the end of data.
    $queue->enqueue(@urls, (undef) x $workers);
    return 1; # Success! Return true. Or, if you're a unixy person, return 0 or maybe even "0 but true".
}

# Inqueue should be the queue object passed to ReadURLS
# Outqueue should be the queue object passed to ParseContent
sub DownloadContent {
    my ($outqueue,$inqueue) = @_;
    # Each thread needs its own UA
    my $ua = LWP::UserAgent->new;
    $ua->agent("Mozilla/4.0 (compatible; MSIE 5.0; Windows 98; DigExt)");
    $ua->timeout(15);
    # Wait for data, and stop when undef comes down the pipe (that means there's no more).
    while (defined(my $url = $inqueue->dequeue)) {
        # This part should look familiar.
        print "Downloading: $url\n";
        my $req = HTTP::Request->new(GET => $url);
        my $response = $ua->request($req);
        $outqueue->enqueue($response->content()); # this changes: send the output to the next task handler
    }
    $outqueue->enqueue(undef); # this downloader is done; one marker per downloader means one per parser
    return 1; # See the return comment above
}

# inqueue should be the outqueue from the downloader sub
# outqueue should be passed to the output sub
# regex is, of course, your regex. Passing it in allows the code to be re-used.
# You could also consider taking some parsing rules and using an HTML parser of some type...
sub ParseContent {
    my ($outqueue,$inqueue,$regex) = @_;
    while (defined(my $content = $inqueue->dequeue)) {
        $outqueue->enqueue(join '', $content =~ m/$regex/m, "\n");
    }
    $outqueue->enqueue(undef); # this parser is done; the writer counts these markers
    return 1;
}

# queue should be the outqueue passed to ParseContent
# workers is the number of parser threads feeding it, so we know how many end-of-data markers to wait for
sub WriteOut {
    my ($queue,$filename,$workers) = @_;
    open my $outFH,'>>',$filename or die "WriteOut: open failed: $!";
    my $finished = 0;
    while ($finished < $workers) {
        my $data = $queue->dequeue;
        if (!defined $data) { # one more parser has finished
            $finished++;
            next;
        }
        print $outFH $data;
    }
    close $outFH;
    return 1;
}
####
my $nr_workers = 5; # the number of side-by-side downloaders and parsers. Better yet, take it as an argument
my $urlfile = "url_planets.txt";  # see the comment about arguments
my $outfile = "planet_names.txt"; # arguments would be nice here too, but that's not the current point

my $URLQueue     = Thread::Queue->new;
my $ContentQueue = Thread::Queue->new;
my $ParsedQueue  = Thread::Queue->new;

my @threadObjs;
# Create the reading thread and store a reference to it in @threadObjs; this will be important later.
push @threadObjs, threads->create(\&ReadURLS, $URLQueue, $urlfile, $nr_workers);

# Set up the workers; any number of them can manipulate the queues.
for (1..$nr_workers) {
    push @threadObjs, threads->create(\&DownloadContent, $ContentQueue, $URLQueue);
    push @threadObjs, threads->create(\&ParseContent, $ParsedQueue, $ContentQueue, qr!Rotations(.*)!);
}
push @threadObjs, threads->create(\&WriteOut, $ParsedQueue, $outfile, $nr_workers);

# Now that all the threads are created, the main thread should join each of its child thread objects.
# That asks perl to clean up after them, and it keeps the main thread from exiting before they are
# done and terminating them abruptly.
foreach my $thr (@threadObjs) {
    $thr->join(); # join returns the thread's return value; check it only if you really need to
}
# At this point, barring some horrible catastrophe, the specified $outfile should hold the desired output.
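####
# The driver block above hard-codes the worker count and the file names, and its
# comments suggest taking them as arguments instead. A minimal sketch of that
# variant (the usage message and the fallback of five workers are illustrative
# choices, nothing more) would replace the three hard-coded assignments:
my ($urlfile, $outfile, $nr_workers) = @ARGV;
die "Usage: $0 url_file output_file [nr_workers]\n"
    unless defined $urlfile and defined $outfile;
$nr_workers ||= 5; # illustrative default: five side-by-side downloaders and parsers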