This is untested on a multicore machine, but it should be free of deadlocks and synchronisation issues. It will certainly need tuning for best performance, but try it on your data and see how you fare.
Run as: scriptname inputfile >outputfile
#! perl -sw
use strict;
use threads;
use threads::shared;
use Thread::Queue;

## Number of worker threads; tune to taste
use constant WORKERS => 4;

sub worker {
    my( $Qin, $outRef ) = @_;
    ## Exits when it dequeues an undef terminator
    while( my $in = $Qin->dequeue ) {
        my( $no, $text ) = split ':', $in, 2;

        ########## Manipulate data below ########
        my $out = $text;

        ## Lock the shared array itself, not the local reference
        lock @{ $outRef };
        $outRef->[ $no ] = $out;
    }
}

my @out :shared;                 ## Results, indexed by input line number
my $Qin = Thread::Queue->new;
my @threads = map threads->create( \&worker, $Qin, \@out ), 1 .. WORKERS;

my $last :shared = 0;            ## Highest line number enqueued so far

## Reader thread: feed numbered lines to the workers
my $producer = async {
    while( <> ) {
        sleep 1 while $Qin->pending > 100;   ## Crude throttle on queue depth
        $Qin->enqueue( "$.:$_" );
        lock $last;
        $last = $.;
    }
    ## One undef per worker tells them to exit
    $Qin->enqueue( (undef) x WORKERS );
};

## Print results in input order as they become available
my $next = 1;
while( threads->list( threads::running ) ) {
    if( defined $out[ $next ] ) {
        print $out[ $next ];
        undef $out[ $next++ ];
    }
    else {
        sleep 1;
    }
}

## Drain anything completed after the threads exited
while( $next <= $last ) {
    sleep 1 until defined $out[ $next ];
    print $out[ $next ];
    undef $out[ $next++ ];
}

$producer->join;
$_->join for @threads;
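If it helps to see the slot in use, here's a minimal sketch of what might go in the "Manipulate data below" section. The uppercasing is purely hypothetical, a stand-in for whatever per-line work your data actually needs:

## Hypothetical per-line transform; replace with your real processing
chomp( my $out = $text );  ## drop the newline while we work on the line
$out = uc $out;            ## stand-in work: uppercase the line
$out .= "\n";              ## restore the record separator before output

Anything that operates on one line in isolation will slot in there; if your processing needs to see several lines at once, this line-per-queue-item design won't fit without rework.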
In reply to Re: How do you parallelize STDIN for large file processing? by BrowserUk
in thread How do you parallelize STDIN for large file processing? by forsaken75