in reply to Return all the data from child to parent with Parallel::Forkmanager

Here's a way that avoids (for no particular good reason ) writing disk files by using perl's forked open. It also avoids eval (for maybe slightly better reasons ).

#!/usr/bin/perl

# http://perlmonks.org/?node_id=1197603
#
# Run several subs in parallel child processes and collect each one's
# return value in the parent without temporary files.  Every child is
# started with a forking open ('-|'), so whatever the child prints to its
# STDOUT arrives in the parent on a pipe.  The child serialises its return
# value with Storable::freeze; the parent rebuilds it with thaw.

use strict;
use warnings;

use Data::Dumper;
use Storable qw( freeze thaw );
use IO::Select;

my $sel = IO::Select->new;
my %returndata;    # stringified pipe handle => bytes received so far

# Code refs rather than sub names: this way you don't need to do an eval.
my @subs = (\&read_genome, \&read_mapfile, \&read_GTF, \&read_RM);

for my $sub (@subs)    # start all forks
{
    # A forking open returns the child's pid in the parent, 0 in the child
    # and undef when the fork itself failed.  The undef case must not be
    # mistaken for "I am the child".
    my $pid = open my $fh, '-|';
    defined $pid or die "Cannot fork: $!";

    if ($pid)    # parent
    {
        binmode $fh;    # the Storable image is binary data
        $sel->add($fh);
        $returndata{$fh} = '';
    }
    else         # child
    {
        binmode STDOUT;
        print freeze( $sub->() );
        exit;
    }
}

while ( $sel->count )    # get return data
{
    for my $fh ( $sel->can_read )
    {
        my $got = sysread $fh, $returndata{$fh}, 16 * 1024,
            length( $returndata{$fh} );

        next if $got;    # received a chunk; keep reading until EOF

        # $got is 0 at end of file and undef on a read error.
        my $read_error = defined $got ? '' : "$!";
        my $frozen     = delete $returndata{$fh};
        $sel->remove($fh);

        # Closing a piped handle waits for the child and sets $?.
        close $fh
            or warn $!
            ? "Error closing child pipe: $!\n"
            : "Child exited with wait status $?\n";

        if ( $read_error ne '' )
        {
            warn "Read from child failed: $read_error\n";
            next;
        }

        # A child that died before printing sends nothing at all.
        my $answer = length $frozen ? thaw($frozen) : undef;

        print Dumper $answer;    # or whatever you want to do with it
    }
}

sub read_genome
{
    # do something
    select undef, undef, undef, .1 + rand 1;    # simulate processing time
    return { from => 'read_genome', results => { 1 .. 4 } };
}

sub read_mapfile
{
    # do something
    select undef, undef, undef, .1 + rand 1;    # simulate processing time
    return { from => 'read_mapfile', results => { 5 .. 8 } };
}

sub read_GTF
{
    # do something
    select undef, undef, undef, .1 + rand 1;    # simulate processing time
    return { from => 'read_GTF', results => { 1 .. 10 } };
}

sub read_RM
{
    # do something
    select undef, undef, undef, .1 + rand 1;    # simulate processing time
    return { from => 'read_RM', results => { 2 .. 5 } };
}

Prints (in a different order each time it's run)

$VAR1 = { 'results' => { '2' => 3, '4' => 5 }, 'from' => 'read_RM' }; $VAR1 = { 'results' => { '5' => 6, '1' => 2, '9' => 10, '7' => 8, '3' => 4 }, 'from' => 'read_GTF' }; $VAR1 = { 'from' => 'read_genome', 'results' => { '3' => 4, '1' => 2 } }; $VAR1 = { 'from' => 'read_mapfile', 'results' => { '7' => 8, '5' => 6 } };

Replies are listed 'Best First'.
Re^2: Return all the data from child to parent with Parallel::Forkmanager
by marioroy (Prior) on Aug 19, 2017 at 15:54 UTC

    Hi tybalt89,

    Using your solution, I tried adding exception support in the event the worker died.

    #!/usr/bin/perl

    # Run each task in its own child process (or inline when $threads == 1),
    # collecting for every task either its return value or the exception it
    # threw.  Children are started with a forking open ('-|') and send a
    # Storable-frozen hash { result => ..., error => ... } back on the pipe.

    use strict;
    use warnings;
    use feature 'say';

    use Data::Dumper;
    use Storable qw( freeze thaw );
    use IO::Select;

    my $threads = 4;
    my $sel = IO::Select->new;
    my %seldata;    # stringified pipe handle => { id => task index, buf => bytes so far }

    my @name = ('read_genome', 'read_mapfile', 'read_GTF', 'read_RM');
    my @task = (\&read_genome, \&read_mapfile, \&read_GTF, \&read_RM);
    my %ret;        # task index => { result => ..., error => ... }

    if ($threads == 1) {
        for my $id (0 .. $#task) {
            $ret{$id} = {};
            $ret{$id}{result} = eval { $task[$id]->() };
            $ret{$id}{error} = $@;
        }
    }
    else {
        # start all forks
        for my $id (0 .. $#task) {
            # A forking open returns the child's pid in the parent, 0 in the
            # child and undef when the fork failed; undef must not be taken
            # for "I am the child".
            my $pid = open my $fh, '-|';
            defined $pid or die "Cannot fork for $name[$id]: $!";

            if ($pid) {    # parent
                binmode $fh;    # the Storable image is binary data
                $sel->add($fh);

                # Remember the task index on the parent side, so a child
                # that sends nothing can still be attributed to its task.
                $seldata{$fh} = { id => $id, buf => '' };
            }
            else {         # child
                my $data = {};
                $data->{result} = eval { $task[$id]->() };
                $data->{error} = $@;
                binmode STDOUT;
                print freeze($data);
                exit;
            }
        }

        # acquire data
        while ( $sel->count ) {
            for my $fh ( $sel->can_read ) {
                my $slot = $seldata{$fh};
                my $got  = sysread $fh, $slot->{buf}, 16 * 1024,
                    length( $slot->{buf} );

                next if $got;    # received a chunk; keep reading until EOF

                # $got is 0 at end of file and undef on a read error.
                my $read_error = defined $got ? '' : "$!";
                delete $seldata{$fh};
                $sel->remove($fh);

                # Closing a piped handle waits for the child and sets $?.
                close $fh;
                my $status = $?;

                my $answer;
                if ( $read_error eq '' && length $slot->{buf} ) {
                    $answer = eval { thaw( $slot->{buf} ) };
                }

                if ( ref $answer eq 'HASH' ) {
                    $ret{ $slot->{id} } = {
                        result => $answer->{result},
                        error  => $answer->{error},
                    };
                }
                else {
                    # The child sent nothing usable (killed, crashed outside
                    # the eval, or the read failed): record that as an error
                    # rather than reporting an undef "result".
                    my $why = $read_error ne ''
                        ? "read failed: $read_error"
                        : "no usable data received (wait status $status)";
                    $ret{ $slot->{id} } = {
                        result => undef,
                        error  => "worker failed: $why\n",
                    };
                }
            }
        }
    }

    sub read_genome {
        # do something
        return { 'aa' => 'bb' };
    }

    sub read_mapfile {
        # do something
        return { 'cc' => 'dd' };
    }

    sub read_GTF {
        # do something
        die 'exception';
        return { 'ee' => 'ff' };    # not reached
    }

    sub read_RM {
        # do something
        return { 'gg' => 'hh' };
    }

    # use data generated in the subroutines
    for my $id (0 .. $#task) {
        say "## ", $name[$id];
        if (length $ret{$id}{error}) {
            say "ERROR: ", $ret{$id}{error};
            next;
        }
        say Dumper($ret{$id}{result});
    }

    Output:

    ## read_genome $VAR1 = { 'aa' => 'bb' }; ## read_mapfile $VAR1 = { 'cc' => 'dd' }; ## read_GTF ERROR: exception at j0.pl line 71. ## read_RM $VAR1 = { 'gg' => 'hh' };

    Regards, Mario