Category: Utility
Author/Contact Info Sam Vilain <sam@vilain.net>
Description: Run some commands in parallel, saving the output to some files. An illustrative exercise rather than production code, but fairly easily adaptable. I wrote this a while ago and am posting it so I can refer to an old snippet.
#!/usr/bin/perl
#
# thread pooler (public domain)
#

sub do_action ($@) {
    my ($logfile, @cmd) = (@_);

    print "Spawning $logfile as pid $$ - @cmd...\n";
    open STDOUT, ">>log.$logfile";
    open STDERR, ">&STDOUT";
    open STDIN, "</dev/null";
    exec @cmd;
}


sub do_actions($$@) {
# keep four threads
    my $NUM_THREADS = 4;
    local $active_threads = 0;

# %pidtracker is a mapping of pids to module names
# %modstatus is a mapping of module names to exit codes
    local (%pidtracker, %modstatus, %starttime, %finishtime);

# donelist is added to by the signal handler that reaps children
    local @donelist;
    sub showdone {
 while ($#donelist >= 0) {
     $a=pop @donelist;
     $delta = $finishtime{$pidtracker{$a}} -
  $starttime{$pidtracker{$a}}; 
     if ($modstatus{$pidtracker{$a}} & 255) {
  print "We culled $pidtracker{$a} with signal " .
      "$modstatus{$pidtracker{$a}}\n";
     } else {
  $rtncode = ($modstatus{$pidtracker{$a}} >> 8);
  if ($rtncode == 0) {
      print "Module $pidtracker{$a} returned ok after ${delta}s\n";
  } else {
      print "Module $pidtracker{$a} exited with error code " .
   "$rtncode after ${delta}s\n";
  }
     }
     delete $pidtracker{$a};
 }
    }

    $SIG{CHLD} = sub {  # a lot in a signal handler, but hey.
 my $pid = wait;
 --$active_threads;
 $modstatus{$pidtracker{$pid}} = $?;
 push @donelist, $pid;
 $finishtime{$pidtracker{$pid}} = time();
    };
    $SIG{ALRM} = sub { };

    my ($cmd,$timeout,@modules) = (@_);
    print "Running modules @modules\n";
    @modules = reverse @modules;

    while (($#modules >= 0) or ($active_threads > 0)) {
 if ($active_threads < $NUM_THREADS and ($#modules >= 0)) {
     my $this_module = pop @modules;
     $modstatus{$this_module} = "still running";
     $finishtime{$this_module} = 0;
     my $pid;
     if ($pid = fork) {
  ++$active_threads;
  $pidtracker{$pid} = $this_module;  # running (allocate space so
  # the signal handler doesn't core)
  $starttime{$this_module} = time();
     } else {
  do_action $this_module, "$cmd";
  die; # not reached
     }
 } else {
     for (values %pidtracker) {
  if (not $oldestchild or $oldestchild > $starttime{$_}) {
      $oldestchild = $_;
  }
     }
     $sleeptime = ($oldestchild + $timeout - time());
     if ($sleeptime < 2) {$sleeptime = 2};
     sleep $sleeptime;   # will abort if children return
   }
 
 for (keys %pidtracker) {
     my $age = time() - $starttime{$pidtracker{$_}} - $timeout;
     if ($age > 0) {
  if ($age > 5) {
      print "Killing hard $pidtracker{$_}\n";
      kill "KILL", $_;
  } else {
      print "Killing $pidtracker{$_} (timeout " .
   "${timeout}s)\n";
      kill "TERM", $_;
  }
     }
 }
 pause;
 &showdone;
    }
}

#---------------------------------------------------------------------
+--------
#  MAIN SECTION STARTS HERE
#---------------------------------------------------------------------
+--------

@modules = qw(foo bar baz bat banana snafu fubar as many we like);

do_actions "sleep `perl -e 'print int rand 20'`", 10, @modules;
Replies are listed 'Best First'.
Re (tilly) 1: Run some commands in parallel
by tilly (Archbishop) on Aug 21, 2001 at 05:39 UTC
    Be warned that Perl does not have reliable signal handling - it will occasionally dump core. And the more you put into the signals, the more likely it is to dump core!

    A more reliable approach to a related though slightly different problem is offered at Run commands in parallel. If you want to capture the output you could always open a series of pipes and use select (or IO::Select) to read from them. (Or write to files and read back. etc.)