I solved my problem: all I needed to do was move the print statement before the fork. It prints out all the commands now. I'll try the exec function and make sure it works.
I only included the forking subroutine in the previous post. For the sake of completeness here is a condensed (runnable) version of my code:
#!/usr/bin/perl
use IO::File;
use POSIX ":sys_wait_h";
# Driver: log and launch one test command per remote node, then wait for
# all of the children to finish.
#
# JUNKD stays a bareword (package) file handle on purpose: spawn_jobs
# prints each command line to it by name.
open(JUNKD, '>', 'test_rsh-commands.txt')
    or die "Can't open test_rsh-commands.txt for writing: $!";

# Phase 1: Setup phase - install the SIGCHLD handler.
spawn_jobs(1, \&handle_child);

for my $i (1 .. 24) {                 # BEGIN: loop
    my $proc = "sp$i";                # remote node to run command on
    my $cmd  = 'date';                # simple test command

    # Phase 2: Spawn the jobs (1, 2, 3 are per-job values stored by PID).
    spawn_jobs(2, $proc, $cmd, 1, 2, 3);
}                                     # END: loop

# Phase 3: Wait for the jobs to finish (argument is the poll interval in
# seconds passed to sleep).
spawn_jobs(3, 26);

# Buffered write errors only surface at close, so check it.
close(JUNKD)
    or die "Can't close test_rsh-commands.txt: $!";
## BEGIN: Spawn child jobs on slave nodes ##
# spawn_jobs(PHASE, ARGS...) - three-phase helper for running child jobs.
#
#   spawn_jobs(1, $handler)
#       Setup. Installs $handler (a sub name or a code reference) as the
#       SIGCHLD handler and enables autoflush on the currently selected
#       output handle.
#
#   spawn_jobs(2, $proc, $cmd, $nt, $mc, $um)
#       Spawn. Prints "/usr/bin/rsh $proc $cmd" to the JUNKD handle (which
#       the caller must have opened), forks, and exec()s $cmd in the child.
#       In the parent, $nt, $mc and $um are stored in %fhlist, %fhlist2 and
#       %fhlist3 under the child's PID. Dies if fork fails.
#
#   spawn_jobs(3, $sleep)
#       Wait. Sleeps in $sleep-second steps until %fhlist is empty. The
#       SIGCHLD handler is expected to delete entries as children are
#       reaped; this phase does not reap anything itself.
#
# Any other PHASE value is a no-op.
sub spawn_jobs
{
    # Bookkeeping shared with the SIGCHLD handler, keyed by child PID.
    our (%fhlist, %fhlist2, %fhlist3);

    my ($phase, @args) = @_;

    if ($phase == 1) {                # Setup phase
        my ($sub) = @args;

        # Set up the child signal handler. $sub may be a sub name, so
        # symbolic references have to be allowed for this one statement.
        no strict 'refs';
        $SIG{'CHLD'} = \&$sub;
        $|++;
    }
    elsif ($phase == 2) {             # Spawn the jobs phase
        my ($proc, $cmd, $nt, $mc, $um) = @args;

        # Log before forking so the line is written exactly once.
        print JUNKD "/usr/bin/rsh $proc $cmd\n";

        # Hold SIGCHLD back until the PID has been recorded. Otherwise a
        # fast child could be reaped by the handler before its entry
        # exists, and the entry added afterwards would never be removed.
        my $chld_set = POSIX::SigSet->new(POSIX::SIGCHLD());
        my $old_mask = POSIX::SigSet->new();
        POSIX::sigprocmask(POSIX::SIG_BLOCK(), $chld_set, $old_mask);

        my $pid = fork();
        if (!defined $pid) {
            my $msg = "Can't fork: $!";
            POSIX::sigprocmask(POSIX::SIG_SETMASK(), $old_mask);

            # LOG is not opened by this sub; only use it if the caller
            # has it open.
            if (defined fileno(LOG)) {
                print LOG $msg;
                close LOG;
            }
            die $msg;
        }
        elsif ($pid == 0) {
            # Child process. The signal mask survives exec, so restore it
            # first to avoid running the command with SIGCHLD blocked.
            POSIX::sigprocmask(POSIX::SIG_SETMASK(), $old_mask);

            # The real job would be: exec("/usr/bin/rsh $proc $cmd");
            # It is disabled here since not everyone has 24 remote nodes
            # to run on.
            exec($cmd)
                or warn "Can't exec '$cmd': $!\n";

            # Only reached when exec failed. _exit skips END blocks and
            # does not flush the copy of the parent's buffered output
            # that this process inherited.
            POSIX::_exit(127);
        }
        else {
            # Parent process: remember the per-job values under the
            # child's PID and move on with our lives.
            $fhlist{$pid}  = $nt;
            $fhlist2{$pid} = $mc;
            $fhlist3{$pid} = $um;
            POSIX::sigprocmask(POSIX::SIG_SETMASK(), $old_mask);
        }
    }
    elsif ($phase == 3) {             # Wait till the children are done phase
        my ($sleep) = @args;

        # mo' to do... sleep returns early when a signal arrives, so the
        # table is re-checked as soon as a child has been handled.
        sleep($sleep) while keys %fhlist;
    }

    return;
}
### END: Spawn child jobs on slave nodes ##
# handle_child() - SIGCHLD handler.
#
# Reaps every child that has already exited and removes its entries from
# the PID-keyed bookkeeping hashes %fhlist, %fhlist2 and %fhlist3.
# Takes no meaningful arguments and returns nothing.
sub handle_child
{
    # Bookkeeping shared with spawn_jobs, keyed by child PID.
    our (%fhlist, %fhlist2, %fhlist3);

    # waitpid overwrites $? and may set $!; keep the values seen by
    # whatever code this handler interrupted.
    local ($!, $?);

    # This gets called when a child dies... maybe more than one died at
    # the same time, so reap in a loop until nothing is left to collect.
    while ((my $dead_kid = waitpid(-1, WNOHANG)) > 0) {
        # delete returns the stored value, should a caller-specific
        # version of this handler need it.
        delete $fhlist{$dead_kid};
        delete $fhlist2{$dead_kid};
        delete $fhlist3{$dead_kid};
    }

    return;
}
I supplied a simple command (date), but in my full code I am issuing commands to run other programs on the remote node.
Thanks for everyone's input!
|