comment on

Hi, I am pretty new to perl, so please be understanding. Below is a perl subroutine, that I inherited, that is used to run a set of "X" tests simultaneously. This subroutine is called by a script that I will call runtasks.pl that takes a file containing a list of tests and runs them. Periodically I am finding that for some reason or another 1 or more of the tests we run do not complete. Each test has a timeout that will terminate the test if exceeds the time limit.

When I use system monitor to see what is going on, none of my tests are running anymore, but I see 2 or more instances of runtasks.pl running. Every instance of runtasks.pl, except 1, that are running, have a pid that relates to the test or tests that have not completed. I am assuming that pid of the process that I cannot match up is the pid of the terminal window where I originally ran runtasks.pl from.

Can somebody help me find what is wrong? I have done a lot of reading on waitpid and on the other commands used in the runall subroutine but I have not found any smoking gun.

Thanks. Mark

sub runAll {
    my $self = shift;
    my $doneThreadsLock : shared = 1;
    my $testQueueLock : shared = 1;

    my @workers;

    for (1..$self->{'maxtasks'}) {
        push @workers, threads::async {

            $SIG{'INT'} =
                sub {
                    my $tid = threads->tid();
                    # Tell user we've been terminated
                    if (exists $self->{'runningTasks'}->{$tid}) {
                        my $worker_task = $self->{'runningTasks'}->{$t
+id};
                        my $exec_name = getExecName(join(" ",@{$worker
+_task->{'command'}}));
                        my $pid = $worker_task->{'pid'};
                        kill(-12, $pid);
#                        print "Worker $tid >> Waiting$exec_name pid: 
+$pid\n";
                        waitpid($pid, 0);
#                        print "Worker $tid >> Done $exec_name pid: $p
+id\n";
                        $worker_task->{'endTime'} = time();
                    }
                    {
                        lock $doneThreadsLock;
                        $self->{'doneThreads'}->enqueue($tid);
                    }
                    print "INT: Worker $tid exiting\n";
                    threads->exit();
                };

            my $tid = threads->tid();
            my $rc;
            my $continueProcessing = 1;
            my $worker_task;
            while ($continueProcessing) {
                { 
                    lock $testQueueLock;
                    $worker_task = $self->{'testQueue'}->dequeue();
                }
                if(!$worker_task) {
                    $continueProcessing = 0;
                    next;
                } 
                my $exec_name = getExecName(join(" ",@{$worker_task->{
+'command'}}));

                chdir ($worker_task->{'workdir'});
                print "Running: $exec_name\n";
                $worker_task->{'startTime'} = time();
                $self->{'runningTasks'}->{$tid} = $worker_task;
                my $cmd = join(" ", @{$worker_task->{'command'}});

                my $pid = fork;
                # if we are child process, pid will be 0 otherwise we 
+are the master
                if ($pid == 0) {
#                    print "pid = 0, $exec_name running as child proce
+ss.\n";
                    my $logfile = FileHandle->new;
                    $logfile->open("> $worker_task->{'log'}");
                    $logfile->autoflush(1);
                    open(STDOUT, '>&=' . $logfile->fileno);

                    open(STDERR, '>&=' . $logfile->fileno);

                    select STDERR; $| = 1; # make unbuffered
                    select STDOUT; $| = 1; # make unbuffered
                    $logfile->close;

#                    print "Execing child process $exec_name pid($pid)
+.\n";
                    {
                        lock $doneThreadsLock;
                        $self->{'doneThreads'}->enqueue($tid);
                    }
                    setpgrp;
                    exec("$cmd");
                    exit(1);
                }
                $worker_task->{'pid'} = $pid;

                my $child_status;
#                print "Waiting for $exec_name $pid to exit\n";
                while (waitpid($pid, POSIX::WNOHANG) != -1) {
                    $child_status = $?;
                    sleep(1);
                }
#                print "Done waiting for $exec_name $pid to exit\n";
                $worker_task->{'endTime'} = time();
                $worker_task->{'status'} = $child_status;
#                print "Done waiting for $exec_name $pid to exit statu
+s ($child_status)\n";
            }
            {
                lock $doneThreadsLock;
                $self->{'doneThreads'}->enqueue($tid);
            }
            threads->exit();
        };
    }
    print "all tasks queued, Now waiting for tasks to exit before queu
+ing more.\n";
    {
        lock $testQueueLock;
        $self->{'testQueue'}->enqueue(undef) for @workers;
    }
    my $threads_exited = 0;
    my $stop_done = 0;
    while ($threads_exited < $self->{'maxtasks'}) {
        my $tid;
        {
            lock $doneThreadsLock;
            $tid = $self->{'doneThreads'}->dequeue_nb();
        }
        if (defined($tid)) {
            threads->object($tid)->join();
            $threads_exited++;
            print "threads_exited($threads_exited) maxtasks:($self->{'
+maxtasks'}).\n";
        } else {
            if ($self->{stop} && !$stop_done) {
                $stop_done = 1;
                foreach my $thr (threads->list()) {
                    $thr->kill('INT');
                }
            } else {
               sleep(1);
            }
        }
    }
}
[download]

In reply to Problem with monitoring running tasks by cariddiEMC

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&`
	<		`<`
	>		`>`
	[		`[`
	]		`]`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.