#!/bin/perl
use strict;
use warnings;
use Time::Local qw(timelocal);

# Kills stray "mpp" processes on the compute nodes.
#
# The start times of the runs reported by lstc_qrun are compared with the
# start times of the mpp processes found on each node.  A process whose start
# time is not within $MAX_GAP_SECONDS of ANY reported run is killed (kill -9).
#
# Known problems:
#  - A broken run started at the same time as a long good run will survive
#    for as long as the good run does.
#  - Run timestamps carry no year; the year is inferred (see stamp_to_epoch).
#  - Timestamps are compared with minute resolution.

# License tool that lists the runs currently in progress.
my $QRUN = '/opt/lsdyna/License/lstc_qrun';

# A process must have started within this many seconds of some run to be kept.
my $MAX_GAP_SECONDS = 60 * 60;

# A year-less timestamp may appear this far in the future (clock skew) before
# it is assumed to belong to the previous year.
my $FUTURE_SLACK_SECONDS = 24 * 60 * 60;

my @nodes = qw(node2 node3 node4 node5 node6 node7 node8 node9 node10);

my @run_epochs = read_run_epochs();

# Check every node in turn.
foreach my $nodenum (@nodes) {
    print "Testing $nodenum: ";

    my @procs   = read_mpp_processes($nodenum);
    my $procnum = scalar @procs;

    # Purely informational, shows that the check is running.
    print "$procnum processes found.\n";

    foreach my $proc (@procs) {
        # Number of runs that started less than the allowed gap away.
        my $near_runs =
            grep { abs($proc->{epoch} - $_) < $MAX_GAP_SECONDS } @run_epochs;
        next if $near_runs;

        print "$proc->{pid} is outside expected time range.\n";

        # List form: no local shell is involved, and pid is digits only.
        my @cmd = ('ssh', $nodenum, 'kill', '-9', $proc->{pid});

        # Printed so it is visible what is killed and how.
        print "@cmd\n";
        system(@cmd) == 0
            or warn "Command '@cmd' failed (wait status $?)\n";
    }
}

# Returns the start times (epoch seconds) of the runs listed by lstc_qrun.
#
# Only lines containing '@' are treated as run lines, and the timestamp is
# taken from columns 57-68 of such a line.  NOTE(review): that column range is
# inherited from the previous `cut -c 57-68` and is assumed to hold
# "Mon DD HH:MM" - confirm against real lstc_qrun output.
#
# Dies when the tool cannot be started, or when it exits with an error and
# produced no usable run line: an empty run list makes the caller kill every
# mpp process, so a failed query must not be mistaken for "no runs".
sub read_run_epochs {
    open my $qrun, '-|', $QRUN
        or die "Cannot run $QRUN: $!\n";

    my @epochs;
    while (my $line = <$qrun>) {
        next if index($line, '@') < 0;
        chomp $line;

        my $field = length($line) > 56 ? substr($line, 56, 12) : '';
        my $epoch = stamp_to_epoch(cleandate($field));
        if (defined $epoch) {
            push @epochs, $epoch;
        }
        else {
            warn "Ignoring run line with unreadable start time: $line\n";
        }
    }

    if (!close $qrun) {
        die "$QRUN failed (wait status $?) and listed no runs; "
            . "refusing to kill anything\n"
            if !@epochs;
        warn "$QRUN exited with wait status $?\n";
    }

    return @epochs;
}

# Returns one { pid => ..., epoch => ... } hash per mpp process on $node.
#
# PID and start time are taken from the same `ps` line of a single snapshot,
# so they always belong to the same process.  Lines that cannot be parsed are
# skipped with a warning (and therefore never killed).  If the ssh/ps call
# fails, a warning is issued and an empty list is returned.
sub read_mpp_processes {
    my ($node) = @_;

    my $ps;
    if (!open $ps, '-|', 'ssh', $node, 'ps', '-eo', 'pid,lstart,cmd') {
        warn "Cannot run ssh for $node: $!\n";
        return;
    }

    my @procs;
    while (my $line = <$ps>) {
        next if $line !~ /mpp/;
        chomp $line;

        # lstart is expected to look like "Tue Mar  5 10:23:45 2024".
        my ($pid, $stamp, $year) = $line =~ m{
            \A \s* (\d+) \s+                          # pid
            [A-Za-z]{3} \s+                           # weekday (unused)
            ( [A-Za-z]{3} \s+ \d{1,2} \s+ \d\d:\d\d ) # "Mon DD HH:MM"
            :\d\d \s+                                 # seconds (dropped)
            (\d{4}) \s                                # year
        }x;

        my $epoch = defined $pid
            ? stamp_to_epoch(cleandate($stamp), $year)
            : undef;
        if (!defined $epoch) {
            warn "Ignoring unparsable ps line from $node: $line\n";
            next;
        }

        push @procs, { pid => $pid, epoch => $epoch };
    }

    if (!close $ps) {
        warn "ssh/ps on $node failed (wait status $?); skipping node\n";
        return;
    }

    return @procs;
}

# Converts an "MMDDHHMM" stamp (as produced by cleandate) to epoch seconds in
# local time.  Returns nothing (undef in scalar context) if the stamp is
# malformed or is not a valid calendar date.
#
# $year is optional.  Without it, the current year is tried first; if that
# would put the timestamp more than $FUTURE_SLACK_SECONDS in the future, or
# is not a valid date (Feb 29), the previous year is used.  This keeps
# comparisons correct for runs started shortly before the new year.
sub stamp_to_epoch {
    my ($stamp, $year) = @_;

    my ($mon, $mday, $hour, $min) =
        $stamp =~ /\A(\d\d)(\d\d)(\d\d)(\d\d)\z/
        or return;

    my $now       = time;
    my $this_year = (localtime $now)[5] + 1900;
    my @years     = defined $year ? ($year) : ($this_year, $this_year - 1);

    foreach my $try (@years) {
        # timelocal croaks on out-of-range values.
        my $epoch = eval { timelocal(0, $min, $hour, $mday, $mon - 1, $try) };
        next if !defined $epoch;
        next if !defined $year && $epoch > $now + $FUTURE_SLACK_SECONDS;
        return $epoch;
    }

    return;
}

# Turns "Mon DD HH:MM" text into the digit string "MMDDHHMM".
#
# The argument is modified IN PLACE (existing contract); the cleaned string is
# also returned for convenience.  Line breaks are left untouched, so a
# multi-line string can still be split afterwards.
sub cleandate {
    my %month_number = (
        Jan => '01', Feb => '02', Mar => '03', Apr => '04',
        May => '05', Jun => '06', Jul => '07', Aug => '08',
        Sep => '09', Oct => '10', Nov => '11', Dec => '12',
    );

    # Add a leading 0 to single-digit days.
    $_[0] =~ s/\s(\d\s)/0$1/g;

    # Change month names to their numeric value.
    $_[0] =~ s/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)/$month_number{$1}/g;

    # Remove colons and spaces.
    $_[0] =~ s/://g;
    $_[0] =~ s/ //g;

    return $_[0];
}