There is a smallish (8 node) computing cluster where I work, and
there has recently been some contention over sharing CPU time.
Three separate groups "bought into" the cluster in differing
amounts (5 shares for one group, 2 shares for a second, and
one share for a third). Each group thus "owns" that many nodes,
and should get first priority on those they own. However, if
they are not running anything on their node(s), the spare
CPU cycles are fair game for the other groups. Thus was born
the Autonicer.
Currently, it requires BSD-style 'ps' for collecting data, and does
not take into account how long the processes have
been running (although ps does, to some extent).
#!/usr/bin/perl -wT
# Taint mode refuses to start external commands while PATH is untrusted,
# and the shell can also be steered through IFS, CDPATH, ENV and BASH_ENV,
# so all of them are removed before anything is executed.
delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)};
use strict;
# Script to auto-renice jobs based on who is running them, and where.
# This is based on unix group membership, so make sure that
# /etc/group is correct.
#
# "Hogs" are processes that are using lots of CPU, don't
# belong to root and are not already niced.
# Ownership table: which unix group has first claim on each node.
# Keys are node names exactly as 'hostname' reports them; values must
# be valid unix group names.
my %nodes = (
    node0 => 'groupA',
    node1 => 'groupA',
    node2 => 'groupA',
    node3 => 'groupB',
    node4 => 'groupC',
    node5 => 'groupA',
    node6 => 'groupA',
    node7 => 'groupA',
);
# Location of ps and the arguments that make it print one
# "user pid %cpu" record per process, without a header line.
my $ps = '/bin/ps --noheader -eo user,pid,pcpu';

# A process using at least this much %cpu is considered a hog.
my $hog_level = 25;

# Nice value that offending processes are set to.
my $nice_level = 19;

# Name of the machine we are running on; used as the key into %nodes.
chomp(my $node = `/bin/uname -n`);

# Filled in by the main section below: the hog pid => user map and the
# raw process records.
my %cpu_hogs;
my @procs;
###############################################################################
# Pick the CPU hogs out of a process listing.
#
# Arguments:
#   $level - minimum %cpu for a process to count as a hog; a false value
#            (undef, 0, '') falls back to 75.
#   @procs - one "user pid %cpu" record per element, whitespace-separated.
#
# Returns a flat pid => user list (assign it to a hash).  Processes owned
# by root are never reported, and records that do not parse are skipped.
sub get_hogs {
    my ($level, @procs) = @_;
    $level ||= 75;

    my %hogs;
    for my $line (@procs) {
        # split ' ' discards leading whitespace, so an indented record
        # cannot shift every field one column to the right.
        my ($user, $pid, $cpu) = split ' ', $line;

        # Ignore blank or truncated records, and anything whose pid or
        # %cpu column is not a plain number, instead of warning on undef.
        next unless defined $cpu;
        next unless $pid =~ /\A\d+\z/;
        next unless $cpu =~ /\A\d+(?:\.\d+)?\z/;

        # skip root-owned processes
        next if $user eq 'root';

        $hogs{$pid} = $user if $cpu >= $level;
    }
    return %hogs;
}
# Run the ps command held in $ps and return its output as a list, one
# process record per element, with leading spaces removed.
sub get_procs {
    # The command is run on its own rather than piped through sed: with
    # PATH deleted from the environment an unqualified 'sed' depends on
    # the shell's built-in default path.  Trimming is done in Perl instead.
    my $listing = `$ps`;
    if (!defined $listing || $? != 0) {
        warn "'$ps' did not run cleanly (status $?)\n";
        $listing = '' unless defined $listing;
    }

    # One proc per line...
    my @lines = split /\n/, $listing;
    s/^ +// for @lines;
    return @lines;
}
# Decide whether a user belongs to the unix group that owns a node.
#
# Arguments:
#   $user - login name (may be tainted; it is validated here).
#   $node - node name, looked up in %nodes to find the owning group.
#
# Returns 1 when the user is a member of the owning group, 0 otherwise.
# A name that does not look like a login name yields 0.  A node with no
# entry in %nodes has no owner to protect, so every user is treated as
# valid there (returns 1).
sub user_valid_on_this_node {
    my ($user, $node) = @_;

    # Untaint by matching the WHOLE name.  A prefix match would silently
    # turn "john-doe" into "john" and check the wrong account; requiring a
    # leading word character keeps the name from looking like an option.
    return 0 unless defined $user && $user =~ /\A(\w[\w.-]*)\z/;
    my $login = $1;

    my $owner = defined $node ? $nodes{$node} : undef;
    return 1 unless defined $owner;

    # $login holds only [\w.-] characters, so the command line has no
    # shell metacharacters and needs no help from sed.
    my $listing = `/usr/bin/groups $login`;
    return 0 unless defined $listing;

    # Some versions of groups print "user : grp1 grp2"; drop that prefix.
    $listing =~ s/\A[^:]*://;
    my @groups = split ' ', $listing;
    #print STDERR "checking $login on [$owner]($node) against ", join("_", @groups), "\n";

    # Compare whole names: a regex match would accept "groupAB" for "groupA".
    return (grep { $_ eq $owner } @groups) ? 1 : 0;
}
# Nice down the hogs that are not nice already.
#
# Argument: a flat pid => user list, as returned by get_hogs().
#
# A process is set to $nice_level only when its current priority is
# numerically below $nice_level and its owner fails
# user_valid_on_this_node() for $node.  Progress and failures are
# reported on STDERR.  Returns nothing useful.
sub make_nice {
    my %hogs = @_;

    for my $pid (keys %hogs) {
        my $user = $hogs{$pid};

        # getpriority() returns the raw system call result, so -1 may be
        # a real priority or an error (for instance the process has
        # already exited).  Clearing $! first tells the two apart.
        $! = 0;
        my $prio = getpriority(0, $pid);
        if ($!) {
            print STDERR "Skipping [$pid]: $!\n";
            next;
        }

        print STDERR "checking $user [$pid], prio=$prio\n";
        next if $prio >= $nice_level;
        next if user_valid_on_this_node($user, $node);

        # Only claim success when the renice really happened.
        if (setpriority(0, $pid, $nice_level)) {
            print STDERR "Reniced [$pid] to ", getpriority(0, $pid), "\n";
        }
        else {
            print STDERR "Could not renice [$pid]: $!\n";
        }
    }
    return;
}
######################################################################
# Main section: snapshot the process table, ...
@procs    = get_procs();

# ... pick out the processes at or above the hog threshold, ...
%cpu_hogs = get_hogs( $hog_level, @procs );

# ... and nice the people who should be niced.
make_nice( %cpu_hogs );

exit 0;
Edited:
~Thu Jul 25 21:08:54 2002 (GMT)
by footpad: Added <READMORE> tag