#!/os/dist/bin/perl -w
use strict;
use warnings;

use Solaris::Procfs qw/:procfiles/;    # Get a ps -auxw w/o shelling
use Net::SNPP;                         # Messages to pagers
use Sys::Syslog;                       # All other messages
use Data::Dumper;
use POSIX qw(WNOHANG);                 # Non-blocking waitpid flag

my $killing = 0;
my $DEBUG   = 1;

my $MAX_YPSERVS   = 8;      # More ypservs than this means a runaway
my $IDLE_SLEEP    = 180;    # Seconds between checks when all is well
my $DEBUG_SLEEP   = 300;    # Seconds to pause after a dry-run action
my $RETRY_SLEEP   = 10;     # Seconds to back off after a failed fork
my $REAP_ATTEMPTS = 20;     # Half-second polls while waiting on ypstart
my $YPSTART       = '/usr/lib/netsvc/yp/ypstart';

#---
# There is a very nasty race condition I am trying to avoid.  It could be
# possible that, between the call to get_procs and the kill, more processes
# could be spawned.  We need to make sure we kill them all.  Thus, the
# killing flag is set and we will kill until there are no more to be killed.
#
# The loop has three basic portions:
#   1. If the killing flag isn't set and the number of running ypserv's
#      is between 1 and 8, we sleep and loop.
#   2. If there are no ypserv's running, we fork a child and exec a
#      ypstart in the child.  When the child is reaped, we return to the
#      top of the loop.
#   3. Otherwise, we kill everything named ypserv.  I sort on the PPID,
#      hoping to kill the one spawning everybody else first.
#---
while ( 1 ) {
    my %procs = ();
    get_procs( \%procs );
    my $procnum = keys %procs;

    #---
    # Nothing wrong, go back to sleep.  Zero ypservs is NOT "nothing wrong":
    # that case has to reach the restart branch below.
    #---
    if ( !$killing && $procnum >= 1 && $procnum <= $MAX_YPSERVS ) {
        LogIt( 'Nothing to kill', 'info' );
        sleep $IDLE_SLEEP;
        next;
    }

    #---
    # If no ypservs are found, then we likely killed them all and it is time
    # to ( hopefully ) fork a new ypserv process.
    #---
    if ( $procnum == 0 ) {
        if ( $DEBUG ) {
            LogIt( 'Would have forked ypstart', 'debug' );
            sleep $DEBUG_SLEEP;
            next;
        }

        #---
        # We will do the fork and exec by hand.  That way we can flag errors
        # and attempt to be smart.
        #---
        my $kid = fork;

        if ( !defined $kid ) {
            LogIt( "Couldn't fork: $!", 'err' );
            sleep $RETRY_SLEEP;    # don't spin (and page) in a tight loop
            next;
        }

        if ( $kid == 0 ) {
            #---
            # Child.  exec only returns on failure; the child must never
            # fall back into the watchdog loop as a second daemon.
            #---
            exec $YPSTART or do {
                LogIt( "Couldn't exec $YPSTART: $!", 'err' );
                exit 1;
            };
        }

        #---
        # Parent.  Poll every half second until ypstart has been reaped,
        # there are no children left ( -1 ), or we run out of patience.
        #---
        my $looped = 0;
        my $dead   = 0;
        do {
            select( undef, undef, undef, 0.5 );
            $looped++;
            $dead = waitpid( -1, WNOHANG );
        } until $dead == $kid || $dead == -1 || $looped > $REAP_ATTEMPTS;

        if ( $dead != $kid && $dead != -1 ) {
            LogIt( "Could not reap ypstart", 'err' );
            next;
        }

        # The restart is done; leave kill mode and re-check from the top
        # instead of dropping into the kill section with nothing to kill.
        $killing = 0;
        next;
    }

    #---
    # Start with the cement shoes and overcoats.  I try to get the ( I hope )
    # ring leader first - which will have, I assume, a PPID of 1.
    #---
    if ( $DEBUG ) {
        LogIt( "$procnum ypservs running, would have waxed $procnum", 'debug' );
        sleep $DEBUG_SLEEP;
        next;
    }

    $killing = 1;
    my @victims = sort { $procs{$a}{PPID} <=> $procs{$b}{PPID} } keys %procs;
    my $total   = kill 'KILL', @victims;
    LogIt( "$procnum ypservs running, $total waxed", 'warning' );
}

#---
# get_procs( \%procs )
#
# Fills the caller's hash with one entry per running ypserv, keyed by pid:
#     $procs{$pid} = { PID => $pid, PPID => $parent_pid }
#
# All the functions for Procfs seem to rely upon the pid.  The only way I can
# think of getting this information is to grope /proc and get the information
# for each directory I find.  Dies if /proc cannot be opened; a pid that
# cannot be read ( e.g. it exited while we were looking ) is warned about
# and skipped.
#---
sub get_procs {
    my $ref = shift;

    print scalar( localtime ), " opening procfs\n";

    opendir my $proc_dir, '/proc' or die "Couldn't open /proc: $!";

    while ( defined( my $pid = readdir $proc_dir ) ) {
        next if $pid =~ /^\.\.?\z/;

        my $info = psinfo( $pid );
        unless ( defined( $info ) && ref( $info ) eq 'HASH' ) {
            warn "Couldn't grope pid $pid: $!\n";
            next;
        }

        #---
        # Do nothing unless this is a ypserv
        #---
        next unless defined $info->{pr_fname} && $info->{pr_fname} eq 'ypserv';

        # psinfo names the parent pid pr_ppid; default to 0 so the numeric
        # sort in the caller never sees undef.
        my $ppid = $info->{pr_ppid};
        $ref->{$pid} = {
            PID  => $pid,
            PPID => defined $ppid ? $ppid : 0,
        };
    }

    closedir $proc_dir;
    return;
}

#---
# _send_page( $pager, $message )
#
# Sends one SNPP page.  Returns 1 on success and 0 if the server could not
# be reached or refused the message, so a dead pager gateway cannot take the
# watchdog down with it.
#---
sub _send_page {
    my ( $pager, $message ) = @_;

    my $snpp = Net::SNPP->new( 'snpphost' ) or return 0;
    my $sent = $snpp->send(
        Pager   => $pager,
        Message => $message,
    );
    $snpp->quit;

    return $sent ? 1 : 0;
}

#---
# LogIt( $message, $level )
#
# Stupid logging function - sends a page for anything more important than
# info and syslogs them all.  $level is a syslog priority name:
#   debug     - paged to 'mik' only; syslogged only if that page fails
#   info      - syslogged, never paged
#   otherwise - paged to 'sysadm' and syslogged, with an extra 'err' entry
#               if the page could not be sent
# A missing level defaults to 'notice', a real priority that still pages.
#---
sub LogIt {
    my ( $message, $level ) = @_;
    $level ||= 'notice';

    my $wants_page = $level ne 'info';
    my $paged      = 0;

    if ( $level eq 'debug' ) {
        return 1 if _send_page( 'mik', $message );
    }
    elsif ( $wants_page ) {
        $paged = _send_page( 'sysadm', $message );
    }

    openlog( 'Sacco', 'pid', 'daemon' );
    # '%s' keeps any '%' in the message from being read as a format.
    syslog( $level, '%s', $message );
    syslog( 'err', 'Paging failed' ) if $wants_page && !$paged;
    closelog();

    return 1;
}