Failboat is an in-house tool we've written to keep track of how we're doing with our backups. It generates a nice HTML-formatted email showing us which systems are having ongoing problems, which systems aren't being covered, and provides us with reasons for why the jobs themselves are failing.
Failboat takes two arguments, a NetBackup server to query against, and an email address to dump the results to. The box which runs Failboat must be designated as a valid peer in NetBackup, else the query commands that Failboat relies upon won't work. In other words, this script should work when run on any NetBackup master server, media server, or client with authorization to run admin commands, but nowhere else.
(By the way, the first few lines of code contain an HTML reference to a server that houses a nice logo, which is displayed at the top of the email. Feel free to change this to:
http://home.comcast.net/~bpoag/failboat-logo3.jpg )
Code:
#!/usr/bin/perl
##
## Failboat 0.1 written 020910 by Bowie J. Poag
##
## Failboat tracks which NetBackup jobs have failed every night, and warns if a particular client
## has had problems for more than 2 days running.
##
## Failboat is a Negative Nancy. It has nothing good to say about anything, or anybody. In fact,
## it attempts to point out lapses in work ethic on the part of the backup administrator. But,
## that's its job.
##
## Being so negative all the time has really taken a toll on Failboat. He's chronically depressed.
## I found out a week or so ago that Failboat has been seeing a psychiatrist for his problems..
## It doesn't help much that Failboat is going through a nasty divorce and custody battle on
## top of it all. Talk about stress.. That bitch put him through a bankruptcy a few years ago,
## and now he can't get a loan to fix up the house in order to sell it before she gets her hands
## on it once the divorce is finalized. It's just ridiculous. Poor Failboat. Don't even get me
## started on the kids.. His alimony is going to be through the roof.
##
## Regardless, attitude is everything in this business, and if you have a crappy attitude, it
## just makes the work harder. I gotta give 'ol Failboat some credit for trying, tho. As if having
## to work with NetBackup wasn't soul-crushing enough.....
##
use Date::Manip;
use Mail::Sendmail;
##
## Main program.
##
## Usage: failboat <netbackup-master-server> <email-recipient>
##
## Validates the command line, records the reference time used for backup-age
## calculations, then runs the four report phases in order. All state is kept
## in package globals ($DEBUG, $now, $masterServer, $mailRecipients, @console)
## because the subs below read them directly.
##
$DEBUG=0;

# Both arguments are required. Without them the NetBackup query would run with
# an empty server name and the report would be mailed to nobody, so stop early
# with a usage message instead.
if (@ARGV < 2 || $ARGV[0] eq '' || $ARGV[1] eq '')
{
    die "Usage: $0 <netbackup-master-server> <email-recipient>\n";
}
$masterServer=$ARGV[0];
$mailRecipients=$ARGV[1];

# "Now", as parsed by Date::Manip; parseJobs() measures backup ages against it.
$now=ParseDate("today");

spinUp();
collectJobs();
parseJobs();
spinDown();
##
## spinUp: start the report and check that the NetBackup tooling is present.
##
## Appends the HTML page header (including the logo image) and a
## "Report generated" line to the global @console buffer, prints a start-up
## banner to STDOUT, and dies if the bpdbjobs binary does not exist at its
## expected path.
##
## Takes no arguments and returns nothing useful.
##
sub spinUp
{
    # The string literals below were previously split by line-wrap markers,
    # which corrupted both the HTML and the die message; they are whole again.
    push(@console, "<html><body bgcolor=#000000><font size=2 face=\"verdana\" color=#31bbad><img src=\"http://delphi/failboat-logo3.jpg\" align=left><br /><br/><br /><br><br>");
    push(@console, "<br>Report generated ".`date`."<br><br>");

    print "\nFailboat: Spinning up..\nFailboat:\n";

    # Nothing else in the script can work without bpdbjobs, so bail out now.
    -e "/usr/openv/netbackup/bin/admincmd/bpdbjobs"
        or die "Failboat: Cannot find bpdbjobs binary. Nothing to do.\n\n";
}
##
## collectJobs: fetch the job list from the NetBackup master server.
##
## Runs bpdbjobs against the global $masterServer and stores every output line
## that does not contain the text "Catalog" in the global @tempJobsTable, for
## parseJobs() to examine. Progress is printed to STDOUT and appended to the
## global @console buffer.
##
## Takes no arguments and returns nothing useful.
##
sub collectJobs
{
    print "Failboat: Collecting data from NetBackup master server $masterServer..\n";
    push(@console,"Failboat: Collecting data from NetBackup master server $masterServer..<br>");

    # The server name comes straight from the command line and ends up in a
    # shell command, so wrap it in single quotes (escaping any embedded ones)
    # to keep shell metacharacters from being executed.
    my $quotedServer = defined $masterServer ? $masterServer : '';
    $quotedServer =~ s/'/'\\''/g;

    my @rawJobs = `/usr/openv/netbackup/bin/admincmd/bpdbjobs -M '$quotedServer'`;
    my $status = $?;

    # Filtering here rather than piping through "grep -v Catalog" keeps $?
    # tied to bpdbjobs itself, so a failed query can be detected below.
    @tempJobsTable = grep { !/Catalog/ } @rawJobs;

    # A failed query would otherwise look exactly like a night with no
    # failures, so say so on STDERR and in the report.
    if ($status != 0)
    {
        my $what = $status == -1
            ? "could not be started"
            : "exited with status " . ($status >> 8);
        warn "Failboat: WARNING: bpdbjobs $what; results may be incomplete.\n";
        push(@console,"Failboat: WARNING: bpdbjobs $what; results may be incomplete.<br>");
    }
}
##
## parseJobs: turn the raw job table into the body of the report.
##
## Reads the global @tempJobsTable (filled by collectJobs), counts failed jobs
## per client in the global %failedClients, and remembers the first failure
## status seen for each client in the global %failureType. For every client
## with more than one failure it looks up the last good backup via bpcatlist
## and writes a line to STDOUT and to the global @console buffer, followed by
## a "Reasons for the failures" section. The plain-text reason lines are also
## kept in the global @reasons.
##
## Takes no arguments and returns nothing useful.
##
sub parseJobs
{
    print "Failboat:\nFailboat: The following is a list of clients that are currently experiencing problems with their backups:\nFailboat:\n";
    push(@console, "Failboat:<br>Failboat: The following is a list of clients that are currently experiencing problems with their backups:<br>Failboat:<br>");

    foreach my $jobLine (@tempJobsTable)
    {
        (my $flat = $jobLine) =~ s/\s+/ /g;
        my @fields = split(" ", $flat);

        # Whitespace-separated columns: index 3 is used as the status code and
        # index 6 as the client name (the debug output below labels them that
        # way). Index 0 is presumably the job id -- TODO confirm against the
        # bpdbjobs report layout in use.
        my ($jobId, $status, $client) = @fields[0, 3, 6];

        # Header lines and rows without a numeric id/status are skipped.
        next unless defined $client;
        next unless defined $jobId  && $jobId  =~ /\A\d+\z/ && $jobId  >= 1;
        next unless defined $status && $status =~ /\A\d+\z/;

        # Status 0 and 1 are not treated as failures.
        next unless $status > 1;

        $DEBUG && print "Failboat: $flat\n";
        $DEBUG && push(@console,"Failboat: $flat<br>");

        $failedClients{$client}++;

        # Keep only the first status seen per client. The debug text calls it
        # the "most recent" failure, which assumes bpdbjobs lists newest jobs
        # first -- NOTE(review): confirm.
        $failureType{$client} = $status unless exists $failureType{$client};
    }

    $DEBUG && print "\n";
    foreach my $client (sort keys %failureType)
    {
        my $failCode = $failureType{$client};
        $DEBUG && print "Failboat: Client $client most recently failed with error code $failCode.\n";
        $DEBUG && push(@console, "Failboat: Client $client most recently failed with error code $failCode.<br>");
    }

    $DEBUG && print "\n";
    foreach my $client (sort keys %failedClients)
    {
        my $failCount = $failedClients{$client};
        $DEBUG && print "Failboat: Client $client has failed $failCount times in recent history.\n";
        $DEBUG && push(@console, "Failboat: Client $client has failed $failCount times in recent history.<br>");
    }

    # [client, explanation] pairs for the "Reasons" section further down.
    my @failureNotes;

    # Sorted so the report order is stable from run to run.
    foreach my $client (sort keys %failedClients)
    {
        # A single failure is not reported.
        next unless $failedClients{$client} > 1;

        my $errorExplanation    = _explainStatus($failureType{$client});
        my $lastValidBackupTime = _lastBackupStamp($client);

        my $detail;
        if ($lastValidBackupTime !~ /\d/)
        {
            # No digits at all means bpcatlist gave us nothing date-like.
            $detail = "doesn't have any valid backup images whatsoever. This is bad.";
        }
        else
        {
            my $hoursSince = _hoursSinceBackup($lastValidBackupTime);
            if (!defined $hoursSince)
            {
                # Do not claim "0 hours ago" when the date could not be read.
                $detail = "has a last good backup time ($lastValidBackupTime) that could not be interpreted.";
            }
            elsif (int($hoursSince / 24) >= 2)
            {
                my $age = int($hoursSince / 24);
                $detail = "hasn't had a good backup since $lastValidBackupTime, $age days ago.";
            }
            else
            {
                # Total elapsed hours, rounded -- includes any whole day.
                my $hoursAgo = int($hoursSince + .5);
                $detail = "has been failing occasionally, but had a successful backup about $hoursAgo hours ago.";
            }
        }

        print "Failboat: Client $client $detail\n";
        push(@console, "Failboat: Client <font color=#81f3ed>$client</font> $detail<br>");
        push(@failureNotes, [$client, $errorExplanation]);
    }

    print "Failboat: \n";
    push(@console,"Failboat: <br>");
    push(@console,"Failboat: Reasons for the failures:<br>");
    push(@console,"Failboat: <br>");

    foreach my $note (@failureNotes)
    {
        my ($client, $why) = @$note;
        my $plain = "Failboat: The last backup attempt on $client failed $why.";
        print "$plain\n";
        push(@reasons, $plain);

        # Build the HTML directly instead of patching the plain text with
        # regexes, which misfired when the client name contained "failed".
        push(@console, "Failboat: The last backup attempt on <font color=#81f3ed>$client</font> failed $why.<br>");
    }
}

## _explainStatus: map a failure status code to the phrase used in the
## "Reasons" section; unknown codes get a generic phrase quoting the code.
sub _explainStatus
{
    my ($code) = @_;
    my %explanationFor = (
        21  => "because a socket could not be opened",
        40  => "because the network connection was broken",
        41  => "because the network connection timed out",
        50  => "because the client backup process aborted",
        58  => "because the client was unresponsive",
        59  => "because access to the client wasn't allowed",
        63  => "because the backup process was killed client-side",
        71  => "because none of the specified files were found",
        84  => "because there was a write error on the tape",
        90  => "because media manager didn't recieve any data",
        98  => "because there was a problem with loading the tape",
        150 => "because the job was manually cancelled by the backup admin",
        156 => "because there was a snapshot error on the client",
        196 => "because the job wasn't able to start on time",
    );
    $code = '' unless defined $code;

    # "+ 0" normalises the key so that e.g. "058" still finds entry 58.
    my $key = $code =~ /\A\d+\z/ ? $code + 0 : $code;
    return exists $explanationFor{$key}
        ? $explanationFor{$key}
        : "with an unrecognized error code ($code)";
}

## _lastBackupStamp: ask bpcatlist about $client and return fields 1-4 of the
## first output line mentioning the client, joined by single spaces. These are
## presumably the backup date columns -- verify against bpcatlist output.
## Returns a string with no digits when nothing usable came back.
sub _lastBackupStamp
{
    my ($client) = @_;

    # The client name goes into a shell command: single-quote it, escaping
    # any embedded single quotes.
    (my $quotedClient = $client) =~ s/'/'\\''/g;

    # stderr is merged into stdout, as before, so errors stay off the terminal.
    my @lines = `/usr/openv/netbackup/bin/admincmd/bpcatlist -client '$quotedClient' 2>&1`;

    # Literal substring match; the old "grep $client" treated the name as a
    # regex. The old trailing awk '{print $2}' is gone too: Perl interpolated
    # its (empty) $2 inside the backticks, so awk printed the whole line
    # anyway and the field selection below did the real work.
    my ($firstHit) = grep { index($_, $client) >= 0 } @lines;
    $firstHit = '' unless defined $firstHit;

    $firstHit =~ s/\s+/ /g;
    my @fields = split(" ", $firstHit);
    return join(" ", map { defined $_ ? $_ : '' } @fields[1 .. 4]);
}

## _hoursSinceBackup: hours elapsed between $stamp and the global $now, or
## undef when Date::Manip returns no usable delta. Assumes the colon-separated
## delta layout with weeks, days, hours and minutes at indexes 2..5.
sub _hoursSinceBackup
{
    my ($stamp) = @_;
    my $err;
    my $delta = DateCalc($stamp, $now, \$err);
    return undef unless defined $delta && $delta =~ /:/;

    my @part = split(":", $delta);
    my ($weeks, $days, $hours, $minutes) = map { $_ || 0 } @part[2 .. 5];

    # Fold everything into hours so the result is right whether the delta
    # arrives already split into days or as one large hour count.
    return (($weeks * 7) + $days) * 24 + $hours + ($minutes / 60);
}
##
## spinDown: finish the report and mail it.
##
## Appends the closing lines and HTML footer to the global @console buffer,
## then sends the whole buffer as an HTML email to the global $mailRecipients
## using Mail::Sendmail. A delivery failure is reported on STDERR; it does not
## abort the script.
##
## Takes no arguments and returns nothing useful.
##
sub spinDown
{
    chomp(my $dateStamp = `date`);
    my $subjectLine = "Failboat report for $dateStamp";

    print "Failboat:\nFailboat: Scan completed at $dateStamp. Spinning down..\n";
    push(@console,"Failboat:<br>Failboat: Scan completed at $dateStamp. Spinning down..<br>");
    push(@console, "<br>End of report.<br><br><br><br><br><font color=#0d2522 size=1>Failboat v1.04 written 022410:1133 by Bowie J. Poag </font></body></html>");

    print "Failboat: Sending report to $mailRecipients..\n\n";

    # The message is one-shot, so it is a lexical rather than a leaked global.
    my %mail = (
        'SMTP'         => 'mail.tmcaz.com',
        'FROM'         => 'Failboat <sysmon@foobar.com>',
        'TO'           => $mailRecipients,
        'SUBJECT'      => $subjectLine,
        'CONTENT-TYPE' => 'text/html; charset="us-ascii"',
        'MESSAGE'      => join("",@console),
    );

    # The failure notice goes to the terminal, not the email, so it is plain
    # text with a newline on STDERR (it used to carry an HTML <br>).
    sendmail(%mail)
        or warn "Failboat: Send failed: $Mail::Sendmail::error\n";
}