# A script to report on the state of ZFS backups to Hobbit/Xymon
use warnings;
use strict;
use Hobbit;
use Sys::Hostname;
use Data::Dumper;
use DBI;

# HTTP client for the last_pingable lookup; human-readable byte counts for quota messages
use WWW::Curl::Easy;
use Format::Human::Bytes;
my $CONFFILE="/etc/default/zfs-backup";
# Read the config file and copy every KEY=VALUE setting it contains into %ENV.
# Later code in this script reads PGPASSFILE and PATH back out of %ENV.
#
# Syntax handled:
#   * everything from '#' to end of line is a comment
#   * blank (or comment-only) lines are skipped
#   * a trailing backslash joins the next physical line onto this one
my $fh;
my $l;
# Three-argument open so the file name can never be mistaken for a mode.
open($fh, '<', $CONFFILE) or die("Error : cannot open $CONFFILE: $!\n");
while (defined($l = <$fh>)) {
    chomp($l);
    $l =~ s/#.*//;
    next if ($l eq '');
    while ($l =~ /\\$/) {
        $l =~ s/\\$//;
        my $continuation = <$fh>;
        # The file ended straight after a continuation backslash: use what we have
        # instead of appending undef.
        last unless defined($continuation);
        $l .= $continuation;
        chomp($l);
    }
    # Split on the FIRST '=' so that values may themselves contain '='
    # (an environment variable name can never contain one).
    if ($l =~ /^([^=]+)=(.*)/) {
        $ENV{$1} = $2;
    }
}
close($fh);
my $this_server = hostname;
# Point at the hobbit-specific password file and make sure /sbin is on PATH.
# Either variable may be unset, so avoid concatenating onto undef.
$ENV{'PGPASSFILE'} = (defined($ENV{'PGPASSFILE'}) ? $ENV{'PGPASSFILE'} : '') . '-hobbit';
$ENV{'PATH'}       = (defined($ENV{'PATH'})       ? $ENV{'PATH'}       : '') . ':/sbin/';

# Connect using whatever the environment provides (no explicit DSN details,
# user or password are given here). Fail loudly if the connection cannot be made.
my $dbh = DBI->connect("dbi:Pg:")
    or die "Unable to connect to database: " . (defined($DBI::errstr) ? $DBI::errstr : 'unknown error') . "\n";
#print "Connected\n";

# Prepare a statement, dying with the offending SQL if the driver rejects it
# (without this a failed prepare only shows up later as a method call on undef).
my $prepare_or_die = sub {
    my ($query) = @_;
    my $handle = $dbh->prepare($query)
        or die "Unable to prepare $query: " . $dbh->errstr;
    return $handle;
};

# Get the list of relevant hosts from the database
my $sql="select distinct hostname from hobbit_hosts";
my $sth = $prepare_or_die->($sql);
$sth->execute() or die "Unable to run $sql: ".$dbh->errstr;
#print "Executed\n";
# Loop over these hosts reporting their backup state
my $all=$sth->fetchall_arrayref();

# Per-host rows of logged backup results.
my $sqlb="select * from hobbit_view where hostname=?";
my $sti = $prepare_or_die->($sqlb);

# Directory holding the per-task rsync config files checked for each backup task.
my $confdir='/etc/chem-zfs-backup-server/zfs-rsync.d';

# Source directory of a backup task; used to derive the expected config file name.
my $sqlc = "select directory_source from zfs_rsync_detail where backup_task_id=?";
my $stj = $prepare_or_die->($sqlc);

# Number of backup tasks configured for a host.
my $backuptask_count_query = "select count(backup_task_id) from backup_task natural join host where hostname=?";
my $backtask_count = $prepare_or_die->($backuptask_count_query);

# if a task has never completed, it will be missing from hobbit_view. We identify this via a full join
# on the view and look for a null field.
my $missing_tasks_query = "select backup_task_id, backup_task.backup_task_name from backup_task natural join host full join hobbit_view using (backup_task_id) where host.hostname=? and hobbit_view.backup_task_name is null";
my $missing_tasks = $prepare_or_die->($missing_tasks_query);
# True when a value returned by thisquota()/parentquota() is a real, non-zero
# byte count. The quota is fetched in parsable (-p) form, so an unset quota is
# expected to come back as 0; 'none', '' and other non-numeric output are
# likewise treated as "no quota".
my $quota_is_set = sub {
    my ($quota) = @_;
    return 0 unless defined($quota) && $quota =~ /\A\d+\z/;
    return $quota > 0 ? 1 : 0;
};

# Build and send one 'backup' status report per host.
#
# NOTE(review): the listing this was taken from had lost several lines (the
# loop over $sti's rows, two "else" lines, some closing braces and the call
# that submits the report). They are reconstructed here from the surrounding
# code - please confirm against the deployed script.
#
# The helper subs are called with a leading '&' on purpose: they are defined
# further down the file, and calling a prototyped sub before its definition
# without '&' triggers a "called too early to check prototype" warning.
foreach my $hr (@{$all}) {
    my $hostname = $hr->[0];
    print "Considering $hostname\n";
    my $bb = Hobbit->new({test=>'backup',hostname=>$hostname,ttl=>'60m'});
    $bb->print("Backup status on $this_server\n\n");

    # Although we could call $sti->rows after executing the query, the docs advise that
    # one should either select count(*) or count yourself as you fetch all the rows when
    # SELECTing. So we'll do the latter.
    # http://search.cpan.org/dist/DBI/DBI.pm#rows
    my $rows_for_this_host = 0;

    # The database view we query for logged results is deliberately written to only report
    # the most recent successful backup. Thus, we later compare the rows we have to the
    # number of configured backup tasks - if they are not equal, a task has never completed
    # successfully..
    $backtask_count->execute($hostname);
    my $count_row = $backtask_count->fetchrow_hashref();
    my $required_rows_for_this_host = $count_row ? $count_row->{'count'} : 0;

    $sti->execute($hostname);
    while (my $row = $sti->fetchrow_hashref()) {
        $rows_for_this_host += 1;
        my $zfs_target = $row->{'zfs_target'};

        if ($row->{'disabled'}) {
            $bb->color_line('green',$row->{'backup_task_name'}." is disabled\n");
            next;
        }

        my $backup_task_name = $row->{'backup_task_name'};
        $bb->print("Backup target: $zfs_target\n");

        # --- rsync config file present? -------------------------------------
        $stj->execute($row->{'backup_task_id'});
        my $detail = $stj->fetchrow_arrayref();
        if ($detail) {
            # Config files are named <hostname>_<source dir> with '/' replaced by '.'
            my $filename = $hostname . '_' . $detail->[0];
            $filename =~ s/\//./g;
            my $conffile = $confdir . '/' . $filename;
            if (-f $conffile) {
                $bb->color_line('green', "$conffile exists\n");
            } else {
                $bb->color_line('red', "$conffile is missing\n");
            }
        } else {
            # Previously this dereferenced undef and aborted the whole run.
            $bb->color_line('red', "No zfs_rsync_detail row for backup task id $row->{'backup_task_id'}\n");
        }

        # --- quota checks ---------------------------------------------------
        my $thisquota = &thisquota($zfs_target);
        my $parentquota = &parentquota($zfs_target);
        if ($quota_is_set->($thisquota)) {
            my $thisquota_friendly = Format::Human::Bytes::base2($thisquota, 2);
            $bb->color_line('green', "Quota of $thisquota_friendly is set\n");
        } elsif ($quota_is_set->($parentquota)) {
            my $parentquota_friendly = Format::Human::Bytes::base2($parentquota, 2);
            $bb->color_line('green', "Parent has quota $parentquota_friendly set\n");
        } else {
            $bb->color_line('red', "Neither " . $zfs_target . " or its parent has a quota set\n");
        }

        my $avail = &thisproperty($zfs_target, 'avail');
        my $used = &thisproperty($zfs_target,'used');
        # Empty output (failed zfs call) counts as 0 rather than warning.
        my $space_needed = ($avail || 0) + ($used || 0);
        # Numeric comparison: these are byte counts, and 'gt' would compare
        # them as strings ("9" gt "10" is true).
        if ($quota_is_set->($thisquota) && $thisquota > $space_needed) {
            $bb->color_line('red', "Quota ($thisquota) exceeds available+used ($space_needed) space\n");
        }

        # --- backup age -----------------------------------------------------
        my $backup_age;
        if (defined($row->{'age'})) {
            $backup_age = $row->{'age'};
        } else {
            # No recorded age: use a huge value so the age thresholds below trip.
            $backup_age = 9999999999;
        }

        # Ask Xymon for the state of the host's conn test and when it last changed.
        my $dboard_msg = 'xymondboard host=^' . $hostname . '$ test=^conn$ fields=lastchange,color';
        my $response = sendToXymon($dboard_msg);
        chomp $response;
        my ($last_pingable, $conn_color) = split(/\|/, $response);
        # to cover the case where e.g. $hostname has been rescinded
        if(!$conn_color) { $conn_color = 'clear'; }
        if(!$last_pingable) { $last_pingable = 0; }

        my $time_since_online = 0;
        if ($conn_color ne 'green') {
            # The CGI's answer replaces Xymon's lastchange value when it is usable.
            my $url = "http://hobbit.ch.cam.ac.uk/pg-xymon-cgi/last_pingable.php?HOST=$hostname";
            my $curl = WWW::Curl::Easy->new;
            $curl->setopt(CURLOPT_CONNECTTIMEOUT, 10);
            $curl->setopt(CURLOPT_TIMEOUT, 10);
            my $response_body;
            $curl->setopt(CURLOPT_WRITEDATA,\$response_body);
            $curl->setopt(CURLOPT_URL, $url);
            my $curl_response = $curl->perform;
            # Only trust the body if the transfer itself succeeded, the server
            # said 200, and the body really is a number.
            if ($curl_response == 0
                && $curl->getinfo(CURLINFO_HTTP_CODE) == 200
                && defined($response_body)
                && $response_body =~ /\A\s*(\d+(?:\.\d+)?)\s*\z/) {
                $last_pingable = $1;
            }
            $time_since_online = time() - $last_pingable;
        }

        # this is either:
        # a) the time since last successful backup, for machines with a green conn test
        # or, b) the time between the last successful backup and when the conn test was last green
        my $timediff = $backup_age - $time_since_online;

        my $backup_msg = '';
        my $msg_online_host = "%s is online and was last backed up %f seconds ago.";
        my $msg_offline_host = "%s last seen online %f seconds ago, and was last backed up %f seconds before that.";
        if ($conn_color eq 'green') {
            $backup_msg = sprintf($msg_online_host, $backup_task_name, $timediff);
        } else {
            $backup_msg = sprintf($msg_offline_host, $backup_task_name, $time_since_online, $timediff);
        }

        if (($conn_color ne 'green') && $time_since_online > 3 * 30 * 86400) {
            $bb->color_line('green', "offline for ~3 months\n");
        } elsif ($timediff < $row->{'green_soft'}) {
            $backup_msg .= " [<".$row->{'green_soft'}."] seconds ago.\n";
            $bb->color_line('green', $backup_msg);
        } elsif ($timediff < $row->{'yellow_hard'}) {
            # Reached only when $timediff >= green_soft; an age exactly equal to
            # green_soft used to fall through to red.
            $backup_msg .= " [<".$row->{'green_soft'}." and ".$row->{'yellow_hard'}."] seconds ago.\n";
            $bb->color_line('yellow', $backup_msg);
        } else {
            # Terminate the status line so the hint below starts on its own line.
            $bb->color_line('red', $backup_msg . "\n");
            # NOTE(review): 'yellow_soft' is not referenced anywhere else in this
            # script (the thresholds used above are green_soft and yellow_hard) -
            # confirm the view really has this column.
            $bb->print("Should be completed before $row->{'yellow_soft'} and $row->{'yellow_hard'}.\n");
        }

        # --- space used -----------------------------------------------------
        # Check for each line; we'll probably need to rewrite the hobbit parser for this
        my $used_frac=&space($zfs_target);
        if ($used_frac > $row->{'space_red'}) {
            $bb->color_line('red',"$zfs_target SpaceUsed: ".(100*$used_frac)."% [> ".(100*$row->{'space_red'})."% ]\n");
        } elsif ($used_frac > $row->{'space_yellow'}) {
            $bb->color_line('yellow',"$zfs_target SpaceUsed: ".(100*$used_frac)."% [> ".(100*$row->{'space_yellow'})."% ]\n");
        } else {
            $bb->color_line('green',"$zfs_target SpaceUsed: ".(100*$used_frac)."% [≤ ".(100*$row->{'space_yellow'})."% ]\n");
        }

        if ($row->{'prune_count'}==0) {
            # Double quotes: the original single-quoted string printed a literal \n.
            $bb->color_line('red',"No pruning configured\n");
        }
        if (defined($row->{'exec'})) {
            $bb->print("$zfs_target runtime: $row->{'exec'}\n");
        }
        my $count=&snapshots($zfs_target);
        $bb->print("Snapshots: $count\n\n");
    }

    # Fewer logged results than configured tasks means some task has never completed.
    if ($rows_for_this_host != $required_rows_for_this_host) {
        my $msg = "$rows_for_this_host results logged for host but $required_rows_for_this_host backup tasks are configured. Check backup_log table: maybe there has not yet been a succesful backup?\n";
        $bb->color_line('red', $msg);
        $missing_tasks->execute($hostname);
        while (my $missing_task = $missing_tasks->fetchrow_hashref) {
            $bb->color_line('red', "missing backup for task id " . $missing_task->{'backup_task_id'} . " (" . $missing_task->{'backup_task_name'} . ")\n");
        }
    }

    # NOTE(review): submitting the report is assumed to be Hobbit.pm's send();
    # the corresponding line was absent from the listing - confirm.
    $bb->send;
}
exit;
# Count the snapshots that exist beneath a ZFS dataset (recursively).
#
#   $zfs     - dataset name
#   returns  - number of lines printed by "zfs list -H -r -t snapshot $zfs";
#              0 if the command could not be started or printed nothing
#
# The command is run via list-form open, so the dataset name is passed as a
# single argument and never interpreted by a shell. Lines are counted here
# rather than by piping through "wc -l".
sub snapshots {
    my $zfs = shift;
    open(my $fh, '-|', 'sudo', 'zfs', 'list', '-H', '-r', '-t', 'snapshot', $zfs)
        or return 0;
    my $count = 0;
    while (defined(my $line = <$fh>)) {
        $count++;
    }
    close($fh);
    #print "Counted $count snapshots for $zfs\n";
    return $count;
}

# Fraction (0..1) of a dataset's space that is in use: used / (used + available).
#
#   $zfs     - dataset name
#   returns  - the used fraction, or 1 when used+available is not positive
#
# NOTE(review): the "sub" header and the tail of this routine were missing from
# the listing; the name is taken from the call &space(...) in the main loop.
# What the original returned when zfs printed nothing is not visible - here
# that case returns 1 as well, so an unreadable dataset is reported as full
# rather than silently as 0% used. Confirm this is the wanted behaviour.
sub space {
    my $zfs = shift;
    my %bytes_of;
    foreach my $property ('used', 'available') {
        open(my $fh, '-|', 'sudo', 'zfs', 'get', '-p', '-H', $property, $zfs)
            or return 1;
        my $line = <$fh>;
        close($fh);
        return 1 unless $line;
        # -H output is "name property value source"; the value is field 2.
        my @fields = split(/\s+/, $line);
        return 1 unless defined($fields[2]) && $fields[2] =~ /\A\d+\z/;
        $bytes_of{$property} = $fields[2];
    }
    my $total = $bytes_of{'used'} + $bytes_of{'available'};
    if ($total > 0) {
        return $bytes_of{'used'} / $total;
    } else {
        return 1;
    }
}
# Fetch one property of a ZFS dataset in parsable form.
#
#   $zfs      - dataset name
#   $property - property name as understood by "zfs get" (e.g. 'quota', 'used')
#   returns   - the output of "zfs get -Hp -o value $property $zfs" with the
#               trailing newline removed; '' if the command could not be
#               started or printed nothing
#
# The command is run via list-form open, so neither argument is ever
# interpreted by a shell (the original interpolated both into backticks).
sub thisproperty {
    my ($zfs, $property) = @_;
    open(my $fh, '-|', 'sudo', 'zfs', 'get', '-Hp', '-o', 'value', $property, $zfs)
        or return '';
    # Slurp everything, as backticks in scalar context did.
    my $value = do { local $/; <$fh> };
    close($fh);
    return '' unless defined($value);
    chomp $value;
    return $value;
}
# Look up the 'quota' property of a ZFS dataset.
#
#   $zfs     - dataset name
#   returns  - whatever thisproperty() reports for 'quota'
sub thisquota($) {
    my ($dataset) = @_;
    my $quota = thisproperty($dataset, 'quota');
    return $quota;
}
# Look up the quota on the "parent" of a ZFS dataset, where the parent is the
# first two components of the dataset path (pool/child).
#
#   $zfs     - dataset name, e.g. 'pool/host/home'
#   returns  - thisquota() of 'pool/host' for the example above
#
# A name with only one component (a bare pool) is used as-is; the original
# slice [0..1] produced "pool/" plus an uninitialized-value warning.
sub parentquota {
    my $zfs = shift;
    my @components = split('/', $zfs);
    my $last = $#components < 1 ? $#components : 1;
    my $parent = join('/', @components[0 .. $last]);
    return thisquota($parent);
}
# Send one message to the Xymon server and return its complete reply.
#
#   $msg     - message text to send
#   $host    - optional server address (default '131.111.112.27')
#   $port    - optional TCP port (default 1984)
#   returns  - everything the server sent back, as one string ('' if nothing)
#   dies     - if the TCP connection cannot be established
#
# The write side of the socket is shut down after sending, and the reply is
# then read until the server closes the connection.
sub sendToXymon {
    use IO::Socket;
    my ($msg, $host, $port) = @_;
    $host = '131.111.112.27' unless defined($host);
    $port = 1984 unless defined($port);

    my $sock = IO::Socket::INET->new(
        PeerAddr => $host,
        PeerPort => $port,
        Proto    => 'tcp',
        # Do not hang forever on an unreachable server; 10s matches the
        # timeouts used for the HTTP lookup elsewhere in this script.
        Timeout  => 10,
    );
    # IO::Socket::INET reports constructor failures in $@; fall back to $!.
    die "Could not create socket: " . ($@ || $!) . "\n" unless $sock;

    print {$sock} $msg;
    shutdown($sock, 1);

    my $reply = "";
    while (defined(my $chunk = <$sock>)) {
        $reply .= $chunk;
    }
    close($sock);
    return $reply;
}