#!/usr/bin/perl
# A script to report on ZFS backups.
#
# For every hostname in hobbit_hosts this builds one Xymon "backup" status
# (via the Hobbit module) describing, per backup task: whether its rsync
# config file exists, whether a quota is set, how old the last successful
# backup is, how full the target dataset is, and how many snapshots exist.

use warnings;
use strict;

use Hobbit;
use Sys::Hostname;
use Data::Dumper;
use DBI;
use WWW::Curl::Easy;
use Format::Human::Bytes;
use IO::Socket;
use Scalar::Util qw(looks_like_number);

my $CONFFILE = "/etc/default/zfs-backup";

# Read the config file: KEY=VALUE lines are exported into %ENV.
open(my $conf_fh, '<', $CONFFILE) or die("Error : $!\n");
while (my $line = <$conf_fh>) {
    chomp($line);
    $line =~ s/#.*//;
    next if ($line eq '');

    # A trailing backslash joins the next physical line onto this one.
    while ($line =~ /\\$/) {
        $line =~ s/\\$//;
        my $continuation = <$conf_fh>;
        last unless defined $continuation;    # backslash on the final line
        $line .= $continuation;
        chomp($line);
    }

    # Split on the FIRST '=' so that values may themselves contain '='.
    if ($line =~ /^([^=]*)=(.*)$/) {
        $ENV{$1} = $2;
    }
}
close($conf_fh);

my $this_server = hostname;
$ENV{'PGPASSFILE'} = (defined $ENV{'PGPASSFILE'} ? $ENV{'PGPASSFILE'} : '') . '-hobbit';
$ENV{'PATH'}       = (defined $ENV{'PATH'}       ? $ENV{'PATH'}       : '') . ':/sbin/';

# Get the list of relevant hosts from the database
my $dbh = DBI->connect("dbi:Pg:")
    or die "Unable to connect to database: "
    . (defined $DBI::errstr ? $DBI::errstr : 'unknown error') . "\n";
#print "Connected\n";

my $sql = "select distinct hostname from hobbit_hosts";
my $sth = $dbh->prepare($sql);
$sth->execute() || die "Unable to run $sql: " . $dbh->errstr;
#print "Executed\n";

# Loop over these hosts reporting their backup state
my $all = $sth->fetchall_arrayref();

my $sqlb = "select * from hobbit_view where hostname=?";
my $sti  = $dbh->prepare($sqlb);

my $confdir = '/etc/chem-zfs-backup-server/zfs-rsync.d';

my $sqlc = "select directory_source from zfs_rsync_detail where backup_task_id=?";
my $stj  = $dbh->prepare($sqlc);

my $backuptask_count_query = "select count(backup_task_id) from backup_task natural join host where hostname=?";
my $backtask_count = $dbh->prepare($backuptask_count_query);

# if a task has never completed, it will be missing from hobbit_view. We identify this via a full join
# on the view and look for a null field.
my $missing_tasks_query = "select backup_task_id, backup_task.backup_task_name from backup_task natural join host full join hobbit_view using (backup_task_id) where host.hostname=?
and hobbit_view.backup_task_name is null";
my $missing_tasks = $dbh->prepare($missing_tasks_query);

foreach my $hr (@{$all}) {
    my $hostname = $hr->[0];
    print "Considering $hostname\n";

    my $bb = Hobbit->new({test => 'backup', hostname => $hostname, ttl => '60m'});
    $bb->print("Backup status on $this_server\n\n");

    $sti->execute($hostname);

    # Although we could call $sti->rows after executing the query, the docs advise that
    # one should either select count(*) or count yourself as you fetch all the rows when
    # SELECTing. So we'll do the latter.
    # http://search.cpan.org/dist/DBI/DBI.pm#rows
    my $rows_for_this_host = 0;

    # The database view we query for logged results is deliberately written to only report
    # the most recent successful backup. Thus, we later compare the rows we have to the
    # number of configured backup tasks - if they are not equal, a task has never completed
    # successfully..
    $backtask_count->execute($hostname);
    my $count_row = $backtask_count->fetchrow_hashref();
    my $required_rows_for_this_host = $count_row ? $count_row->{'count'} : 0;

    while (my $row = $sti->fetchrow_hashref) {
        $rows_for_this_host += 1;
        my $zfs_target = $row->{'zfs_target'};

        if ($row->{'disabled'}) {
            $bb->color_line('green', $row->{'backup_task_name'} . " is disabled\n");
        }
        else {
            my $backup_task_name = $row->{'backup_task_name'};
            $bb->print("Backup target: $zfs_target\n");

            # --- rsync config file -------------------------------------
            # The file name is "<hostname>_<directory_source>" with every
            # '/' replaced by '.'.
            $stj->execute($row->{'backup_task_id'});
            my $detail = $stj->fetchrow_arrayref();
            if ($detail && defined $detail->[0]) {
                my $filename = $hostname . '_' . $detail->[0];
                $filename =~ s/\//./g;
                my $conffile = $confdir . '/' . $filename;
                if (-f $conffile) {
                    $bb->color_line('green', "$conffile exists\n");
                }
                else {
                    $bb->color_line('red', "$conffile is missing\n");
                }
            }
            else {
                # Without this guard a task lacking a detail row would abort
                # the whole run (dereferencing undef under strict).
                $bb->color_line('red',
                    "No zfs_rsync_detail row for backup task $row->{'backup_task_id'}\n");
            }

            # --- quota -------------------------------------------------
            my $thisquota   = thisquota($zfs_target);
            my $parentquota = parentquota($zfs_target);
            if (quota_is_set($thisquota)) {
                my $thisquota_friendly = Format::Human::Bytes::base2($thisquota, 2);
                $bb->color_line('green', "Quota of $thisquota_friendly is set\n");
            }
            elsif (quota_is_set($parentquota)) {
                my $parentquota_friendly = Format::Human::Bytes::base2($parentquota, 2);
                $bb->color_line('green', "Parent has quota $parentquota_friendly set\n");
            }
            else {
                $bb->color_line('red', "Neither " . $zfs_target . " or its parent has a quota set\n");
            }

            # Byte counts must be compared numerically; skip the check when
            # zfs did not give us numbers rather than compare garbage.
            my $avail = thisproperty($zfs_target, 'avail');
            my $used  = thisproperty($zfs_target, 'used');
            if (looks_like_number($avail) && looks_like_number($used)) {
                my $space_needed = $avail + $used;
                if (quota_is_set($thisquota) && $thisquota > $space_needed) {
                    $bb->color_line('red', "Quota ($thisquota) exceeds available+used ($space_needed) space\n");
                }
            }

            # --- backup age vs. host reachability ----------------------
            my $backup_age = defined($row->{'age'}) ? $row->{'age'} : 9999999999;

            my $dboard_msg = 'xymondboard host=^' . $hostname . '$ test=^conn$ fields=lastchange,color';
            my $response = sendToXymon($dboard_msg);
            chomp $response;
            my ($last_pingable, $conn_color) = split(/\|/, $response);

            # to cover the case where e.g. $hostname has been rescinded
            if (!$conn_color) {
                $conn_color = 'clear';
            }
            if (!$last_pingable || !looks_like_number($last_pingable)) {
                $last_pingable = 0;
            }

            my $time_since_online = 0;
            if ($conn_color ne 'green') {
                my $url = "http://hobbit.ch.cam.ac.uk/pg-xymon-cgi/last_pingable.php?HOST=$hostname";
                my $curl = WWW::Curl::Easy->new;
                $curl->setopt(CURLOPT_CONNECTTIMEOUT, 10);
                $curl->setopt(CURLOPT_TIMEOUT, 10);
                my $response_body;
                $curl->setopt(CURLOPT_WRITEDATA, \$response_body);
                $curl->setopt(CURLOPT_URL, $url);
                my $curl_response = $curl->perform;
                my $response_code = $curl->getinfo(CURLINFO_HTTP_CODE);

                # Only trust the body when the transfer succeeded and it is
                # a number; otherwise keep the value Xymon gave us.
                if ($curl_response == 0 && $response_code == 200 && defined $response_body) {
                    my $body = $response_body;
                    $body =~ s/^\s+//;
                    $body =~ s/\s+$//;
                    if (looks_like_number($body)) {
                        $last_pingable = $body;
                    }
                }
                $time_since_online = time() - $last_pingable;
            }

            # this is either:
            # a) the time since last successful backup, for machines with a green conn test
            # or, b) the time between the last successful backup and when the conn test was last green
            my $timediff = $backup_age - $time_since_online;

            my $backup_msg = '';
            my $msg_online_host  = "%s is online and was last backed up %f seconds ago.";
            my $msg_offline_host = "%s last seen online %f seconds ago, and was last backed up %f seconds before that.";
            if ($conn_color eq 'green') {
                $backup_msg = sprintf($msg_online_host, $backup_task_name, $timediff);
            }
            else {
                $backup_msg = sprintf($msg_offline_host, $backup_task_name, $time_since_online, $timediff);
            }

            if (($conn_color ne 'green') && $time_since_online > 3 * 30 * 86400) {
                $bb->color_line('green', "offline for ~3 months\n");
            }
            elsif ($timediff < $row->{'green_soft'}) {
                $backup_msg .= " [<" . $row->{'green_soft'} . "] seconds ago.\n";
                $bb->color_line('green', $backup_msg);
            }
            elsif ($timediff < $row->{'yellow_hard'}) {
                # Reached only when $timediff >= green_soft, so an age exactly
                # equal to green_soft is yellow instead of falling through to red.
                $backup_msg .= " [<" . $row->{'green_soft'} . " and " . $row->{'yellow_hard'} . "] seconds ago.\n";
                $bb->color_line('yellow', $backup_msg);
            }
            else {
                # Every other color_line call here supplies its own newline;
                # add one so the following line does not run into this one.
                $bb->color_line('red', $backup_msg . "\n");
                # NOTE(review): this reads 'yellow_soft' while the branches
                # above use 'green_soft' - confirm which column hobbit_view has.
                $bb->print("Should be completed before $row->{'yellow_soft'} and $row->{'yellow_hard'}.\n");
            }

            # --- space used --------------------------------------------
            # Check for each line; we'll probably need to rewrite the hobbit parser for this
            my $used_frac = space($zfs_target);
            if (!defined $used_frac) {
                # Do not report an unreadable dataset as 0% used / green.
                $bb->color_line('red', "$zfs_target SpaceUsed: unknown (could not read used/available)\n");
            }
            elsif ($used_frac > $row->{'space_red'}) {
                $bb->color_line('red', "$zfs_target SpaceUsed: " . (100 * $used_frac) . "% [> " . (100 * $row->{'space_red'}) . "% ]\n");
            }
            elsif ($used_frac > $row->{'space_yellow'}) {
                $bb->color_line('yellow', "$zfs_target SpaceUsed: " . (100 * $used_frac) . "% [> " . (100 * $row->{'space_yellow'}) . "% ]\n");
            }
            else {
                $bb->color_line('green', "$zfs_target SpaceUsed: " . (100 * $used_frac) . "% [≤ " . (100 * $row->{'space_yellow'}) . "% ]\n");
            }

            # --- pruning / runtime -------------------------------------
            if ((defined $row->{'prune_count'} ? $row->{'prune_count'} : 0) == 0) {
                $bb->color_line('red', "No pruning configured\n");
            }
            if (defined($row->{'exec'})) {
                $bb->print("$zfs_target runtime: $row->{'exec'}\n");
            }
        }

        # Snapshot count is reported for disabled tasks too.
        my $count = snapshots($zfs_target);
        $bb->print("Snapshots: $count\n\n");
    }

    if ($rows_for_this_host != $required_rows_for_this_host) {
        my $msg = "$rows_for_this_host results logged for host but $required_rows_for_this_host backup tasks are configured. Check backup_log table: maybe there has not yet been a succesful backup?\n";
        $bb->color_line('red', $msg);
        $missing_tasks->execute($hostname);
        while (my $missing_task = $missing_tasks->fetchrow_hashref) {
            $bb->color_line('red', "missing backup for task id " . $missing_task->{'backup_task_id'} . " (" . $missing_task->{'backup_task_name'} . ")\n");
        }
    }

    $bb->send;
}

exit;

# Run "sudo zfs <args...>" without a shell and return its output lines.
# Returns an empty list if the command could not be started.
sub zfs_output_lines {
    my @zfs_args = @_;
    open(my $fh, '-|', 'sudo', 'zfs', @zfs_args) or return;
    my @lines = <$fh>;
    close($fh);
    return @lines;
}

# True when $quota is a positive number. An unset quota is expected to show
# up as 0 (parseable output) or 'none'; both, and '' from a failed command,
# count as "not set".
sub quota_is_set {
    my ($quota) = @_;
    return (defined $quota && looks_like_number($quota) && $quota > 0) ? 1 : 0;
}

# Number of lines printed by "zfs list -H -r -t snapshot <dataset>",
# i.e. the number of snapshots under the dataset (0 if the command fails).
sub snapshots {
    my $zfs = shift;
    my @lines = zfs_output_lines('list', '-H', '-r', '-t', 'snapshot', $zfs);
    #print "Counted " . scalar(@lines) . " snapshots for $zfs\n";
    return scalar(@lines);
}

# Fraction (0..1) of the dataset's used+available space that is used.
# Returns 1 when used+available is not positive, and undef when either
# property could not be read as a number.
sub space {
    my $zfs = shift;
    my $used      = thisproperty($zfs, 'used');
    my $available = thisproperty($zfs, 'available');
    return unless (looks_like_number($used) && looks_like_number($available));

    my $total = $used + $available;
    return ($total > 0) ? $used / $total : 1;
}

# Value of one property of a dataset as printed by
# "zfs get -Hp -ovalue <property> <dataset>", without the trailing newline.
# Returns '' when the command produces no output.
sub thisproperty {
    my ($zfs, $property) = @_;
    my $value = join('', zfs_output_lines('get', '-Hp', '-ovalue', $property, $zfs));
    chomp $value;
    return $value;
}

# The 'quota' property of a dataset.
sub thisquota {
    my $zfs = shift;
    return thisproperty($zfs, 'quota');
}

# Quota of the dataset named by the first two '/'-separated components of
# $zfs (or by the single component when there is only one).
sub parentquota {
    my $zfs = shift;
    my @components = split('/', $zfs);
    my $last_index = ($#components < 1) ? $#components : 1;
    my $parent = join('/', @components[0 .. $last_index]);
    return thisquota($parent);
}

# Send one message to the Xymon server over TCP and return everything it
# sends back. Dies if the connection cannot be made.
sub sendToXymon {
    my $msg = shift;

    my $sock = IO::Socket::INET->new(
        PeerAddr => '131.111.112.27',
        PeerPort => 1984,
        Proto    => 'tcp',
    );
    die "Could not create socket: $!\n" unless $sock;

    print {$sock} $msg;
    shutdown($sock, 1);    # done writing; lets the server see end of request

    my $reply = "";
    while (my $chunk = <$sock>) {
        $reply .= $chunk;
    }
    close($sock);
    return $reply;
}