FAQ | This is a LIVE service | Changelog

Skip to content
Snippets Groups Projects
zfs-backups 10.8 KiB
Newer Older

# A script to report on ZFS backups

use warnings;
use strict;
use Hobbit;
use Data::Dumper;
use DBI;

# Settings file made of KEY=value lines; each assignment found in it is copied
# into %ENV by the read loop that follows.
my $CONFFILE="/etc/default/zfs-backup";

# Read the config file
my $fh;
my $l;
# Three-argument open makes the read mode explicit, so the path can never be
# taken for a mode or a pipe. The failure message names the file involved.
open($fh, '<', $CONFFILE) or die("Error : cannot open $CONFFILE: $!\n");
while ($l=<$fh>) {
Dr Adam Thorn's avatar
Dr Adam Thorn committed
    chomp($l);
    $l =~ s/#.*//;
    next if ($l eq '');
    while ($l =~ /\\$/) {
        $l =~ s/\\$//;
        $l.=<$fh>;
        chomp($l);
    }
    if ($l =~ /(.*)=(.*)/) {
        $ENV{$1}=$2;
    }
$ENV{'PGPASSFILE'}=$ENV{'PGPASSFILE'}.'-hobbit';
$ENV{'PATH'}=$ENV{'PATH'}.':/sbin/';

# Get the list of relevant hosts from the database.
# Only the driver name is given in the DSN; the remaining connection details
# are presumably taken from the environment set up above (e.g. PGPASSFILE).
# A failed connect returns undef, so stop here with the driver's own message
# rather than failing later on a method call against an undefined handle.
my $dbh=DBI->connect("dbi:Pg:")
    or die "Unable to connect to database: ".$DBI::errstr;
#print "Connected\n";
my $sql="select distinct hostname from hobbit_hosts";
my $sth=$dbh->prepare($sql);
$sth->execute() || die "Unable to run $sql: ".$dbh->errstr;
#print "Executed\n";

# Loop over these hosts reporting their backup state
my $all=$sth->fetchall_arrayref();

# Per-host rows of hobbit_view; executed once per hostname in the main loop.
my $sqlb="select * from hobbit_view where hostname=?";
my $sti=$dbh->prepare($sqlb);

# Source directory of a backup task. The main loop joins it to the hostname to
# form the name of the file expected under $confdir.
my $confdir='/etc/chem-zfs-backup-server/zfs-rsync.d';
my $sqlc = "select directory_source from zfs_rsync_detail where backup_task_id=?";
my $stj=$dbh->prepare($sqlc);

# Number of backup tasks configured for a host, compared in the main loop with
# the number of rows hobbit_view returned for that host.
my $backuptask_count_query = "select count(backup_task_id) from backup_task natural join host where hostname=?";
my $backtask_count=$dbh->prepare($backuptask_count_query);

# if a task has never completed, it will be missing from hobbit_view. We identify this via a full join
# on the view and look for a null field.
my $missing_tasks_query = "select backup_task_id, backup_task.backup_task_name from backup_task natural join host full join hobbit_view using (backup_task_id) where host.hostname=? and hobbit_view.backup_task_name is null";
my $missing_tasks = $dbh->prepare($missing_tasks_query);

foreach my $hr (@{$all}) {
Dr Adam Thorn's avatar
Dr Adam Thorn committed
    my $hostname=$hr->[0];
    print "Considering $hostname\n";
    my $bb=new Hobbit ({test=>'backup',hostname=>$hostname,ttl=>'60m'});
    $bb->print("Backup status on $this_server\n\n");
Dr Adam Thorn's avatar
Dr Adam Thorn committed

    $sti->execute($hostname);

    # Although we could call $sti->rows after executing the query, the docs advise that
    # one should either select count(*) or count yourself as you fetch all the rows when
    # SELECTing. So we'll do the latter.
    # http://search.cpan.org/dist/DBI/DBI.pm#rows
    my $rows_for_this_host = 0;
    $backtask_count->execute($hostname);

    # The database view we query for logged results is deliberately written to only report
    # the most recent successful backup. Thus, we later compare the rows we have to the
    # number of configured backup tasks - if they are not equal, a task has never completed
    # successfully.
    my $required_rows_for_this_host = $backtask_count->fetchrow_hashref();
    $required_rows_for_this_host = $required_rows_for_this_host->{'count'};
Dr Adam Thorn's avatar
Dr Adam Thorn committed
    while (my $row=$sti->fetchrow_hashref) {
        $rows_for_this_host += 1;
        my $zfs_target = $row->{'zfs_target'};

Dr Adam Thorn's avatar
Dr Adam Thorn committed
        if ($row->{'disabled'}) {
            $bb->color_line('green',$row->{'backup_task_name'}." is disabled\n");
        } else {

            my $backup_task_name = $row->{'backup_task_name'};
            $bb->print("Backup target: $zfs_target\n");

Dr Adam Thorn's avatar
Dr Adam Thorn committed
            $stj->execute($row->{'backup_task_id'});
            my $detail = $stj->fetchrow_arrayref();
            my $filename = $hostname . '_' . @{$detail}[0];
            $filename =~ s/\//./g;
            my $conffile = $confdir . '/' . $filename;
            if (-f $conffile) {
                $bb->color_line('green', "$conffile exists\n");
            } else {
                $bb->color_line('red', "$conffile is missing\n");
            }

            my $thisquota = &thisquota($zfs_target);
            my $parentquota = &parentquota($zfs_target);
            if($thisquota ne 0) {
                    my $thisquota_friendly = Format::Human::Bytes::base2($thisquota, 2);
                    $bb->color_line('green', "Quota of $thisquota_friendly is set\n");
            } elsif($parentquota ne 'none') {
                    my $parentquota_friendly = Format::Human::Bytes::base2($parentquota, 2);
                    $bb->color_line('green', "Parent has quota $parentquota_friendly set\n");
            } else {
                    $bb->color_line('red', "Neither " . $zfs_target . " or its parent has a quota set\n");
            }
            # Total the dataset could occupy: what it uses now plus what is
            # still available to it.
            my $avail = &thisproperty($zfs_target, 'avail');
            my $used = &thisproperty($zfs_target,'used');
            my $space_needed = $avail + $used;

            # Compare numerically. $space_needed is an arithmetic sum, and the
            # string operator `gt` would order the values lexically (so "9"
            # would count as greater than "10").
            if($thisquota > $space_needed) {
                $bb->color_line('red', "Quota ($thisquota) exceeds available+used ($space_needed) space\n");
            }

            my $backup_age;
            if(defined($row->{'age'})) {
                $backup_age = $row->{'age'};
Dr Adam Thorn's avatar
Dr Adam Thorn committed
            } else {
                $backup_age = 9999999999;
            }

            my $dboard_msg = 'xymondboard host=^' . $hostname . '$ test=^conn$ fields=lastchange,color';
            my $response = sendToXymon($dboard_msg);
            chomp $response;
            my ($last_pingable, $conn_color) = split(/\|/, $response);

            # to cover the case where e.g. $hostname has been rescinded
            if(!$conn_color) { $conn_color = 'clear'; }
            if(!$last_pingable) { $last_pingable = 0; }

            my $time_since_online = 0;

            if($conn_color ne 'green') {
                my $url = "http://hobbit.ch.cam.ac.uk/pg-xymon-cgi/last_pingable.php?HOST=$hostname";
                my $curl = WWW::Curl::Easy->new;
                $curl->setopt(CURLOPT_CONNECTTIMEOUT, 10);
                $curl->setopt(CURLOPT_TIMEOUT, 10);

                my $response_body;
                $curl->setopt(CURLOPT_WRITEDATA,\$response_body);

                $curl->setopt(CURLOPT_URL, $url);
                my $curl_response = $curl->perform;
                my $response_code = $curl->getinfo(CURLINFO_HTTP_CODE);

                if($response_code == 200 ) {
                    $last_pingable = $response_body;
                }

                $time_since_online = time() - $last_pingable;
            }

            # this is either:
            #     a) the time since last successful backup, for machines with a green conn test
            # or, b) the time between the last successful backup and when the conn test was last green
            my $timediff = $backup_age - $time_since_online;
            my $backup_msg = '';

            my $msg_online_host = "%s is online and was last backed up %f seconds ago.";
            my $msg_offline_host = "%s last seen online %f seconds ago, and was last backed up %f seconds before that.";

            if($conn_color eq 'green') {
                $backup_msg = sprintf($msg_online_host, $backup_task_name, $timediff);
            } else {
                $backup_msg = sprintf($msg_offline_host, $backup_task_name, $time_since_online, $timediff);
            }

            if(($conn_color ne 'green') && $time_since_online > 3 * 30 * 86400) {
              $bb->color_line('green', "offline for ~3 months\n");
                $backup_msg .= " [&lt;".$row->{'green_soft'}."] seconds ago.\n";
                $bb->color_line('green', $backup_msg);
            } elsif ($timediff>$row->{'green_soft'} && $timediff<$row->{'yellow_hard'}) {
                $backup_msg .= " [&lt;".$row->{'green_soft'}." and ".$row->{'yellow_hard'}."] seconds ago.\n";
                $bb->color_line('yellow', $backup_msg);
            } else {
                $bb->color_line('red', $backup_msg);
Dr Adam Thorn's avatar
Dr Adam Thorn committed
                $bb->print("Should be completed before $row->{'yellow_soft'} and $row->{'yellow_hard'}.\n");
            }
Dr Adam Thorn's avatar
Dr Adam Thorn committed
            # Check for each line; we'll probably need to rewrite the hobbit parser for this
            my $used_frac=&space($zfs_target);
            if ($used_frac > $row->{'space_red'}) {
                $bb->color_line('red',"$zfs_target SpaceUsed: ".(100*$used_frac)."% [&gt; ".(100*$row->{'space_red'})."% ]\n");
            } elsif ($used_frac > $row->{'space_yellow'}) {
                $bb->color_line('yellow',"$zfs_target SpaceUsed: ".(100*$used_frac)."% [&gt; ".(100*$row->{'space_yellow'})."% ]\n");
Dr Adam Thorn's avatar
Dr Adam Thorn committed
            } else {
                $bb->color_line('green',"$zfs_target SpaceUsed: ".(100*$used_frac)."% [&le; ".(100*$row->{'space_yellow'})."% ]\n");
Dr Adam Thorn's avatar
Dr Adam Thorn committed
            }
            # Flag tasks with no pruning configured. The message is double-quoted
            # so that \n is a real newline, as in the other status lines;
            # single quotes would emit a literal backslash followed by "n".
            if ($row->{'prune_count'}==0) {
                $bb->color_line('red',"No pruning configured\n");
            }
            if (defined($row->{'exec'})) {
                $bb->print("$zfs_target runtime: $row->{'exec'}\n");
        my $count=&snapshots($zfs_target);
        $bb->print("Snapshots: $count\n\n");
    if($rows_for_this_host != $required_rows_for_this_host) {
      my $msg = "$rows_for_this_host results logged for host but $required_rows_for_this_host backup tasks are configured. Check backup_log table: maybe there has not yet been a succesful backup?\n";
      $bb->color_line('red', $msg);

      $missing_tasks->execute($hostname);
      while(my $missing_task = $missing_tasks->fetchrow_hashref) {
         $bb->color_line('red', "missing backup for task id " . $missing_task->{'backup_task_id'} . " (" . $missing_task->{'backup_task_name'} . ")\n");
      }
Dr Adam Thorn's avatar
Dr Adam Thorn committed
    $bb->send;
Dr Adam Thorn's avatar
Dr Adam Thorn committed
    my $zfs=shift;
    my $fh;
    open($fh,"sudo zfs list -H -r -t snapshot $zfs | wc -l |");
    my $count=<$fh>;
    chomp($count);
    close ($fh);
    #print "Counted $count snapshots for $zfs\n";
    return $count;
Dr Adam Thorn's avatar
Dr Adam Thorn committed
    my $zfs=shift;
    my $fh;
    open($fh,"sudo zfs get -p -H used $zfs |");
    my $line=<$fh>;
    close ($fh);
    if ($line) {
        my @a=split(/\s+/,$line);
        my $used=$a[2];
        open ($fh,"sudo zfs get -p -H available $zfs | ");
        $line=<$fh>;
        close ($fh);
        @a=split(/\s+/,$line);
        my $available=$a[2];
        if ($used+$available >0) {
            return $used/($used+$available);
        } else { 
            return 1;
        }
    }
    my $property=shift;
    my $quota=`sudo zfs get $property -Hp -ovalue $zfs`;
# Look up the 'quota' property of a ZFS dataset.
# Takes the dataset name and returns whatever thisproperty() yields for it.
sub thisquota($) {
    my ($dataset) = @_;
    my $property = 'quota';
    return thisproperty($dataset, $property);
}

# Return the quota of the "parent" of a ZFS dataset, where the parent is taken
# to be the first two '/'-separated components of the dataset name
# (e.g. "pool/group/host" gives "pool/group").
#
# Takes the dataset name; returns the result of thisquota() for the parent.
# A name with fewer than two components is used as it stands. Slicing [0..1]
# unconditionally would add an undefined element, producing "pool/" and an
# "uninitialized value" warning.
sub parentquota($) {
    my $zfs = shift;
    my @components = split('/', $zfs);
    # Keep at most the first two components that actually exist.
    my $last = $#components < 1 ? $#components : 1;
    my $parent = join('/', @components[0..$last]);
    chomp $parent;
    return thisquota($parent);
}

# Send one message to the Xymon server and return its complete reply.
#
# Takes the message text. After the message is written, the sending half of
# the socket is shut down, and the reply is read until end-of-file. Returns the
# reply as a single string (empty if nothing was read). Dies if the socket
# cannot be created.
sub sendToXymon {
    use IO::Socket;
    my ($msg) = @_;

    my $sock = IO::Socket::INET->new(
        PeerAddr => '131.111.112.27',
        PeerPort => 1984,
        Proto    => 'tcp',
    ) or die "Could not create socket: $!\n";

    print {$sock} $msg;
    shutdown($sock, 1);

    # Reading the handle in list context collects every remaining line.
    my $reply = join('', <$sock>);
    close($sock);
    return $reply;
}