#!/usr/bin/perl

=head1 NAME

check_drbd - Nagios plugin for DRBD

=head1 SYNOPSIS

B<check_drbd> [B<--verbose> | B<-v>]

=head1 DESCRIPTION

B<check_drbd> is a Nagios plugin for DRBD. It checks the connection state,
resource roles and disk states for every configured DRBD resource, and
produces a WARNING or CRITICAL alert if anything is amiss. The states of
both the local and remote sides of each connection are monitored.

=head2 Nagios status information

The status information emitted by this plugin carries the same fields as
F</proc/drbd>, written as C<key=value> pairs:

    drbd0:cs=Connected|ro=Primary|pro=Secondary|ds=UpToDate|pds=UpToDate|extra=

=over

=item C<cs>

Connection state.

=item C<ro>, C<pro>

Local and remote (peer) resource role.

=item C<ds>, C<pds>

Local and remote (peer) disk state.

=item C<extra>

Additional findings in parentheses (the out-of-sync volume and any
unexpected I/O flags); empty when there is nothing to report.

=back

If more than one device is present, and all devices are OK, the output is
summarised:

    drbd0: PriConUpT, drbd1: SecConUpT

If any devices are not OK, the output contains their statuses in full.

=head2 Nagios performance data

Complete performance data is emitted for all configured DRBD resources:

=over

=item drbdI<*>_ns

=item drbdI<*>_nr

The volume of network data sent to and received from the peer, in kiB.

=item drbdI<*>_dw

=item drbdI<*>_dr

The volume of data written to and read from the local disk, in kiB.

=item drbdI<*>_al

The number of updates of the activity log area of the metadata.

=item drbdI<*>_bm

The number of updates of the bitmap area of the metadata.

=item drbdI<*>_lo

The number of open requests to the local I/O subsystem issued by DRBD.

=item drbdI<*>_pe

The number of requests sent to the peer that have not yet been answered by
the latter.

=item drbdI<*>_ua

The number of requests received by the peer that have not yet been answered
by the latter.

=item drbdI<*>_ap

The number of block I/O requests forwarded by DRBD, but not yet answered by
DRBD.

=item drbdI<*>_ep

The number of epoch objects.

=item drbdI<*>_oos

The amount of storage currently out-of-sync, in kiB.

=back

=head1 OPTIONS

=over

=item B<-v>, B<--verbose>

Increase the verbosity of the output messages. This disables the Nagios
status information summarisation described above: all resources' statuses
are printed in full.

=back

=head1 EXIT STATUS

=over

=item 0

All resources are OK.

=item 1

Some resources are not OK, but do not need immediate attention.

=item 2

Some resources are not OK and need immediate attention.

=item 3

An error occurred while collecting the resources' statuses.

=back

=head1 FILES

F</proc/drbd>

=head1 SEE ALSO

The DRBD User's Guide, L<https://docs.linbit.com/>

=for comment
NOTE(review): the original SEE ALSO link target was lost when this file was
extracted; the link above is a stand-in - confirm against the upstream copy.

=cut

use strict;
use warnings;

use constant BASENAME   => ($0 =~ m{.*/([^/]+)})[0] || 'check_drbd';
use constant STATE_FILE => '/proc/drbd';

use constant {
    OK       => 0,
    WARNING  => 1,
    CRITICAL => 2,
    UNKNOWN  => 3,
};

use Getopt::Long;
use IO::File;

sub help;
sub usage;
sub perfdata;
sub ok;
sub warning;
sub critical;
sub unknown;
sub get_state;
sub level_for;

# Any uncaught die outside an eval becomes a Nagios UNKNOWN result instead
# of Perl's default exit code.
$SIG{__DIE__} = sub {
    die @_ if $^S;    # inside an eval: let the exception propagate
    print @_;
    exit UNKNOWN;
};

# Severity tables.  A state that is missing from its table is CRITICAL
# (see level_for).
my %CS_LEVEL = (
    Connected     => OK,
    Unconfigured  => OK,
    StandAlone    => WARNING,
    SyncingAll    => WARNING,
    SyncingQuick  => WARNING,
    SyncSource    => WARNING,
    SyncTarget    => WARNING,
    VerifyS       => WARNING,
    VerifyT       => WARNING,
    Disconnecting => WARNING,
    TearDown      => WARNING,
    StartingSyncS => WARNING,
    StartingSyncT => WARNING,
    WFSyncUUID    => WARNING,
);

# Connection states in which a non-zero out-of-sync count is expected.
my %OOS_LEVEL = (
    StartingSyncS => OK,
    StartingSyncT => OK,
    SyncSource    => OK,
    SyncTarget    => OK,
    PausedSyncS   => OK,
    PausedSyncT   => OK,
);

my %ROLE_LEVEL = (
    Primary   => OK,
    Secondary => OK,
);

my %DISK_LEVEL = (
    UpToDate    => OK,
    Consistent  => OK,
    Negotiating => WARNING,
    Attaching   => WARNING,
);

# Performance data collected by perfdata() and flushed by the exit helpers.
my @perfdata;

my $verbose;

Getopt::Long::Configure('bundling', 'no_ignore_case');
GetOptions(
    'verbose|v+' => \$verbose,
    'help|?'     => sub { help;  exit 0 },
    'usage'      => sub { usage; exit 0 },
) and @ARGV == 0
    or do { usage; exit UNKNOWN };

my @state  = get_state;
my $status = OK;

# NOTE(review): this looks like a "<<<section>>>" style header whose section
# name was stripped when the file was extracted; the literal is kept exactly
# as found - confirm the intended name against the upstream copy.
print "<<>>\n";

foreach my $id (0 .. $#state) {
    my $device = $state[$id]
        or next;    # device minors may be sparse

    # Grade every monitored field; each *_level key feeds the maximum below.
    $device->{cs_level} = level_for(\%CS_LEVEL, $device->{cs});

    # Out-of-sync blocks are only acceptable while a resync is in progress.
    if ($device->{oos}) {
        $device->{oos_level} = level_for(\%OOS_LEVEL, $device->{cs});
    }

    foreach my $field (qw( ro pro )) {
        $device->{"${field}_level"} = level_for(\%ROLE_LEVEL, $device->{$field});
    }

    foreach my $field (qw( ds pds )) {
        $device->{"${field}_level"} = level_for(\%DISK_LEVEL, $device->{$field});
    }

    my @extra;
    if ($device->{oos}) {
        push @extra, sprintf '%d kiB out-of-sync', $device->{oos};
    }
    if ($device->{iof} !~ /^r.--(.(-)?)?$/) {
        $device->{iof_level} = CRITICAL;
        push @extra, sprintf 'I/O flags: %s', $device->{iof};
    }
    my $extra = @extra ? sprintf(' (%s)', join ', ', @extra) : '';

    # The device's level is the worst of its individual field levels.
    my $level = OK;
    foreach my $key (grep /_level$/, keys %$device) {
        $level = $device->{$key} if $level < $device->{$key};
    }
    $status = $level if $status < $level;

    $device->{level} = $level;
    $device->{info}  = sprintf 'drbd%d:cs=%s|ro=%s|pro=%s|ds=%s|pds=%s|extra=%s',
        $id, $device->{cs}, $device->{ro}, $device->{pro},
        $device->{ds}, $device->{pds}, $extra;

    # Role and connstate reversed, like old check_drbd
    $device->{short} = sprintf 'drbd%d: %0.3s%0.3s%0.3s%s',
        $id, $device->{ro}, $device->{cs}, $device->{ds}, $extra;

    # Labels carry the device name so that counters of different devices do
    # not collide (the original emitted bare "ns=..." for every device).
    # "ep" is included because the POD documents it.
    foreach my $counter (qw( ns nr dw dr al bm lo pe ua ap ep oos )) {
        my $value = $device->{$counter};
        defined $value or next;
        perfdata "drbd${id}_${counter}=${value}";
    }
}

@state or critical 'No DRBD volumes present';

if ($status) {
    # Something is wrong: report only the affected devices, in full.
    my $message = join ', ', map $_->{info},
        grep { defined and $_->{level} } @state;
    if ($status == WARNING) {
        warning $message;
    }
    else {
        critical $message;
    }
}
else {
    # All OK: summarise unless --verbose was given or the state list has
    # exactly one slot.
    my $message = join ', ',
        map { ($verbose || @state == 1) ? $_->{info} : $_->{short} }
        grep defined, @state;
    ok $message;
}

# Not reached when the exit helpers above terminate the process; if control
# does get here, the __DIE__ handler turns this into an UNKNOWN result.
die;

###########################################################################
#
# NOTE(review): the bodies of help, usage, perfdata, ok, warning, critical
# and unknown, and the opening of get_state, were lost when this file was
# extracted.  They are reconstructed below from their call sites and from
# the EXIT STATUS section of the POD.  The exact wording of the messages is
# an assumption - confirm against the upstream copy.
#

# Map a state name to its Nagios level using the given table (hash ref).
# Returns CRITICAL when the state is undefined or not listed.
sub level_for {
    my ($table, $state) = @_;
    my $level = defined $state ? $table->{$state} : undef;
    return defined $level ? $level : CRITICAL;
}

# Print the long help text.
sub help {
    print <<EOF;
Usage: @{[ BASENAME ]} [OPTION]...
Check the state of all configured DRBD resources.

  -v, --verbose   print the status of every resource in full
  -?, --help      display this help and exit
      --usage     display a short usage message and exit
EOF
}

# Print the one-line usage summary.
sub usage {
    print <<EOF;
Usage: @{[ BASENAME ]} [-v?] [--verbose] [--help] [--usage]
EOF
}

# Queue one or more "label=value" performance data items for output.
sub perfdata {
    push @perfdata, @_;
}

# Print the status line (plus any queued performance data) and exit with
# the given code.  Does not return.
sub finish {
    my ($label, $code, @message) = @_;
    print "DRBD $label: ", @message;
    print ' | ', join(' ', @perfdata) if @perfdata;
    print "\n";
    exit $code;
}

sub ok       { finish('OK',       OK,       @_) }
sub warning  { finish('WARNING',  WARNING,  @_) }
sub critical { finish('CRITICAL', CRITICAL, @_) }
sub unknown  { finish('UNKNOWN',  UNKNOWN,  @_) }

# Parse STATE_FILE and return a list indexed by device minor number.  Each
# present device is a hash ref with the keys cs, ro, pro, ds, pds and iof
# from its header line, plus every "key:value" pair found on the lines that
# follow it (ns, nr, dw, dr, al, bm, lo, pe, ua, ap, ep, oos, ...).
# Exits with UNKNOWN if the file cannot be opened.
sub get_state {
    my $file = IO::File->new(STATE_FILE, 'r')
        or unknown "Could not open @{[ STATE_FILE ]}: $!";

    my @state;
    my $device;

    while (my $line = <$file>) {
        # Device header, e.g.
        #  0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r-----
        # ("st:" is accepted as an alternative spelling of "ro:").
        if ($line =~ m{^ \s* (\d+): \s* cs:(\w+) \s+ (?:ro|st):(\w+)/(\w+) \s+ ds:(\w+)/(\w+) \s+ \S+ \s+ (\S+)}x) {
            $device = $state[$1] = {
                cs  => $2,
                ro  => $3,
                pro => $4,
                ds  => $5,
                pds => $6,
                iof => $7,
            };
            next;
        }

        # A header we cannot parse in full (e.g. " 1: cs:Unconfigured").
        # Forget the current device so this line and its followers are not
        # folded into the previous device, which would overwrite its "cs".
        if ($line =~ /^\s*\d+:\s/) {
            undef $device;
            next;
        }

        $device or next;

        # Counter lines: record every key:value pair on the current device.
        $device->{$1} = $2 while $line =~ /(\w+):(\S+)/g;
    }

    $file->close;

    return @state;
}