mirror of
https://github.com/librenms/librenms-agent.git
synced 2024-05-09 09:54:52 +00:00
373 lines
7.8 KiB
Perl
Executable File
373 lines
7.8 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
|
|
=head1 NAME
|
|
|
|
check_drbd - Nagios plugin for DRBD
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
B<check_drbd> [B<--verbose> | B<-v>]
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
B<check_drbd> is a Nagios plugin for DRBD. It checks the connection state,
|
|
resource roles and disk states for every configured DRBD resource, and
|
|
produces a WARNING or CRITICAL alert if anything is amiss. The states
|
|
of both the local and remote sides of each connection are monitored.
|
|
|
|
=head2 Nagios status information
|
|
|
|
The status information emitted by this plugin is similar to the information
|
|
in F</proc/drbd>:
|
|
|
|
  drbd0: Connected Primary/Secondary UpToDate/UpToDate
  |      |         |       |         |        |
  |      |         |       |         |        Remote disk state
  |      |         |       |         Local disk state
  |      |         |       Remote resource role
  |      |         Local resource role
  |      Connection state
  DRBD device
|
|
|
|
If more than one device is present, and all devices are OK, the output is
|
|
summarised:
|
|
|
|
drbd0: PriConUpT, drbd1: SecConUpT
|
|
|
|
If any devices are not OK, the output contains their statuses in full.
|
|
|
|
=head2 Nagios performance data
|
|
|
|
Complete performance data is emitted for all configured DRBD resources:
|
|
|
|
=over
|
|
|
|
=item drbdI<*>_ns
|
|
|
|
=item drbdI<*>_nr
|
|
|
|
The volume of network data sent to and received from the peer, in kiB.
|
|
|
|
=item drbdI<*>_dw
|
|
|
|
=item drbdI<*>_dr
|
|
|
|
The volume of data written to and read from the local disk, in kiB.
|
|
|
|
=item drbdI<*>_al
|
|
|
|
The number of updates of the activity log area of the metadata.
|
|
|
|
=item drbdI<*>_lo
|
|
|
|
The number of open requests to the local I/O subsystem issued by DRBD.
|
|
|
|
=item drbdI<*>_pe
|
|
|
|
The number of requests sent to the peer that have not yet been answered by the latter.
|
|
|
|
=item drbdI<*>_ua
|
|
|
|
The number of requests received by the peer that have not yet been answered by the latter.
|
|
|
|
=item drbdI<*>_ap
|
|
|
|
The number of block I/O requests forwarded by DRBD, but not yet answered by DRBD.
|
|
|
|
=item drbdI<*>_ep
|
|
|
|
The number of epoch objects.
|
|
|
|
=item drbdI<*>_oos
|
|
|
|
The amount of storage currently out-of-sync, in kiB.
|
|
|
|
=back
|
|
|
|
=head1 OPTIONS
|
|
|
|
=over
|
|
|
|
=item B<-v>, B<--verbose>
|
|
|
|
Increase the verbosity of the output messages. This disables the Nagios status
|
|
information summarisation described above: all resources' statuses are printed
|
|
in full.
|
|
|
|
=back
|
|
|
|
=head1 EXIT STATUS
|
|
|
|
=over
|
|
|
|
=item 0
|
|
|
|
All resources are OK.
|
|
|
|
=item 1
|
|
|
|
Some resources are not OK, but do not need immediate attention.
|
|
|
|
=item 2
|
|
|
|
Some resources are not OK and need immediate attention.
|
|
|
|
=item 3
|
|
|
|
An error occurred while collecting the resources' statuses.
|
|
|
|
=back
|
|
|
|
=head1 FILES
|
|
|
|
F</proc/drbd>
|
|
|
|
=head1 SEE ALSO
|
|
|
|
L<The DRBD Home Page|http://www.drbd.org/>
|
|
|
|
=cut
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
# Name shown in the help/usage text: the last path component of $0, or
# 'check_drbd' when $0 contains no directory part (or the component is false).
use constant BASENAME => do {
    my ($leaf) = $0 =~ m{.*/([^/]+)};
    $leaf || 'check_drbd';
};

# File the DRBD status is read from.
use constant STATE_FILE => '/proc/drbd';

# Exit statuses, in increasing order of severity (see EXIT STATUS in the POD).
use constant OK       => 0;
use constant WARNING  => 1;
use constant CRITICAL => 2;
use constant UNKNOWN  => 3;
|
|
|
|
use Getopt::Long;
|
|
use IO::File;
|
|
|
|
# Forward declarations. The subroutines are defined after the main program;
# declaring them here lets the code in between call them as bare list
# operators (e.g. "usage;" or "get_state") before their bodies are compiled.

# Status parser.
sub get_state;

# Performance-data collector and the exit helpers, one per status.
sub perfdata;
sub ok;
sub warning;
sub critical;
sub unknown;

# Command-line help text.
sub help;
sub usage;
|
|
|
|
# Last-resort handler for uncaught exceptions: print the message on standard
# output and exit with the UNKNOWN status. While an eval is executing ($^S is
# true) the exception is re-thrown untouched so the eval can catch it.
$SIG{__DIE__} = sub {
    my @complaint = @_;

    if ($^S) {
        die @complaint;
    }

    print @complaint;
    exit UNKNOWN;
};
|
|
|
|
# Verbosity counter; each -v / --verbose on the command line increments it.
my $verbose;

Getopt::Long::Configure(qw( bundling no_ignore_case ));

my $options_ok = GetOptions(
    'verbose|v+' => \$verbose,
    'help|?'     => sub { help();  exit 0 },
    'usage'      => sub { usage(); exit 0 },
);

# Unknown options and stray positional arguments are both usage errors.
unless ($options_ok && @ARGV == 0) {
    usage();
    exit UNKNOWN;
}
|
|
|
|
# Severity of each connection state (cs). Any state that is not listed is
# treated as CRITICAL.
my %cs_severity = (
    Connected     => OK,
    Unconfigured  => OK,
    StandAlone    => WARNING,
    SyncingAll    => WARNING,
    SyncingQuick  => WARNING,
    SyncSource    => WARNING,
    SyncTarget    => WARNING,
    VerifyS       => WARNING,
    VerifyT       => WARNING,
    Disconnecting => WARNING,
    TearDown      => WARNING,
    StartingSyncS => WARNING,
    StartingSyncT => WARNING,
    WFSyncUUID    => WARNING,
);

# Connection states in which a non-zero out-of-sync counter is acceptable;
# in any other state out-of-sync data is CRITICAL.
my %oos_severity = (
    StartingSyncS => OK,
    StartingSyncT => OK,
    SyncSource    => OK,
    SyncTarget    => OK,
    PausedSyncS   => OK,
    PausedSyncT   => OK,
);

# Severity of the local/remote resource roles (ro, pro); unlisted => CRITICAL.
my %role_severity = (
    Primary   => OK,
    Secondary => OK,
);

# Severity of the local/remote disk states (ds, pds); unlisted => CRITICAL.
my %disk_severity = (
    UpToDate    => OK,
    Consistent  => OK,
    Negotiating => WARNING,
    Attaching   => WARNING,
);

my @state  = get_state;
my $status = OK;

# Section header line; it is emitted before any status text.
# NOTE(review): presumably consumed by the LibreNMS agent framework - confirm.
print "<<<drbd>>>\n";

for my $id (0 .. $#state) {
    # @state is indexed by minor number and may contain holes.
    my $device = $state[$id];
    next unless $device;

    # Connection state.
    my $cs = $device->{cs};
    $device->{cs_level} = exists $cs_severity{$cs} ? $cs_severity{$cs} : CRITICAL;

    # Out-of-sync data is only judged when there is some.
    if ($device->{oos}) {
        $device->{oos_level} = exists $oos_severity{$cs} ? $oos_severity{$cs} : CRITICAL;
    }

    # Local and remote resource roles.
    for my $field (qw( ro pro )) {
        my $role = $device->{$field};
        $device->{"${field}_level"} =
            exists $role_severity{$role} ? $role_severity{$role} : CRITICAL;
    }

    # Local and remote disk states.
    for my $field (qw( ds pds )) {
        my $disk = $device->{$field};
        $device->{"${field}_level"} =
            exists $disk_severity{$disk} ? $disk_severity{$disk} : CRITICAL;
    }

    # Optional remarks appended to the status text.
    my @extra;
    if ($device->{oos}) {
        push @extra, sprintf('%d kiB out-of-sync', $device->{oos});
    }
    unless ($device->{iof} =~ /^r.--(.(-)?)?$/) {
        $device->{iof_level} = CRITICAL;
        push @extra, sprintf('I/O flags: %s', $device->{iof});
    }
    my $extra = @extra ? sprintf(' (%s)', join ', ', @extra) : '';

    # The device's level is the worst of all its "*_level" entries, and the
    # overall status is the worst level of any device.
    my $level = OK;
    for my $key (grep { /_level$/ } keys %$device) {
        $level = $device->{$key} if $device->{$key} > $level;
    }
    $status = $level if $level > $status;

    $device->{level} = $level;
    $device->{info}  = sprintf 'drbd%d:cs=%s|ro=%s|pro=%s|ds=%s|pds=%s|extra=%s',
        $id, $device->{cs}, $device->{ro}, $device->{pro}, $device->{ds}, $device->{pds}, $extra;
    # Role and connection state are reversed here, like the old check_drbd.
    $device->{short} = sprintf 'drbd%d: %0.3s%0.3s%0.3s%s',
        $id, $device->{ro}, $device->{cs}, $device->{ds}, $extra;

    # Queue every counter that was present in the status file, in this order.
    for my $counter (qw( ns nr dw dr al bm lo pe ua ap oos )) {
        my $value = $device->{$counter};
        next unless defined $value;
        perfdata("${counter}=${value}");
    }
}

critical('No DRBD volumes present') unless @state;

my @present = grep { defined } @state;

if ($status == OK) {
    # Full status lines when verbose or when there is a single slot in
    # @state; otherwise the abbreviated form.
    my $message = join ', ',
        map { ($verbose || @state == 1) ? $_->{info} : $_->{short} } @present;
    ok($message);
}
else {
    # Only the devices that are not OK are listed.
    my $message = join ', ', map { $_->{info} } grep { $_->{level} } @present;
    if ($status == WARNING) {
        warning($message);
    }
    else {
        critical($message);
    }
}

# Only reached if none of the exit helpers above terminated the program.
die;
|
|
|
|
###########################################################################
|
|
|
|
# help()
#
# Print the full help text (usage line, description and option list) on
# standard output. Takes no arguments and does not exit; callers decide the
# exit status.
sub help {
    # The option descriptions are laid out in two aligned columns.
    print <<"EOF";
Usage: @{[BASENAME]} [OPTION...]
Check DRBD resources.

Plugin options:
  -v, --verbose              Increase verbosity

Help options:
  -?, --help                 Give this help list
      --usage                Give a short usage message
EOF
}
|
|
|
|
# usage()
#
# Print the one-line usage summary on standard output. Takes no arguments
# and does not exit.
sub usage {
    print "Usage: @{[BASENAME]} [-v?] [--verbose] [--help] [--usage]\n";
}
|
|
|
|
###########################################################################
|
|
|
|
{
    # Performance-data items queued by perfdata() and flushed by _exit().
    my @perfdata;

    # perfdata(@items)
    #
    # Queue one or more performance-data strings for the final output line.
    sub perfdata { push @perfdata, @_ }

    # _exit($status, $message)
    #
    # Print the status line and terminate with $status. When $message is
    # undefined, the name of the status (OK, WARNING, CRITICAL, otherwise
    # UNKNOWN) is printed instead. Queued performance data follows on the
    # same line: the first item is introduced by "|", and if further items
    # remain they are introduced by another "|" and each is followed by "|".
    sub _exit {
        my ($status, $message) = @_;

        my $line = defined $message
            ? $message
            : ((qw( OK WARNING CRITICAL ))[$status] || 'UNKNOWN');

        my $first = shift @perfdata;
        if ($first) {
            $line .= "|$first";
        }

        if (@perfdata) {
            $line .= '|' . join('', map { "$_|" } @perfdata);
        }

        print "$line\n";
        exit $status;
    }
}
|
|
|
|
# Exit helpers: print the given message (plus any queued performance data)
# and terminate with the corresponding status.
sub ok       { _exit(OK,       @_) }
sub warning  { _exit(WARNING,  @_) }
sub critical { _exit(CRITICAL, @_) }
sub unknown  { _exit(UNKNOWN,  @_) }
|
|
|
|
###########################################################################
|
|
|
|
# get_state([$path])
#
# Parse the DRBD status file and return a list indexed by device minor
# number. Each parsed device is a hash reference with the keys cs
# (connection state), ro/pro (local/remote role), ds/pds (local/remote disk
# state) and iof (I/O flags), plus one entry per "name:value" pair found on
# the lines that follow the device header (ns, nr, dw, ..., oos). Minor
# numbers without a parsed device are undef in the returned list.
#
# $path is optional and defaults to STATE_FILE. If the file cannot be
# opened, critical() is called with an explanatory message.
sub get_state {
    my ($path) = @_;
    $path = STATE_FILE unless defined $path;

    my $io = IO::File->new($path, '<')
        or critical("Could not open $path for reading: $!");

    #  0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r----
    #     ns:0 nr:20492 dw:20480 dr:124 al:5 bm:1296 lo:0 pe:0 ua:0 ap:0 ep:1 wo:d oos:0

    my @state;
    my $device;    # device the current counter lines belong to, if any

    while (my $line = <$io>) {
        # Full device header. The token between the disk states and the I/O
        # flags (the replication protocol, "C" above) is optional because it
        # is absent on some lines, e.g. for StandAlone devices.
        my @field = $line =~ m{
            ^ \s* (\d+): \s* cs:(\w+)
            \s+ (?:ro|st):(\w+)/(\w+)
            \s+ ds:(\w+)/(\w+)
            (?: \s+ \S+ )?
            \s+ (\S+)
        }x;
        if (@field) {
            my %parsed;
            @parsed{qw( cs ro pro ds pds iof )} = @field[1 .. 6];
            $device = $state[ $field[0] ] = \%parsed;
            next;
        }

        # A device header that lacks roles and disk states, such as
        # " 1: cs:Unconfigured". It starts a new device, so it must not be
        # scanned for name:value pairs: that would overwrite the "cs" of the
        # previously parsed device. The device itself is not reported.
        if ($line =~ /^ \s* \d+ : \s* cs:/x) {
            undef $device;
            next;
        }

        # Counter lines are only meaningful after a parsed device header.
        next unless $device;
        $device->{$1} = $2 while $line =~ /(\w+):(\S+)/g;
    }

    $io->close;

    return @state;
}
|