mirror of
				https://github.com/librenms/librenms.git
				synced 2024-10-07 16:52:45 +00:00 
			
		
		
		
	git-svn-id: http://www.observium.org/svn/observer/trunk@3059 61d68cd4-352d-0410-923a-c4978735b2b8
		
			
				
	
	
		
			373 lines
		
	
	
		
			7.8 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			373 lines
		
	
	
		
			7.8 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/perl
 | 
						|
 | 
						|
=head1 NAME
 | 
						|
 | 
						|
check_drbd - Nagios plugin for DRBD
 | 
						|
 | 
						|
=head1 SYNOPSIS
 | 
						|
 | 
						|
B<check_drbd> [B<--verbose> | B<-v>]
 | 
						|
 | 
						|
=head1 DESCRIPTION
 | 
						|
 | 
						|
B<check_drbd> is a Nagios plugin for DRBD. It checks the connection state,
 | 
						|
resource roles and disk states for every configured DRBD resource, and
 | 
						|
produces a WARNING or CRITICAL alert if anything is amiss. The states
 | 
						|
of both the local and remote sides of each connection are monitored.
 | 
						|
 | 
						|
=head2 Nagios status information
 | 
						|
 | 
						|
The status information emitted by this plugin is similar to the information
 | 
						|
in F</proc/drbd>:
 | 
						|
 | 
						|
    drbd0: Connected Primary/Secondary UpToDate/UpToDate
 | 
						|
    |      |         |       |         |        |
 | 
						|
    |      |         |       |         |        Remote disk state
 | 
						|
    |      |         |       |         Local disk state
 | 
						|
    |      |         |       Remote resource role
 | 
						|
    |      |         Local resource role
 | 
						|
    |      Connection state
 | 
						|
    DRBD device
 | 
						|
 | 
						|
If more than one device is present, and all devices are OK, the output is
 | 
						|
summarised:
 | 
						|
 | 
						|
    drbd0: PriConUpT, drbd1: SecConUpT
 | 
						|
 | 
						|
If any devices are not OK, the output contains their statuses in full.
 | 
						|
 | 
						|
=head2 Nagios performance data
 | 
						|
 | 
						|
Complete performance data is emitted for all configured DRBD resources:
 | 
						|
 | 
						|
=over
 | 
						|
 | 
						|
=item drbdI<*>_ns
 | 
						|
 | 
						|
=item drbdI<*>_nr
 | 
						|
 | 
						|
The volume of network data sent to and received from the peer, in kiB.
 | 
						|
 | 
						|
=item drbdI<*>_dw
 | 
						|
 | 
						|
=item drbdI<*>_dr
 | 
						|
 | 
						|
The volume of data written to and read from the local disk, in kiB.
 | 
						|
 | 
						|
=item drbdI<*>_al
 | 
						|
 | 
						|
The number of updates of the activity log area of the metadata.
 | 
						|
 | 
						|
=item drbdI<*>_lo
 | 
						|
 | 
						|
The number of open requests to the local I/O subsystem issued by DRBD.
 | 
						|
 | 
						|
=item drbdI<*>_pe
 | 
						|
 | 
						|
The number of requests sent to the peer that have not yet been answered by the latter.
 | 
						|
 | 
						|
=item drbdI<*>_ua
 | 
						|
 | 
						|
The number of requests received by the peer that have not yet been answered by the latter.
 | 
						|
 | 
						|
=item drbdI<*>_ap
 | 
						|
 | 
						|
The number of block I/O requests forwarded by DRBD, but not yet answered by DRBD.
 | 
						|
 | 
						|
=item drbdI<*>_ep
 | 
						|
 | 
						|
The number of epoch objects.
 | 
						|
 | 
						|
=item drbdI<*>_oos
 | 
						|
 | 
						|
The amount of storage currently out-of-sync, in kiB.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head1 OPTIONS
 | 
						|
 | 
						|
=over
 | 
						|
 | 
						|
=item B<-v>, B<--verbose>
 | 
						|
 | 
						|
Increase the verbosity of the output messages. This disables the Nagios status
 | 
						|
information summarisation described above: all resources' statuses are printed
 | 
						|
in full.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head1 EXIT STATUS
 | 
						|
 | 
						|
=over
 | 
						|
 | 
						|
=item 0
 | 
						|
 | 
						|
All resources are OK.
 | 
						|
 | 
						|
=item 1
 | 
						|
 | 
						|
Some resources are not OK, but do not need immediate attention.
 | 
						|
 | 
						|
=item 2
 | 
						|
 | 
						|
Some resources are not OK and need immediate attention.
 | 
						|
 | 
						|
=item 3
 | 
						|
 | 
						|
An error occurred while collecting the resources' statuses.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head1 FILES
 | 
						|
 | 
						|
F</proc/drbd>
 | 
						|
 | 
						|
=head1 SEE ALSO
 | 
						|
 | 
						|
L<The DRBD Home Page|http://www.drbd.org/>
 | 
						|
 | 
						|
=cut
 | 
						|
 | 
						|
use strict;
 | 
						|
use warnings;
 | 
						|
 | 
						|
use constant BASENAME => ($0 =~ m{.*/([^/]+)})[0] || 'check_drbd';
 | 
						|
 | 
						|
use constant STATE_FILE => '/proc/drbd';
 | 
						|
 | 
						|
use constant {
 | 
						|
	OK       => 0,
 | 
						|
	WARNING  => 1,
 | 
						|
	CRITICAL => 2,
 | 
						|
	UNKNOWN  => 3,
 | 
						|
};
 | 
						|
 | 
						|
use Getopt::Long;
 | 
						|
use IO::File;
 | 
						|
 | 
						|
sub help;
 | 
						|
sub usage;
 | 
						|
 | 
						|
sub perfdata;
 | 
						|
sub ok;
 | 
						|
sub warning;
 | 
						|
sub critical;
 | 
						|
sub unknown;
 | 
						|
 | 
						|
sub get_state;
 | 
						|
 | 
						|
$SIG{__DIE__} = sub {
 | 
						|
	die @_ if $^S;
 | 
						|
	print @_;
 | 
						|
	exit UNKNOWN;
 | 
						|
};
 | 
						|
 | 
						|
my $verbose;
 | 
						|
 | 
						|
Getopt::Long::Configure('bundling', 'no_ignore_case');
 | 
						|
GetOptions(
 | 
						|
	'verbose|v+' => \$verbose,
 | 
						|
	'help|?'     => sub { help; exit 0 },
 | 
						|
	'usage'      => sub { usage; exit 0 },
 | 
						|
) and @ARGV == 0
 | 
						|
	or do { usage; exit UNKNOWN };
 | 
						|
 | 
						|
my @state = get_state;
 | 
						|
my $status = OK;
 | 
						|
 | 
						|
print "<<<drbd>>>\n";
 | 
						|
 | 
						|
foreach my $id (0 .. $#state) {
 | 
						|
	my $device = $state[$id]
 | 
						|
		or next;
 | 
						|
 | 
						|
	# Assume CRITICAL by default
 | 
						|
 | 
						|
	foreach (qw( cs )) {
 | 
						|
		$device->{"${_}_level"} = {
 | 
						|
			Connected     => OK,
 | 
						|
			Unconfigured  => OK,
 | 
						|
			StandAlone    => WARNING,
 | 
						|
			SyncingAll    => WARNING,
 | 
						|
			SyncingQuick  => WARNING,
 | 
						|
			SyncSource    => WARNING,
 | 
						|
			SyncTarget    => WARNING,
 | 
						|
			VerifyS       => WARNING,
 | 
						|
			VerifyT       => WARNING,
 | 
						|
			Disconnecting => WARNING,
 | 
						|
			TearDown      => WARNING,
 | 
						|
			StartingSyncS => WARNING,
 | 
						|
			StartingSyncT => WARNING,
 | 
						|
			WFSyncUUID    => WARNING,
 | 
						|
		}->{$device->{$_}};
 | 
						|
		$device->{"${_}_level"} = CRITICAL unless defined $device->{"${_}_level"};
 | 
						|
 | 
						|
		if ($device->{oos}) {
 | 
						|
			$device->{oos_level} = {
 | 
						|
				StartingSyncS => OK,
 | 
						|
				StartingSyncT => OK,
 | 
						|
				SyncSource    => OK,
 | 
						|
				SyncTarget    => OK,
 | 
						|
				PausedSyncS   => OK,
 | 
						|
				PausedSyncT   => OK,
 | 
						|
			}->{$device->{$_}};
 | 
						|
			$device->{oos_level} = CRITICAL unless defined $device->{oos_level};
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	foreach (qw( ro pro )) {
 | 
						|
		$device->{"${_}_level"} = {
 | 
						|
			Primary   => OK,
 | 
						|
			Secondary => OK,
 | 
						|
		}->{$device->{$_}};
 | 
						|
		$device->{"${_}_level"} = CRITICAL unless defined $device->{"${_}_level"};
 | 
						|
	}
 | 
						|
 | 
						|
	foreach (qw( ds pds )) {
 | 
						|
		$device->{"${_}_level"} = {
 | 
						|
			UpToDate    => OK,
 | 
						|
			Consistent  => OK,
 | 
						|
			Negotiating => WARNING,
 | 
						|
			Attaching   => WARNING,
 | 
						|
		}->{$device->{$_}};
 | 
						|
		$device->{"${_}_level"} = CRITICAL unless defined $device->{"${_}_level"};
 | 
						|
	}
 | 
						|
 | 
						|
	my @extra;
 | 
						|
	if ($device->{oos}) {
 | 
						|
		push @extra, sprintf '%d kiB out-of-sync', $device->{oos};
 | 
						|
	}
 | 
						|
	if ($device->{iof} !~ /^r.--(.(-)?)?$/) {
 | 
						|
		$device->{iof_level} = CRITICAL;
 | 
						|
		push @extra, sprintf 'I/O flags: %s', $device->{iof};
 | 
						|
	}
 | 
						|
	my $extra = @extra ? sprintf(' (%s)', join ', ', @extra) : '';
 | 
						|
 | 
						|
	my $level = OK;
 | 
						|
	foreach (grep /_level$/, keys %$device) {
 | 
						|
		$level = $device->{$_} if $level < $device->{$_};
 | 
						|
	}
 | 
						|
	$status = $level if $status < $level;
 | 
						|
 | 
						|
	$device->{level} = $level;
 | 
						|
	$device->{info}  = sprintf 'drbd%d:cs=%s|ro=%s|pro=%s|ds=%s|pds=%s|extra=%s',  $id, $device->{cs}, $device->{ro}, $device->{pro}, $device->{ds}, $device->{pds}, $extra;
 | 
						|
	$device->{short} = sprintf 'drbd%d: %0.3s%0.3s%0.3s%s', $id, $device->{ro}, $device->{cs}, $device->{ds}, $extra; # Role and connstate reversed, like old check_drbd
 | 
						|
 | 
						|
	foreach (qw( ns nr dw dr al bm )) {
 | 
						|
		my $value = $device->{$_};
 | 
						|
		defined $value
 | 
						|
			or next;
 | 
						|
		perfdata "${_}=${value}";
 | 
						|
	}
 | 
						|
 | 
						|
	foreach (qw( lo pe ua ap oos )) {
 | 
						|
		my $value = $device->{$_};
 | 
						|
		defined $value
 | 
						|
			or next;
 | 
						|
		perfdata "${_}=${value}";
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
@state
 | 
						|
	or critical 'No DRBD volumes present';
 | 
						|
 | 
						|
if ($status) {
 | 
						|
	my $message = join ', ', map $_->{info}, grep { defined and $_->{level} } @state;
 | 
						|
	if ($status == WARNING) {
 | 
						|
		warning $message;
 | 
						|
	} else {
 | 
						|
		critical $message;
 | 
						|
	}
 | 
						|
} else {
 | 
						|
	my $message = join ', ', map { ($verbose || @state == 1) ? $_->{info} : $_->{short} } grep defined, @state;
 | 
						|
	ok $message;
 | 
						|
}
 | 
						|
 | 
						|
die;
 | 
						|
 | 
						|
###########################################################################
 | 
						|
 | 
						|
sub help {
 | 
						|
	print <<EOF;
 | 
						|
Usage: @{[BASENAME]} [OPTION...]
 | 
						|
Check DRBD resources.
 | 
						|
 | 
						|
 Plugin options:
 | 
						|
  -v, --verbose              Increase verbosity
 | 
						|
 | 
						|
 Help options:
 | 
						|
  -?, --help                 Give this help list
 | 
						|
      --usage                Give a short usage message
 | 
						|
EOF
 | 
						|
}
 | 
						|
 | 
						|
sub usage {
 | 
						|
	print <<EOF;
 | 
						|
Usage: @{[BASENAME]} [-v?] [--verbose] [--help] [--usage]
 | 
						|
EOF
 | 
						|
}
 | 
						|
 | 
						|
###########################################################################
 | 
						|
 | 
						|
{
 | 
						|
	my @perfdata;
 | 
						|
 | 
						|
	sub perfdata { push @perfdata, @_ }
 | 
						|
 | 
						|
	sub _exit {
 | 
						|
		my ($status, $message) = @_;
 | 
						|
 | 
						|
		if (defined $message) {
 | 
						|
			print $message;
 | 
						|
		} else {
 | 
						|
			print qw( OK WARNING CRITICAL )[$status] || 'UNKNOWN';
 | 
						|
		}
 | 
						|
		if (my $perfdata = shift @perfdata) {
 | 
						|
			print "|$perfdata";
 | 
						|
		}
 | 
						|
#		print "\n";
 | 
						|
		if (@perfdata) {
 | 
						|
			print '|';
 | 
						|
			print map "$_|", @perfdata;
 | 
						|
		}
 | 
						|
		print "\n";
 | 
						|
		exit $status;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
sub ok       { _exit OK,       @_ }
 | 
						|
sub warning  { _exit WARNING,  @_ }
 | 
						|
sub critical { _exit CRITICAL, @_ }
 | 
						|
sub unknown  { _exit UNKNOWN,  @_ }
 | 
						|
 | 
						|
###########################################################################
 | 
						|
 | 
						|
sub get_state {
 | 
						|
	my $io = new IO::File(STATE_FILE)
 | 
						|
		or critical "Could not open @{[STATE_FILE]} for reading: $!";
 | 
						|
 | 
						|
# 0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r----
 | 
						|
#    ns:0 nr:20492 dw:20480 dr:124 al:5 bm:1296 lo:0 pe:0 ua:0 ap:0 ep:1 wo:d oos:0
 | 
						|
 | 
						|
	my @state;
 | 
						|
	my $device;
 | 
						|
	while (<$io>) {
 | 
						|
		if (m(^ \s* (\d+): \s* cs:(\w+) \s+ (?:ro|st):(\w+)/(\w+) \s+ ds:(\w+)/(\w+) \s+ \S+ \s+ (\S+))x) {
 | 
						|
			$device = $state[$1] = {
 | 
						|
				cs  => $2,
 | 
						|
				ro  => $3,
 | 
						|
				pro => $4,
 | 
						|
				ds  => $5,
 | 
						|
				pds => $6,
 | 
						|
				iof => $7,
 | 
						|
			};
 | 
						|
			next;
 | 
						|
		};
 | 
						|
 | 
						|
		$device or next;
 | 
						|
		$device->{$1} = $2 while /(\w+):(\S+)/g;
 | 
						|
	}
 | 
						|
 | 
						|
	@state;
 | 
						|
}
 |