mirror of
				https://github.com/librenms/librenms-agent.git
				synced 2024-05-09 09:54:52 +00:00 
			
		
		
		
	
		
			
	
	
		
			373 lines
		
	
	
		
			7.8 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			373 lines
		
	
	
		
			7.8 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|  | #!/usr/bin/perl | ||
|  | 
 | ||
|  | =head1 NAME | ||
|  | 
 | ||
|  | check_drbd - Nagios plugin for DRBD | ||
|  | 
 | ||
|  | =head1 SYNOPSIS | ||
|  | 
 | ||
|  | B<check_drbd> [B<--verbose> | B<-v>] | ||
|  | 
 | ||
|  | =head1 DESCRIPTION | ||
|  | 
 | ||
|  | B<check_drbd> is a Nagios plugin for DRBD. It checks the connection state, | ||
|  | resource roles and disk states for every configured DRBD resource, and | ||
|  | produces a WARNING or CRITICAL alert if anything is amiss. The states | ||
|  | of both the local and remote sides of each connection are monitored. | ||
|  | 
 | ||
|  | =head2 Nagios status information | ||
|  | 
 | ||
|  | The status information emitted by this plugin is similar to the information | ||
|  | in F</proc/drbd>: | ||
|  | 
 | ||
|  |     drbd0: Connected Primary/Secondary UpToDate/UpToDate | ||
|  |     |      |         |       |         |        | | ||
|  |     |      |         |       |         |        Remote disk state | ||
|  |     |      |         |       |         Local disk state | ||
|  |     |      |         |       Remote resource role | ||
|  |     |      |         Local resource role | ||
|  |     |      Connection state | ||
|  |     DRBD device | ||
|  | 
 | ||
|  | If more than one device is present, and all devices are OK, the output is | ||
|  | summarised: | ||
|  | 
 | ||
|  |     drbd0: PriConUpT, drbd1: SecConUpT | ||
|  | 
 | ||
|  | If any devices are not OK, the output contains their statuses in full. | ||
|  | 
 | ||
|  | =head2 Nagios performance data | ||
|  | 
 | ||
|  | Complete performance data is emitted for all configured DRBD resources: | ||
|  | 
 | ||
|  | =over | ||
|  | 
 | ||
|  | =item drbdI<*>_ns | ||
|  | 
 | ||
|  | =item drbdI<*>_nr | ||
|  | 
 | ||
|  | The volume of network data sent to and received from the peer, in kiB. | ||
|  | 
 | ||
|  | =item drbdI<*>_dw | ||
|  | 
 | ||
|  | =item drbdI<*>_dr | ||
|  | 
 | ||
|  | The volume of data written to and read from the local disk, in kiB. | ||
|  | 
 | ||
|  | =item drbdI<*>_al | ||
|  | 
 | ||
|  | The number of updates of the activity log area of the metadata. | ||
|  | 
 | ||
|  | =item drbdI<*>_lo | ||
|  | 
 | ||
|  | The number of open requests to the local I/O subsystem issued by DRBD. | ||
|  | 
 | ||
|  | =item drbdI<*>_pe | ||
|  | 
 | ||
|  | The number of requests sent to the peer that have not yet been answered by the latter. | ||
|  | 
 | ||
|  | =item drbdI<*>_ua | ||
|  | 
 | ||
|  | The number of requests received by the peer that have not yet been answered by the latter. | ||
|  | 
 | ||
|  | =item drbdI<*>_ap | ||
|  | 
 | ||
|  | The number of block I/O requests forwarded by DRBD, but not yet answered by DRBD. | ||
|  | 
 | ||
|  | =item drbdI<*>_ep | ||
|  | 
 | ||
|  | The number of epoch objects. | ||
|  | 
 | ||
|  | =item drbdI<*>_oos | ||
|  | 
 | ||
|  | The amount of storage currently out-of-sync, in kiB. | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =head1 OPTIONS | ||
|  | 
 | ||
|  | =over | ||
|  | 
 | ||
|  | =item B<-v>, B<--verbose> | ||
|  | 
 | ||
|  | Increase the verbosity of the output messages. This disables the Nagios status | ||
|  | information summarisation described above: all resources' statuses are printed | ||
|  | in full. | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =head1 EXIT STATUS | ||
|  | 
 | ||
|  | =over | ||
|  | 
 | ||
|  | =item 0 | ||
|  | 
 | ||
|  | All resources are OK. | ||
|  | 
 | ||
|  | =item 1 | ||
|  | 
 | ||
|  | Some resources are not OK, but do not need immediate attention. | ||
|  | 
 | ||
|  | =item 2 | ||
|  | 
 | ||
|  | Some resources are not OK and need immediate attention. | ||
|  | 
 | ||
|  | =item 3 | ||
|  | 
 | ||
|  | An error occurred while collecting the resources' statuses. | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =head1 FILES | ||
|  | 
 | ||
|  | F</proc/drbd> | ||
|  | 
 | ||
|  | =head1 SEE ALSO | ||
|  | 
 | ||
|  | L<The DRBD Home Page|http://www.drbd.org/> | ||
|  | 
 | ||
|  | =cut | ||
|  | 
 | ||
|  | use strict; | ||
|  | use warnings; | ||
|  | 
 | ||
# Name shown in help/usage output: the last path component of $0, or
# 'check_drbd' when $0 contains no '/'.
use constant BASENAME => ($0 =~ m{.*/([^/]+)})[0] || 'check_drbd';

# File the DRBD state is read from.
use constant STATE_FILE => '/proc/drbd';

# Plugin exit codes, ordered by increasing severity.
use constant OK       => 0;
use constant WARNING  => 1;
use constant CRITICAL => 2;
use constant UNKNOWN  => 3;
|  | 
 | ||
|  | use Getopt::Long; | ||
|  | use IO::File; | ||
|  | 
 | ||
|  | sub help; | ||
|  | sub usage; | ||
|  | 
 | ||
|  | sub perfdata; | ||
|  | sub ok; | ||
|  | sub warning; | ||
|  | sub critical; | ||
|  | sub unknown; | ||
|  | 
 | ||
|  | sub get_state; | ||
|  | 
 | ||
# Global die() hook: outside of an eval, print the message on the default
# output handle and terminate with the UNKNOWN exit code.
$SIG{__DIE__} = sub {
	my @complaint = @_;

	# $^S is true while an eval is executing; re-raise so the eval sees it.
	if ($^S) {
		die @complaint;
	}

	print @complaint;
	exit UNKNOWN;
};
|  | 
 | ||
# Verbosity counter: incremented once per -v / --verbose on the command line.
my $verbose;

Getopt::Long::Configure(qw( bundling no_ignore_case ));

my $options_ok = GetOptions(
	'verbose|v+' => \$verbose,
	'help|?'     => sub { help();  exit 0 },
	'usage'      => sub { usage(); exit 0 },
);

# Unknown options and leftover positional arguments are both usage errors.
unless ($options_ok and @ARGV == 0) {
	usage();
	exit UNKNOWN;
}
|  | 
 | ||
my @state  = get_state();
my $status = OK;

print "<<<drbd>>>\n";

# Severity of each known connection state; anything not listed is CRITICAL.
my %cs_severity = (
	Connected     => OK,
	Unconfigured  => OK,
	StandAlone    => WARNING,
	SyncingAll    => WARNING,
	SyncingQuick  => WARNING,
	SyncSource    => WARNING,
	SyncTarget    => WARNING,
	VerifyS       => WARNING,
	VerifyT       => WARNING,
	Disconnecting => WARNING,
	TearDown      => WARNING,
	StartingSyncS => WARNING,
	StartingSyncT => WARNING,
	WFSyncUUID    => WARNING,
);

# Connection states in which out-of-sync blocks are acceptable.
my %oos_severity = map { $_ => OK } qw(
	StartingSyncS StartingSyncT
	SyncSource    SyncTarget
	PausedSyncS   PausedSyncT
);

# Severity of resource roles (local and peer).
my %role_severity = (
	Primary   => OK,
	Secondary => OK,
);

# Severity of disk states (local and peer).
my %disk_severity = (
	UpToDate    => OK,
	Consistent  => OK,
	Negotiating => WARNING,
	Attaching   => WARNING,
);

# Look a value up in a severity table, assuming CRITICAL for unknown values.
my $severity_of = sub {
	my ($table, $value) = @_;
	my $found = $table->{$value};
	return defined $found ? $found : CRITICAL;
};

for my $id (0 .. $#state) {
	my $device = $state[$id];
	next unless $device;

	$device->{cs_level} = $severity_of->(\%cs_severity, $device->{cs});

	# Out-of-sync data is only tolerated while a resync is under way.
	if ($device->{oos}) {
		$device->{oos_level} = $severity_of->(\%oos_severity, $device->{cs});
	}

	for my $field (qw( ro pro )) {
		$device->{"${field}_level"} = $severity_of->(\%role_severity, $device->{$field});
	}

	for my $field (qw( ds pds )) {
		$device->{"${field}_level"} = $severity_of->(\%disk_severity, $device->{$field});
	}

	# Free-text remarks appended to the status string.
	my @notes;
	if ($device->{oos}) {
		push @notes, sprintf('%d kiB out-of-sync', $device->{oos});
	}
	unless ($device->{iof} =~ /^r.--(.(-)?)?$/) {
		$device->{iof_level} = CRITICAL;
		push @notes, sprintf('I/O flags: %s', $device->{iof});
	}
	my $extra = '';
	if (@notes) {
		$extra = sprintf(' (%s)', join(', ', @notes));
	}

	# The device's level is the worst of all its *_level entries, and the
	# overall status is the worst device level.
	my $level = OK;
	for my $key (grep { /_level$/ } keys %$device) {
		$level = $device->{$key} if $device->{$key} > $level;
	}
	$status = $level if $level > $status;

	$device->{level} = $level;
	$device->{info}  = sprintf 'drbd%d:cs=%s|ro=%s|pro=%s|ds=%s|pds=%s|extra=%s',
		$id, @{$device}{qw( cs ro pro ds pds )}, $extra;
	# Role and connstate reversed, like old check_drbd
	$device->{short} = sprintf 'drbd%d: %0.3s%0.3s%0.3s%s',
		$id, @{$device}{qw( ro cs ds )}, $extra;

	# Queue every counter that /proc/drbd actually reported for this device.
	for my $counter (qw( ns nr dw dr al bm lo pe ua ap oos )) {
		my $value = $device->{$counter};
		next unless defined $value;
		perfdata("${counter}=${value}");
	}
}

critical('No DRBD volumes present') unless @state;

if ($status == OK) {
	# All devices fine: full info when verbose or for a single slot,
	# otherwise the abbreviated form.
	my $want_full = ($verbose || @state == 1);
	my @parts = map { $want_full ? $_->{info} : $_->{short} } grep { defined } @state;
	ok(join ', ', @parts);
}
else {
	# Only the devices that are not OK are reported.
	my @troubled = grep { defined $_ and $_->{level} } @state;
	my $message  = join ', ', map { $_->{info} } @troubled;
	if ($status == WARNING) {
		warning($message);
	}
	else {
		critical($message);
	}
}

# Not reached: ok(), warning() and critical() all exit.
die;
|  | 
 | ||
|  | ########################################################################### | ||
|  | 
 | ||
# Print the long help text (usage line plus option descriptions).
sub help {
	print 'Usage: ', BASENAME, " [OPTION...]\n",
		"Check DRBD resources.\n",
		"\n",
		" Plugin options:\n",
		"  -v, --verbose              Increase verbosity\n",
		"\n",
		" Help options:\n",
		"  -?, --help                 Give this help list\n",
		"      --usage                Give a short usage message\n";
}
|  | 
 | ||
# Print the one-line usage summary.
sub usage {
	printf "Usage: %s [-v?] [--verbose] [--help] [--usage]\n", BASENAME;
}
|  | 
 | ||
|  | ########################################################################### | ||
|  | 
 | ||
{
	# Items queued by perfdata() and flushed by _exit(); private to this block.
	my @perfdata;

	# Queue one or more performance data items for the final output line.
	# Returns the new number of queued items.
	sub perfdata {
		push @perfdata, @_;
	}

	# Print the result line and terminate the program.
	#
	#   $status   exit code; also selects the default text when no message
	#             is given (OK / WARNING / CRITICAL, anything else UNKNOWN)
	#   $message  optional text printed instead of the default
	#
	# The first queued perfdata item follows the text as "|item"; any
	# remaining items follow as "|item|item|...|", all on the same line.
	sub _exit {
		my ($status, $message) = @_;

		my @label = qw( OK WARNING CRITICAL );
		my $text  = defined $message ? $message : ($label[$status] || 'UNKNOWN');
		print $text;

		my $first = shift @perfdata;
		print "|$first" if $first;

		print join('|', '', @perfdata, '') if @perfdata;

		print "\n";
		exit $status;
	}
}
|  | 
 | ||
# Exit helpers: hand the arguments (an optional message) to _exit() together
# with the matching status code.
sub ok {
	return _exit(OK, @_);
}

sub warning {
	return _exit(WARNING, @_);
}

sub critical {
	return _exit(CRITICAL, @_);
}

sub unknown {
	return _exit(UNKNOWN, @_);
}
|  | 
 | ||
|  | ########################################################################### | ||
|  | 
 | ||
# Read STATE_FILE and return the list of DRBD devices, indexed by minor number.
#
# Each element is a hash reference holding the fields of the device's header
# line (cs, ro, pro, ds, pds, iof) plus every "key:value" pair found on the
# lines that follow it (ns, nr, dw, dr, al, bm, lo, pe, ua, ap, ep, wo, oos).
# Minors whose header line cannot be parsed leave an undefined slot.
#
# Terminates through critical() when the file cannot be opened.
sub get_state {
	my $io = IO::File->new(STATE_FILE)
		or critical("Could not open @{[STATE_FILE]} for reading: $!");

# 0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r----
#    ns:0 nr:20492 dw:20480 dr:124 al:5 bm:1296 lo:0 pe:0 ua:0 ap:0 ep:1 wo:d oos:0
# 1: cs:StandAlone ro:Primary/Unknown ds:UpToDate/DUnknown   r-----

	my @state;
	my $device;
	while (my $line = <$io>) {
		# Device header.  The protocol column is blank when the device has no
		# network configuration (e.g. StandAlone), so it must be optional or
		# such devices would never be recognised.
		if ($line =~ m{^ \s* (\d+): \s* cs:(\w+) \s+ (?:ro|st):(\w+)/(\w+) \s+ ds:(\w+)/(\w+) \s+ (?:\S+ \s+)? (\S+)}x) {
			$device = $state[$1] = {
				cs  => $2,
				ro  => $3,
				pro => $4,
				ds  => $5,
				pds => $6,
				iof => $7,
			};
			next;
		}

		# A header that did not parse (e.g. "1: cs:Unconfigured") starts a new
		# device all the same: forget the current one so this line's fields
		# cannot overwrite the previous device's cs/ro/ds.
		if ($line =~ /^\s*\d+:\s/) {
			undef $device;
			next;
		}

		$device or next;

		# Counter line: store every key:value pair on the current device.
		$device->{$1} = $2 while $line =~ /(\w+):(\S+)/g;
	}
	$io->close;

	return @state;
}