mirror of
https://github.com/librenms/librenms.git
synced 2024-10-07 16:52:45 +00:00
Add support for sending events to Sensu (#11383)
* Add support for sending events to Sensu Sensu is an alerting and monitoring service (and much more) that has a nagios compatible check API. This transport translates LibreNMS alerts into Sensu events, and sends them to the agent API running on the same host as the poller. The transport has a few options, but none of them are required - if the Sensu agent is correctly configured, alerts will be sent as soon as the transport is enabled. There's a fair amount of code, as I've tried to translate as much data as possible between LibreNMS and Sensu. * Update Transports.md * If alerted is 0, send an "ok" alert dated rrd.step / 2 seconds ago This makes Sensu aware of the last time the check ran successfully (ish). If we don't send the initial "ok", Sensu will either display 'unknown', or an incorrectly high duration for the incident. Alerted gets set to 1 after the first alert is sent. We choose rrd.step / 2 as: * rrd.step is the maximum time ago the check could have succeeded * we halve it, so that if a check is flapping, it is not masked Basically, we guess that the check fails around halfway through the time since the poller last ran. * Add additional metadata * Improve codeclimate slightly * Consider names that are 2 or 3 components long
This commit is contained in:
@@ -186,6 +186,7 @@ class RunAlerts
|
||||
$obj['timestamp'] = $alert['time_logged'];
|
||||
$obj['contacts'] = $extra['contacts'];
|
||||
$obj['state'] = $alert['state'];
|
||||
$obj['alerted'] = $alert['alerted'];
|
||||
$obj['template'] = $template;
|
||||
return $obj;
|
||||
}
|
||||
@@ -352,7 +353,7 @@ class RunAlerts
|
||||
public function loadAlerts($where)
|
||||
{
|
||||
$alerts = [];
|
||||
foreach (dbFetchRows("SELECT alerts.id, alerts.device_id, alerts.rule_id, alerts.state, alerts.note, alerts.info FROM alerts WHERE $where") as $alert_status) {
|
||||
foreach (dbFetchRows("SELECT alerts.id, alerts.alerted, alerts.device_id, alerts.rule_id, alerts.state, alerts.note, alerts.info FROM alerts WHERE $where") as $alert_status) {
|
||||
$alert = dbFetchRow(
|
||||
'SELECT alert_log.id,alert_log.rule_id,alert_log.device_id,alert_log.state,alert_log.details,alert_log.time_logged,alert_rules.rule,alert_rules.severity,alert_rules.extra,alert_rules.name,alert_rules.query,alert_rules.builder,alert_rules.proc FROM alert_log,alert_rules WHERE alert_log.rule_id = alert_rules.id && alert_log.device_id = ? && alert_log.rule_id = ? && alert_rules.disabled = 0 ORDER BY alert_log.id DESC LIMIT 1',
|
||||
array($alert_status['device_id'], $alert_status['rule_id'])
|
||||
@@ -365,6 +366,7 @@ class RunAlerts
|
||||
} else {
|
||||
$alert['alert_id'] = $alert_status['id'];
|
||||
$alert['state'] = $alert_status['state'];
|
||||
$alert['alerted'] = $alert_status['alerted'];
|
||||
$alert['note'] = $alert_status['note'];
|
||||
if (!empty($alert['details'])) {
|
||||
$alert['details'] = json_decode(gzuncompress($alert['details']), true);
|
||||
|
||||
257
LibreNMS/Alert/Transport/Sensu.php
Normal file
257
LibreNMS/Alert/Transport/Sensu.php
Normal file
@@ -0,0 +1,257 @@
|
||||
<?php
|
||||
/* Copyright (C) 2020 Adam Bishop <adam@omega.org.uk>
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
/**
|
||||
* Sensu Transport
|
||||
* @author Adam Bishop <adam@omega.org.uk>
|
||||
* @copyright 2020 Adam Bishop, LibreNMS
|
||||
* @license GPL
|
||||
* @package LibreNMS
|
||||
* @subpackage Alerts
|
||||
*/
|
||||
namespace LibreNMS\Alert\Transport;
|
||||
|
||||
use LibreNMS\Alert\Transport;
|
||||
use LibreNMS\Config;
|
||||
|
||||
use GuzzleHttp\Client;
|
||||
use GuzzleHttp\Exception\GuzzleException;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
|
||||
class Sensu extends Transport
|
||||
{
|
||||
// Status codes understood by Sensu (same numbering as Nagios-style check results)
const OK = 0;
const WARNING = 1;
const CRITICAL = 2;
const UNKNOWN = 3;

// Alert state codes as they arrive from LibreNMS in $obj['state']
const RECOVER = 0;
const ALERT = 1;
const ACK = 2;
const WORSE = 3;
const BETTER = 4;

// LibreNMS rule severity name => Sensu status code (used by calculateStatus())
private static $status = [
    'ok' => self::OK,
    'warning' => self::WARNING,
    'critical' => self::CRITICAL
];

// LibreNMS state name => LibreNMS state code.
// NOTE(review): no method of this class reads this map.
private static $severity = [
    'recovered' => self::RECOVER,
    'alert' => self::ALERT,
    'acknowledged' => self::ACK,
    'worse' => self::WORSE,
    'better' => self::BETTER,
];

// Guzzle client shared by the static helpers; assigned in deliverAlert()
private static $client = null;
|
||||
|
||||
/**
 * Resolve the transport settings (applying defaults) and hand the alert to
 * contactSensu().
 *
 * Every setting is optional. Missing or empty values fall back to:
 *  - sensu-url:        http://127.0.0.1:3031
 *  - sensu-namespace:  default
 *  - sensu-prefix:     none
 *  - sensu-source-key: hostname (the default advertised by configTemplate())
 *
 * @param array $obj  Alert details to translate into a Sensu event
 * @param mixed $opts Not used by this method
 * @return bool|string Whatever contactSensu() returns, or an error message
 *                     when a GuzzleException is raised while talking to Sensu
 */
public function deliverAlert($obj, $opts)
{
    // !empty()/isset() avoid undefined-index notices when a setting was never saved
    $sensu_opts = [
        'url' => !empty($this->config['sensu-url']) ? $this->config['sensu-url'] : 'http://127.0.0.1:3031',
        'namespace' => !empty($this->config['sensu-namespace']) ? $this->config['sensu-namespace'] : 'default',
        'prefix' => isset($this->config['sensu-prefix']) ? $this->config['sensu-prefix'] : null,
        // Without a default the entity name lookup would be performed with a null key
        'source-key' => !empty($this->config['sensu-source-key']) ? $this->config['sensu-source-key'] : 'hostname',
    ];

    // The static helpers read the client from Sensu::$client
    Sensu::$client = new Client();

    try {
        return $this->contactSensu($obj, $sensu_opts);
    } catch (GuzzleException $e) {
        return "Sending event to Sensu failed: " . $e->getMessage();
    }
}
|
||||
|
||||
/**
 * Send an alert to the Sensu agent API as one or two events.
 *
 * The Sensu agent should be running on the poller - events can be sent
 * directly to the backend but this has not been tested, and likely needs mTLS.
 * The agent API is documented at
 * https://docs.sensu.io/sensu-go/latest/reference/agent/#create-monitoring-events-using-the-agent-api
 *
 * Steps performed:
 *  1. GET {url}/healthz - any status other than 200 aborts with an error string.
 *  2. If the alert has not been sent before ($obj['alerted'] === 0) and is
 *     neither a recovery nor an acknowledgement, POST a forced "ok" event
 *     dated (rrd.step / 2) seconds ago so Sensu knows when the check was last
 *     healthy, then pause five seconds before continuing.
 *  3. POST the event describing the current state.
 *
 * Sensu::$client must already hold a Guzzle client (deliverAlert() sets it).
 * Guzzle exceptions are not caught here; deliverAlert() handles them.
 *
 * @param array $obj  Alert details; 'state', 'alerted' and 'severity' are read
 *                    here, further keys by generateData()
 * @param array $opts Resolved options; 'url' is read here, the rest by generateData()
 * @return bool|string true when the final POST answers 202, otherwise an
 *                     error message or the HTTP reason phrase
 */
public static function contactSensu($obj, $opts)
{
    $health = Sensu::$client->request('GET', $opts['url'] . '/healthz');
    if ($health->getStatusCode() !== 200) {
        return 'Sensu API is not responding';
    }

    $isProblemState = !in_array($obj['state'], [Sensu::RECOVER, Sensu::ACK], true);
    if ($isProblemState && $obj['alerted'] === 0) {
        // First event for this alert: tell Sensu the last time the check was healthy
        $backdate = round(Config::get('rrd.step', 300) / 2);
        $lastGood = Sensu::generateData($obj, $opts, Sensu::OK, $backdate);
        Log::debug('Sensu transport sent last good event to socket: ', $lastGood);

        $primer = Sensu::$client->request('POST', $opts['url'] . '/events', ['json' => $lastGood]);
        if ($primer->getStatusCode() !== 202) {
            return $primer->getReasonPhrase();
        }

        sleep(5);
    }

    $status = Sensu::calculateStatus($obj['state'], $obj['severity']);
    $event = Sensu::generateData($obj, $opts, $status);
    Log::debug('Sensu transport sent event to socket: ', $event);

    $response = Sensu::$client->request('POST', $opts['url'] . '/events', ['json' => $event]);

    return $response->getStatusCode() === 202 ? true : $response->getReasonPhrase();
}
|
||||
|
||||
/**
 * Build the Sensu event payload (check + entity) for an alert.
 *
 * @param array     $obj    Alert details; reads 'name', 'builder', 'msg',
 *                          'hostname' and 'os' directly, plus whatever
 *                          generateAnnotations() and getEntityName() read
 * @param array     $opts   Resolved options; reads 'prefix', 'namespace' and 'source-key'
 * @param int       $status Sensu status code to report
 * @param int|float $offset Seconds to back-date the event by (default: now)
 * @return array Event structure ready to be JSON-encoded
 */
public static function generateData($obj, $opts, $status, $offset = 0)
{
    // Read the clock once so "executed" and "issued" always agree, and force an
    // integer: contactSensu() passes the result of round(), which is a float,
    // and a float would be serialised with a fractional part. Sensu documents
    // both fields as integer Unix timestamps.
    $timestamp = time() - (int) $offset;

    return [
        'check' => [
            'metadata' => [
                'name' => Sensu::checkName($opts['prefix'], $obj['name']),
                'namespace' => $opts['namespace'],
                'annotations' => Sensu::generateAnnotations($obj),
            ],
            'command' => sprintf('LibreNMS: %s', $obj['builder']),
            'executed' => $timestamp,
            'interval' => Config::get('rrd.step', 300),
            'issued' => $timestamp,
            'output' => $obj['msg'],
            'status' => $status,
        ],
        'entity' => [
            'metadata' => [
                'name' => Sensu::getEntityName($obj, $opts['source-key']),
                'namespace' => $opts['namespace'],
            ],
            'system' => [
                'hostname' => $obj['hostname'],
                'os' => $obj['os'],
            ]
        ],
    ];
}
|
||||
|
||||
/**
 * Build the annotation map attached to the Sensu check metadata.
 *
 * Entries whose value is null, false, '' or not a scalar are dropped.
 * Integer 0 and the string '0' are kept (unlike what empty() would do).
 *
 * @param array $obj Alert details; reads 'state', 'sysContact', 'sysDescr',
 *                   'location', 'proc', 'notes', 'device_id', 'rule_id'
 *                   and 'status_reason'
 * @return array Annotation name => value, without blank entries
 */
public static function generateAnnotations($obj)
{
    $annotations = [
        'generated-by' => 'LibreNMS',
        'acknowledged' => $obj['state'] === Sensu::ACK ? 'true' : 'false',
        'contact' => $obj['sysContact'],
        'description' => $obj['sysDescr'],
        'location' => $obj['location'],
        'documentation' => $obj['proc'],
        'librenms-notes' => $obj['notes'],
        'librenms-device-id' => strval($obj['device_id']),
        'librenms-rule-id' => strval($obj['rule_id']),
        'librenms-status-reason' => $obj['status_reason'],
    ];

    // Passing 'strlen' directly as the callback hands it null values, which is
    // deprecated from PHP 8.1 on. The explicit check keeps the same keep/drop
    // rules for scalars without ever calling strlen() on a non-string.
    return array_filter($annotations, static function ($value) {
        return is_scalar($value) && strlen((string) $value) > 0;
    });
}
|
||||
|
||||
/**
 * Collapse a LibreNMS state and rule severity into a single Sensu status.
 *
 * Sensu only has a single short (status) to indicate both severity and
 * status, so the two LibreNMS values are mapped onto it:
 *  - a recovered alert is always reported as OK
 *  - otherwise a known severity name ('ok', 'warning', 'critical') maps to
 *    the matching Sensu status
 *  - any other severity is reported as UNKNOWN
 *
 * @param int    $state    LibreNMS alert state code
 * @param string $severity LibreNMS rule severity name
 * @return int One of Sensu::OK, Sensu::WARNING, Sensu::CRITICAL, Sensu::UNKNOWN
 */
public static function calculateStatus($state, $severity)
{
    if ($state !== Sensu::RECOVER) {
        return array_key_exists($severity, Sensu::$status)
            ? Sensu::$status[$severity]
            : Sensu::UNKNOWN;
    }

    // LibreNMS alert is resolved, send ok
    return Sensu::OK;
}
|
||||
|
||||
/**
 * Pick the name Sensu should attribute the event to.
 *
 * @param array       $obj Alert details; reads 'hostname' and, when selected, $obj[$key]
 * @param string|null $key 'shortname' for the shortened hostname, otherwise
 *                         the name of the $obj field to use (e.g. 'hostname', 'sysName')
 * @return string|null The entity name; falls back to the hostname when $key
 *                     is empty or the selected field has no value
 */
public static function getEntityName($obj, $key)
{
    if ($key === 'shortname') {
        return Sensu::shortenName($obj['hostname']);
    }

    // Hostname is the documented default, and an entity without a name is of
    // no use to Sensu, so never hand back an empty value if hostname is there.
    if (empty($key) || !isset($obj[$key]) || $obj[$key] === '') {
        return $obj['hostname'];
    }

    return $obj[$key];
}
|
||||
|
||||
/**
 * Shrink the trailing domain components of a hostname to their initials,
 * e.g. librenms.corp.example.net becomes librenms.cen.
 *
 * Up to three trailing components are collapsed, and the first component is
 * always left intact (a.b.c becomes a.bc). Names with one or two components
 * cannot be shortened and are returned unchanged. IP addresses are also
 * returned unchanged: collapsing their octets would map many different
 * devices onto the same entity name.
 *
 * @param string $name Hostname to shorten
 * @return string Shortened name
 */
public static function shortenName($name)
{
    if (filter_var($name, FILTER_VALIDATE_IP) !== false) {
        return $name;
    }

    $components = explode('.', $name);
    $count = count($components);

    if ($count <= 2) { // Can't be shortened
        return $name;
    }

    // Collapse at most three components, but always keep the first one whole
    $trim = min(3, $count - 1);

    $initials = '';
    foreach (array_slice($components, -$trim) as $component) {
        $initials .= substr($component, 0, 1);
    }

    $kept = array_slice($components, 0, $count - $trim);

    return sprintf('%s.%s', implode('.', $kept), $initials);
}
|
||||
|
||||
/**
 * Turn a LibreNMS rule name into a Sensu check name.
 *
 * The name is lower-cased and every character other than a letter, digit,
 * underscore, period or hyphen is replaced by a hyphen. Names made only of
 * those characters and spaces come out exactly as before; names with other
 * special characters would otherwise be rejected by Sensu. Two rule names
 * that differ only in special characters end up with the same check name.
 *
 * @param string|null $prefix Optional prefix, joined to the name with a hyphen;
 *                            used as given (it is not sanitised)
 * @param string      $name   Alert rule name
 * @return string Check name
 */
public static function checkName($prefix, $name)
{
    $check = strtolower(preg_replace('/[^A-Za-z0-9_.-]/', '-', (string) $name));

    if ($prefix) {
        return sprintf('%s-%s', $prefix, $check);
    }

    return $check;
}
|
||||
|
||||
/**
 * Describe the transport's settings form and its validation rules.
 *
 * @return array 'config' holds one entry per form field (title, name, descr,
 *               type and, for the select, options/default); 'validation'
 *               maps field names to validation rule strings
 */
public static function configTemplate()
{
    $endpoint = [
        'title' => 'Sensu Endpoint',
        'name' => 'sensu-url',
        'descr' => 'To configure the agent API, see https://docs.sensu.io/sensu-go/latest/reference/agent/#api-configuration-flags (default: "http://localhost:3031")',
        'type' => 'text',
    ];

    $namespace = [
        'title' => 'Sensu Namespace',
        'name' => 'sensu-namespace',
        'descr' => 'The Sensu namespace that hosts exist in (default: "default")',
        'type' => 'text',
    ];

    $prefix = [
        'title' => 'Check Prefix',
        'name' => 'sensu-prefix',
        'descr' => 'An optional string to prefix the checks with',
        'type' => 'text',
    ];

    $sourceKey = [
        'title' => 'Source Key',
        'name' => 'sensu-source-key',
        'descr' => 'Should events be attributed to entities by hostname, sysName or shortname (default: hostname)',
        'type' => 'select',
        'options' => [
            'hostname' => 'hostname',
            'sysName' => 'sysName',
            'shortname' => 'shortname'
        ],
        'default' => 'hostname'
    ];

    $rules = [
        'sensu-url' => 'url',
        'sensu-source-key' => 'required|in:hostname,sysName,shortname',
    ];

    return [
        'config' => [$endpoint, $namespace, $prefix, $sourceKey],
        'validation' => $rules
    ];
}
|
||||
}
|
||||
@@ -499,6 +499,50 @@ required value is for url, without this then no call to Rocket.chat will be made
|
||||
| Webhook URL | https://rocket.url/api/v1/chat.postMessage |
|
||||
| Rocket.chat Options | channel=#Alerting <br/> username=myname <br/> icon_url=http://someurl/image.gif <br/> icon_emoji=:smirk: |
|
||||
|
||||
## Sensu
|
||||
|
||||
The Sensu transport will POST an
|
||||
[Event](https://docs.sensu.io/sensu-go/latest/reference/events/) to the
|
||||
[Agent API](https://docs.sensu.io/sensu-go/latest/reference/agent/#create-monitoring-events-using-the-agent-api)
|
||||
upon an alert being generated.
|
||||
|
||||
It will be categorised (ok, warning or critical), and if you configure the
|
||||
alert to send recovery notifications, Sensu will also clear the alert
|
||||
automatically. No configuration is required - as long as you are running the
|
||||
Sensu Agent on your poller with the HTTP socket enabled on tcp/3031, LibreNMS
|
||||
will start generating Sensu events as soon as you create the transport.
|
||||
|
||||
Acknowledging alerts within LibreNMS is not directly supported, but an
|
||||
annotation (`acknowledged`) is set, so a mutator or silence, or even the
|
||||
handler itself could be written to look for it. There is also
|
||||
an annotation (`generated-by`) set, to allow you to treat LibreNMS events
|
||||
differently from agent events.
|
||||
|
||||
The 'shortname' option is a simple way to reduce the length of device names in
|
||||
configs. It replaces the last 3 domain components with single letters (e.g.
|
||||
websrv08.dc4.eu.corp.example.net gets shortened to websrv08.dc4.eu.cen).
|
||||
|
||||
### Limitations
|
||||
|
||||
- Only a single namespace is supported
|
||||
- Sensu will reject rules with special characters - the Transport will attempt
|
||||
to fix up rule names, but it's best to stick to letters, numbers and spaces
|
||||
- The transport only deals in absolutes - it ignores the got worse/got better
|
||||
states
|
||||
- The agent will buffer alerts, but LibreNMS will not - if your agent is
|
||||
offline, alerts will be dropped
|
||||
- There is no backchannel between Sensu and LibreNMS - if you make changes in
|
||||
Sensu to LibreNMS alerts, they'll be lost on the next event (silences will work)
|
||||
|
||||
**Example:**
|
||||
|
||||
| Config | Example |
|
||||
| --------------- | --------------------- |
|
||||
| Sensu Endpoint | http://localhost:3031 |
|
||||
| Sensu Namespace | eu-west |
|
||||
| Check Prefix | lnms |
|
||||
| Source Key | hostname |
|
||||
|
||||
## Slack
|
||||
|
||||
The Slack transport will POST the alert message to your Slack Incoming
|
||||
|
||||
Reference in New Issue
Block a user