1
0
mirror of https://github.com/dennypage/dpinger.git synced 2024-05-19 06:50:01 +00:00

4 Commits
v3.2 ... v3.3

Author SHA1 Message Date
Denny Page
ce7d88bddf Update version to 3.3 2023-01-18 19:17:16 -08:00
Denny Page
67b8ba1f6d Add option to explicitly control the hold time for alarms. 2023-01-18 18:24:20 -08:00
Denny Page
c845c582b4 Add examples for dpinger logging/monitoring with InfluxDB and Grafana 2022-05-14 14:50:40 -07:00
dennypage
fbc7e8f87f Update copyright year 2022-03-01 08:21:24 -08:00
6 changed files with 585 additions and 12 deletions

View File

@@ -1,4 +1,4 @@
Copyright (c) 2015-2020, Denny Page
Copyright (c) 2015-2022, Denny Page
All rights reserved.
Redistribution and use in source and binary forms, with or without

View File

@@ -1,6 +1,6 @@
//
// Copyright (c) 2015-2022, Denny Page
// Copyright (c) 2015-2023, Denny Page
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
@@ -104,8 +104,9 @@ static unsigned long loss_alarm_threshold_percent = 0;
static char * alert_cmd = NULL;
static size_t alert_cmd_offset;
// Number of periods to wait to declare an alarm as cleared
#define ALARM_DECAY_PERIODS 10
// Interval before an alarm is cleared (hold time)
static unsigned long alarm_hold_msec = 0;
#define DEFAULT_HOLD_PERIODS 10
// Report file
static const char * report_name = NULL;
@@ -593,6 +594,7 @@ alert_thread(
unsigned long average_latency_usec;
unsigned long latency_deviation;
unsigned long average_loss_percent;
unsigned int alarm_hold_periods;
unsigned int latency_alarm_decay = 0;
unsigned int loss_alarm_decay = 0;
unsigned int alert = 0;
@@ -603,6 +605,9 @@ alert_thread(
sleeptime.tv_sec = alert_interval_msec / 1000;
sleeptime.tv_nsec = (alert_interval_msec % 1000) * 1000000;
// Set number of alarm hold periods
alarm_hold_periods = (alarm_hold_msec + alert_interval_msec - 1) / alert_interval_msec;
while (1)
{
r = nanosleep(&sleeptime, NULL);
@@ -622,7 +627,7 @@ alert_thread(
alert = 1;
}
latency_alarm_decay = ALARM_DECAY_PERIODS;
latency_alarm_decay = alarm_hold_periods;
}
else if (latency_alarm_decay)
{
@@ -643,7 +648,7 @@ alert_thread(
alert = 1;
}
loss_alarm_decay = ALARM_DECAY_PERIODS;
loss_alarm_decay = alarm_hold_periods;
}
else if (loss_alarm_decay)
{
@@ -843,9 +848,9 @@ get_length_arg(
static void
usage(void)
{
fprintf(stderr, "Dpinger version 3.2\n\n");
fprintf(stderr, "Dpinger version 3.3\n\n");
fprintf(stderr, "Usage:\n");
fprintf(stderr, " %s [-f] [-R] [-S] [-P] [-B bind_addr] [-s send_interval] [-l loss_interval] [-t time_period] [-r report_interval] [-d data_length] [-o output_file] [-A alert_interval] [-D latency_alarm] [-L loss_alarm] [-C alert_cmd] [-i identifier] [-u usocket] [-p pidfile] dest_addr\n\n", progname);
fprintf(stderr, " %s [-f] [-R] [-S] [-P] [-B bind_addr] [-s send_interval] [-l loss_interval] [-t time_period] [-r report_interval] [-d data_length] [-o output_file] [-A alert_interval] [-D latency_alarm] [-L loss_alarm] [-H hold_interval] [-C alert_cmd] [-i identifier] [-u usocket] [-p pidfile] dest_addr\n\n", progname);
fprintf(stderr, " options:\n");
fprintf(stderr, " -f run in foreground\n");
fprintf(stderr, " -R rewind output file between reports\n");
@@ -861,6 +866,7 @@ usage(void)
fprintf(stderr, " -A time interval between alerts (default 1s)\n");
fprintf(stderr, " -D time threshold for latency alarm (default none)\n");
fprintf(stderr, " -L percent threshold for loss alarm (default none)\n");
fprintf(stderr, " -H time interval to hold an alarm before clearing it (default 10x alert interval)\n");
fprintf(stderr, " -C optional command to be invoked via system() for alerts\n");
fprintf(stderr, " -i identifier text to include in output\n");
fprintf(stderr, " -u unix socket name for polling\n");
@@ -875,7 +881,8 @@ usage(void)
fprintf(stderr, " resolution of loss calculation is: 100 * send_interval / (time_period - loss_interval)\n\n");
fprintf(stderr, " the alert_cmd is invoked as \"alert_cmd dest_addr alarm_flag latency_avg loss_avg\"\n");
fprintf(stderr, " alarm_flag is set to 1 if either latency or loss is in alarm state\n");
fprintf(stderr, " alarm_flag will return to 0 when both have have cleared alarm state\n\n");
fprintf(stderr, " alarm_flag will return to 0 when both have cleared alarm state\n");
fprintf(stderr, " alarm hold time begins when the source of the alarm returns to normal\n\n");
}
@@ -916,7 +923,7 @@ parse_args(
progname = argv[0];
while((opt = getopt(argc, argv, "fRSPB:s:l:t:r:d:o:A:D:L:C:i:u:p:")) != -1)
while((opt = getopt(argc, argv, "fRSPB:s:l:t:r:d:o:A:D:L:H:C:i:u:p:")) != -1)
{
switch (opt)
{
@@ -1009,6 +1016,14 @@ parse_args(
}
break;
case 'H':
r = get_time_arg_msec(optarg, &alarm_hold_msec);
if (r)
{
fatal("invalid alarm hold interval %s\n", optarg);
}
break;
case 'C':
alert_cmd_offset = strlen(optarg);
alert_cmd = malloc(alert_cmd_offset + OUTPUT_MAX);
@@ -1403,6 +1418,12 @@ main(
fatal("getnameinfo of destination address failed\n");
}
// Default alarm hold if not explicitly set
if (alarm_hold_msec == 0)
{
alarm_hold_msec = alert_interval_msec * DEFAULT_HOLD_PERIODS;
}
if (bind_addr_len)
{
r = getnameinfo((struct sockaddr *) &bind_addr, bind_addr_len, bind_str, sizeof(bind_str), NULL, 0, NI_NUMERICHOST);
@@ -1412,9 +1433,9 @@ main(
}
}
logger("send_interval %lums loss_interval %lums time_period %lums report_interval %lums data_len %lu alert_interval %lums latency_alarm %lums loss_alarm %lu%% dest_addr %s bind_addr %s identifier \"%s\"\n",
logger("send_interval %lums loss_interval %lums time_period %lums report_interval %lums data_len %lu alert_interval %lums latency_alarm %lums loss_alarm %lu%% alarm_hold %lums dest_addr %s bind_addr %s identifier \"%s\"\n",
send_interval_msec, loss_interval_msec, time_period_msec, report_interval_msec, echo_data_len,
alert_interval_msec, latency_alarm_threshold_msec, loss_alarm_threshold_percent,
alert_interval_msec, latency_alarm_threshold_msec, loss_alarm_threshold_percent, alarm_hold_msec,
dest_str, bind_str, identifier);
// Set my echo id

19
influx/README.md Normal file
View File

@@ -0,0 +1,19 @@
Examples for dpinger logging/monitoring with InfluxDB and Grafana
<br>
Files:
dpinger_influx_logger
Python script for logging dpinger data in InfluxDB
dpinger_start.sh
Sample start script for dpinger influx logging
dpinger_grafana_dashboard.json
Example Grafana dashboard for monitoring dpinger data

View File

@@ -0,0 +1,456 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 3,
"iteration": 1652309379625,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"uid": "$source"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ms"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "loss"
},
"properties": [
{
"id": "unit",
"value": "percent"
}
]
},
{
"matcher": {
"id": "byName",
"options": "loss"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#e00000",
"mode": "fixed"
}
},
{
"id": "custom.fillOpacity",
"value": 100
},
{
"id": "custom.lineWidth",
"value": 0
},
{
"id": "unit",
"value": "percent"
},
{
"id": "max",
"value": 100
}
]
}
]
},
"gridPos": {
"h": 19,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max",
"min"
],
"displayMode": "table",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"alias": "latency",
"groupBy": [
{
"params": [
"$intervals"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "dpinger",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT mean(\"latency\") FROM \"wan\" WHERE $timeFilter GROUP BY time($__interval) fill(null)",
"queryType": "randomWalk",
"rawQuery": false,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"latency"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": [
{
"key": "name",
"operator": "=~",
"value": "/^$name$/"
}
]
},
{
"alias": "stddev",
"groupBy": [
{
"params": [
"$intervals"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "dpinger",
"orderByTime": "ASC",
"policy": "default",
"queryType": "randomWalk",
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"stddev"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": [
{
"key": "name",
"operator": "=~",
"value": "/^$name$/"
}
]
},
{
"alias": "loss",
"groupBy": [
{
"params": [
"$intervals"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "dpinger",
"orderByTime": "ASC",
"policy": "default",
"queryType": "randomWalk",
"refId": "C",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"loss"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": [
{
"key": "name",
"operator": "=~",
"value": "/^$name$/"
}
]
}
],
"title": "$name - ${intervals} intervals",
"transformations": [],
"type": "timeseries"
}
],
"refresh": "1m",
"schemaVersion": 36,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "dpinger",
"value": "dpinger"
},
"hide": 0,
"includeAll": false,
"label": "Source",
"multi": false,
"name": "source",
"options": [],
"query": "influxdb",
"queryValue": "",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"current": {
"selected": false,
"text": "wan",
"value": "wan"
},
"datasource": {
"type": "influxdb",
"uid": "$source"
},
"definition": "SHOW TAG VALUES WITH KEY = \"name\"",
"hide": 0,
"includeAll": false,
"label": "Name",
"multi": false,
"name": "name",
"options": [],
"query": "SHOW TAG VALUES WITH KEY = \"name\"",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"auto": true,
"auto_count": 500,
"auto_min": "10s",
"current": {
"selected": false,
"text": "auto",
"value": "$__auto_interval_intervals"
},
"hide": 0,
"label": "Intervals",
"name": "intervals",
"options": [
{
"selected": true,
"text": "auto",
"value": "$__auto_interval_intervals"
},
{
"selected": false,
"text": "10s",
"value": "10s"
},
{
"selected": false,
"text": "30s",
"value": "30s"
},
{
"selected": false,
"text": "1m",
"value": "1m"
},
{
"selected": false,
"text": "2m",
"value": "2m"
},
{
"selected": false,
"text": "5m",
"value": "5m"
},
{
"selected": false,
"text": "10m",
"value": "10m"
},
{
"selected": false,
"text": "15m",
"value": "15m"
},
{
"selected": false,
"text": "30m",
"value": "30m"
},
{
"selected": false,
"text": "1h",
"value": "1h"
},
{
"selected": false,
"text": "6h",
"value": "6h"
},
{
"selected": false,
"text": "12h",
"value": "12h"
},
{
"selected": false,
"text": "1d",
"value": "1d"
},
{
"selected": false,
"text": "7d",
"value": "7d"
}
],
"query": "10s,30s,1m,2m,5m,10m,15m,30m,1h,6h,12h,1d,7d",
"queryValue": "",
"refresh": 2,
"skipUrlSync": false,
"type": "interval"
}
]
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"1m",
"5m"
]
},
"timezone": "",
"title": "WAN Latency",
"uid": "ThwrgHYMk",
"version": 46,
"weekStart": ""
}

70
influx/dpinger_influx_logger Executable file
View File

@@ -0,0 +1,70 @@
#!/usr/bin/python
dpinger_path = "/usr/local/bin/dpinger"
import os
import sys
import signal
import requests
from subprocess import Popen, PIPE
from requests import post
# Handle SIGINT: stop the dpinger child (if any) and exit cleanly
def signal_handler(signum, frame):
    """Kill the dpinger child process and exit with status 0.

    signum and frame are the arguments the signal module passes to every
    handler; neither is used here. The first parameter is deliberately not
    called "signal" so it does not shadow the signal module.
    """
    try:
        dpinger.kill()
    except Exception:
        # Best effort: the dpinger name does not exist until the child has
        # been started, and a failing kill() must not prevent the exit below.
        pass
    sys.exit(0)

signal.signal(signal.SIGINT, signal_handler)
# Handle command line args
progname = sys.argv.pop(0)

# Five positional arguments are required (influx_url, influx_db, host, name,
# target). Whatever follows them stays in sys.argv and is later passed
# through to dpinger as additional options.
if len(sys.argv) < 5:
    print('Usage: {0} influx_url influx_db host name target [additional dpinger options]'.format(progname))
    print(' influx_url URL of the Influx server')
    print(' influx_db name of the Influx database')
    print(' host value of "host" tag (example: output of hostname command)')
    print(' name value of "name" tag (example: a circuit name such as "wan")')
    print(' target IP address to monitor (also the value of the "target" tag)')
    sys.exit(1)

# Consume the positionals in order, leaving only the extra dpinger options
influx_url = sys.argv.pop(0)
influx_db = sys.argv.pop(0)
host = sys.argv.pop(0)
name = sys.argv.pop(0)
target = sys.argv.pop(0)

# Credentials come from the environment; either is None when unset
influx_user = os.getenv('INFLUX_USER')
influx_pass = os.getenv('INFLUX_PASS')
# Build the dpinger command line: foreground mode first, then any extra
# options left over from our own command line, then the fixed send/period/
# report settings and finally the target address
cmd = [dpinger_path, "-f"] + sys.argv + ["-s", "1s", "-t", "60s", "-r", "10s", target]

# Pre-build the write URL and the per-report data template. The doubled
# braces survive this first format() call, leaving {0:.3f}, {1:.3f} and {2}
# placeholders to be filled in for every report.
url = '{0}/write?db={1}'.format(influx_url, influx_db)
datafmt = "dpinger,host={0},name={1},target={2} latency={{0:.3f}},stddev={{1:.3f}},loss={{2}}i".format(host, name, target)
# Start up dpinger with its stdout connected to us through a pipe
try:
    dpinger = Popen(cmd, stdout=PIPE, text=True, bufsize=0)
except Exception as exc:
    # Catch Exception rather than everything: the SystemExit raised by the
    # SIGINT handler must not be reported as a launch failure.
    print("failed to start dpinger: {0}".format(exc))
    sys.exit(1)
# Start the show: forward every dpinger report line to the Influx server
while True:
    line = dpinger.stdout.readline()
    if len(line) == 0:
        # An empty read means end-of-file on the pipe
        print("dpinger exited")
        sys.exit(1)

    # A report is expected to hold exactly three whitespace-separated
    # fields. Skip anything else instead of crashing the logger.
    try:
        latency, stddev, loss = line.split()
        # latency and stddev are divided by 1000 before logging
        # (presumably microseconds to milliseconds -- confirm against dpinger)
        data = datafmt.format(float(latency) / 1000, float(stddev) / 1000, loss)
    except ValueError:
        print("ignoring malformed dpinger output: {0}".format(line.rstrip()))
        continue
    #print(data)

    try:
        # The timeout keeps a stalled server from blocking the read loop
        # forever; raise_for_status() turns HTTP 4xx/5xx replies into errors
        # so rejected writes are reported too.
        response = post(url = url, auth = (influx_user, influx_pass), data = data, timeout = 10)
        response.raise_for_status()
    except Exception as exc:
        # Catch Exception rather than everything, so the SystemExit raised
        # by the SIGINT handler still terminates the program.
        print("post failed: {0}".format(exc))

7
influx/dpinger_start.sh Executable file
View File

@@ -0,0 +1,7 @@
#!/bin/sh
# Sample start script: exports the credentials that dpinger_influx_logger
# reads from the environment, then replaces this shell with the logger.

INFLUX_URL="http://myinfluxhost:8086"
export INFLUX_USER="dpinger"
export INFLUX_PASS="myinfluxpass"

# Arguments: influx_url influx_db host name target
# Expansions are quoted so they are never subject to word splitting or globbing.
exec /usr/local/dpinger_influx_logger "$INFLUX_URL" dpinger "$(hostname)" wan 8.8.8.8