1
0
mirror of https://github.com/dennypage/dpinger.git synced 2024-05-19 06:50:01 +00:00

10 Commits
v2.0 ... v3.0

Author SHA1 Message Date
Denny Page
2b032751e5 Enhance pid file support to detect running processes 2017-09-29 16:04:13 -07:00
Denny Page
84ee15b155 Clean up loss accuracy description 2017-09-29 15:13:48 -07:00
Denny Page
e10c51ad95 Move check for zero intervals back to caller. Prior commit broke disable of report interval. 2017-09-29 00:23:16 -07:00
Denny Page
579ae3d66b Detect (and reject) negative numbers in paramaters 2017-09-28 15:53:20 -07:00
Denny Page
64e644e7be Don't wait for send interval before sending first echo request 2017-09-28 14:20:21 -07:00
Denny Page
a18d82ab6e Update copyright 2017-09-28 14:00:40 -07:00
Denny Page
34b0bb924e Use accept4() 2017-09-28 13:04:16 -07:00
dennypage
4173834bbe Create NOTES.md 2017-09-27 13:36:52 -07:00
dennypage
2a8eaa0c8f Merge pull request #23 from joemiller/openbsd
problem: cannot build on openbsd
2017-08-22 16:57:09 -07:00
joe miller
edb883498d problem: cannot build on openbsd
solution: include socket.h before if.h since if.h relies on types
defined in socket.h
2017-03-15 08:16:00 -07:00
4 changed files with 136 additions and 58 deletions

View File

@@ -1,4 +1,4 @@
Copyright (c) 2015-2016, Denny Page
Copyright (c) 2015-2017, Denny Page
All rights reserved.
Redistribution and use in source and binary forms, with or without

View File

@@ -2,7 +2,7 @@
#WARNINGS=-Wall -Wextra -Wformat=2 -Wno-unused-result
CC=clang
WARNINGS=-Weverything -Wno-padded -Wno-disabled-macro-expansion
WARNINGS=-Weverything -Wno-padded -Wno-disabled-macro-expansion -Wno-reserved-id-macro
CFLAGS=${WARNINGS} -pthread -g -O2

13
NOTES.md Normal file
View File

@@ -0,0 +1,13 @@
<b>Loss accuracy</b>
In general, dpinger works a bit differently than other latency monitors. Rather than a "probe" that fires off and processes a handful of echo request/replies all at once, dpinger maintains a rolling array of echo requests spaced on the send interval. In other words, instead of waking up every second and sending 4 echo requests at once, dpinger sends an echo request every 250 milliseconds. When dpinger receives an echo reply, the time difference between the request packet and reply packet (latency) is recorded. There is nothing that times out an echo request/reply and records it as permanently lost.
When the alert check is made, or a report is generated, dpinger goes through the array and examines each echo request. If a reply has been received, it is used as part of the overall latency calculation. If a reply has not yet been received, the amount of time since the request is compared against the loss interval. If it is greater than the loss interval, the request/reply is counted as lost in the current report. However, the concept of the request/reply being lost is not a permanent decision. In subsequent reports, if the missing reply has been received, its latency will be used instead of being counted as lost.
It's important to keep in mind that latency and loss are reported as averages across the entire request set. The default time period for dpinger is 30 seconds, with an echo request being sent every 250 milliseconds. This means that the latency and loss will be reported as averages across 115-120 samples. The alert check runs every second by default. So each time, the 4 oldest entries in the set have been replaced by the 4 newest ones.
Note that if you want accurate loss reporting, it is important that the number of samples be sufficient. In order to achieve 1% loss resolution, you need more than 100 samples in the set. The calculation for loss resolution is:
100 * send_interval / (time_period - loss_interval)
The default settings for dpinger report loss with an accuracy of 0.87%.

177
dpinger.c
View File

@@ -1,6 +1,6 @@
//
// Copyright (c) 2015-2016, Denny Page
// Copyright (c) 2015-2017, Denny Page
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
@@ -27,6 +27,11 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Silly that this is required for accept4 on Linux
#define _GNU_SOURCE
#include <stdio.h>
#include <errno.h>
#include <string.h>
@@ -39,10 +44,11 @@
#include <signal.h>
#include <netdb.h>
#include <net/if.h>
#include <sys/socket.h>
#include <net/if.h>
#include <sys/un.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
@@ -52,17 +58,6 @@
#include <pthread.h>
#include <syslog.h>
// TODO:
//
// After December 31st, 2016, review use of fcntl() for setting non blocking
// and close on exec. It would be preferable to use accept4(), SOCK_CLOEXEC
// and SOCK_NONBLOCK. These are currently avoided to allow use on older
// systems such as FreeBSD 9.3, Linux 2.6.26.
// For Linux accept4() currently requires defining _GNU_SOURCE which we would
// like to avoid.
// For FreeBSD, these definitions were introduced with FreeBSD 10.0 and are
// not present in 9.3 which is supported through 2016.
// Who we are
static const char * progname;
@@ -192,6 +187,8 @@ static uint16_t echo_id;
static uint16_t next_sequence = 0;
static uint16_t sequence_limit;
// Receive thread ready
static unsigned int recv_ready = 0;
//
// Termination handler
@@ -334,18 +331,23 @@ send_thread(
echo_request->code = 0;
echo_request->id = echo_id;
// Give the recv thread a moment to initialize
sleeptime.tv_sec = 0;
sleeptime.tv_nsec = 10000; // 10us
do {
r = nanosleep(&sleeptime, NULL);
if (r == -1)
{
logger("nanosleep error in send thread waiting for recv thread: %d\n", errno);
}
} while (recv_ready == 0);
// Set up the timespec for nanosleep
sleeptime.tv_sec = send_interval_msec / 1000;
sleeptime.tv_nsec = (send_interval_msec % 1000) * 1000000;
while (1)
{
r = nanosleep(&sleeptime, NULL);
if (r == -1)
{
logger("nanosleep error in send thread: %d\n", errno);
}
// Set sequence number and checksum
echo_request->sequence = htons(next_sequence);
echo_request->cksum = 0;
@@ -364,6 +366,12 @@ send_thread(
next_slot = (next_slot + 1) % array_size;
next_sequence = (next_sequence + 1) % sequence_limit;
r = nanosleep(&sleeptime, NULL);
if (r == -1)
{
logger("nanosleep error in send thread: %d\n", errno);
}
}
}
@@ -384,6 +392,9 @@ recv_thread(
struct timespec now;
unsigned int array_slot;
// Thread startup complete
recv_ready = 1;
while (1)
{
src_addr_len = sizeof(src_addr);
@@ -688,9 +699,14 @@ usocket_thread(
while (1)
{
#if defined(DISABLE_ACCEPT4)
// Legacy
sock_fd = accept(usocket_fd, NULL, NULL);
(void) fcntl(sock_fd, F_SETFL, FD_CLOEXEC);
(void) fcntl(sock_fd, F_SETFL, fcntl(sock_fd, F_GETFL, 0) | O_NONBLOCK);
#else
sock_fd = accept4(usocket_fd, NULL, NULL, SOCK_NONBLOCK | SOCK_CLOEXEC);
#endif
report(&average_latency_usec, &latency_deviation, &average_loss_percent);
@@ -728,10 +744,10 @@ get_time_arg_msec(
const char * arg,
unsigned long * value)
{
unsigned long t;
long t;
char * suffix;
t = strtoul(arg, &suffix, 10);
t = strtol(arg, &suffix, 10);
if (*suffix == 'm')
{
// Milliseconds
@@ -744,13 +760,13 @@ get_time_arg_msec(
suffix++;
}
// Garbage in the number?
if (*suffix != 0)
// Invalid specification?
if (t < 0 || *suffix != 0)
{
return 1;
}
*value = t;
*value = (unsigned long) t;
return 0;
}
@@ -763,22 +779,22 @@ get_percent_arg(
const char * arg,
unsigned long * value)
{
unsigned long t;
long t;
char * suffix;
t = strtoul(arg, &suffix, 10);
t = strtol(arg, &suffix, 10);
if (*suffix == '%')
{
suffix++;
}
// Garbage in the number?
if (*suffix != 0 || t > 100)
// Invalid specification?
if (t < 0 || t > 100 || *suffix != 0)
{
return 1;
}
*value = t;
*value = (unsigned long) t;
return 0;
}
@@ -791,10 +807,10 @@ get_length_arg(
const char * arg,
unsigned long * value)
{
unsigned long t;
long t;
char * suffix;
t = strtoul(arg, &suffix, 10);
t = strtol(arg, &suffix, 10);
if (*suffix == 'b')
{
// Bytes
@@ -807,13 +823,13 @@ get_length_arg(
suffix++;
}
// Garbage in the number?
if (*suffix != 0)
// Invalid specification?
if (t < 0 || *suffix != 0)
{
return 1;
}
*value = t;
*value = (unsigned long) t;
return 0;
}
@@ -974,7 +990,7 @@ parse_args(
case 'D':
r = get_time_arg_msec(optarg, &latency_alarm_threshold_msec);
if (r || latency_alarm_threshold_msec == 0)
if (r)
{
fatal("invalid latency alarm threshold %s\n", optarg);
}
@@ -983,7 +999,7 @@ parse_args(
case 'L':
r = get_percent_arg(optarg, &loss_alarm_threshold_percent);
if (r || loss_alarm_threshold_percent == 0)
if (r)
{
fatal("invalid loss alarm threshold %s\n", optarg);
}
@@ -1130,10 +1146,13 @@ main(
char *argv[])
{
char bind_str[ADDR_STR_MAX] = "(none)";
char pidbuf[64];
int pidfile_fd = -1;
pid_t pid;
pthread_t thread;
struct sigaction act;
int buflen = PACKET_BUFLEN;
ssize_t len;
ssize_t rs;
int r;
@@ -1180,6 +1199,66 @@ main(
(void) setgid(getgid());
(void) setuid(getuid());
// Create pid file
if (pidfile_name)
{
pidfile_fd = open(pidfile_name, O_WRONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0644);
if (pidfile_fd != -1)
{
// Lock the pid file
r = flock(pidfile_fd, LOCK_EX | LOCK_NB);
if (r == -1)
{
perror("flock");
fatal("error locking pid file\n");
}
}
else
{
// Pid file already exists?
pidfile_fd = open(pidfile_name, O_RDWR | O_CREAT | O_CLOEXEC, 0644);
if (pidfile_fd == -1)
{
perror("open");
fatal("cannot create/open pid file %s\n", pidfile_name);
}
// Lock the pid file
r = flock(pidfile_fd, LOCK_EX | LOCK_NB);
if (r == -1)
{
fatal("pid file %s is in use by another process\n", pidfile_name);
}
// Check for existing pid
rs = read(pidfile_fd, pidbuf, sizeof(pidbuf) - 1);
if (rs > 0)
{
pidbuf[rs] = 0;
pid = (pid_t) strtol(pidbuf, NULL, 10);
if (pid > 0)
{
// Is the pid still alive?
r = kill(pid, 0);
if (r == 0)
{
fatal("pid file %s is in use by process %u\n", pidfile_name, (unsigned int) pid);
}
}
}
// Reset the pid file
(void) lseek(pidfile_fd, 0, 0);
r = ftruncate(pidfile_fd, 0);
if (r == -1)
{
perror("ftruncate");
fatal("cannot write pid file %s\n", pidfile_name);
}
}
}
// Create report file
if (report_name)
{
@@ -1239,31 +1318,20 @@ main(
}
}
// Create pid file
if (pidfile_name)
{
pidfile_fd = open(pidfile_name, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644);
if (pidfile_fd == -1)
{
perror("open");
fatal("cannot open/create pid file %s\n", pidfile_name);
}
}
// End of general errors from command line options
// Self background
if (foreground == 0)
{
r = fork();
pid = fork();
if (r == -1)
if (pid == -1)
{
perror("fork");
fatal("cannot background\n");
}
if (r)
if (pid)
{
_exit(EXIT_SUCCESS);
}
@@ -1280,16 +1348,13 @@ main(
// Write pid file
if (pidfile_fd != -1)
{
char buf[64];
ssize_t len;
len = snprintf(buf, sizeof(buf), "%u\n", (unsigned) getpid());
if (len < 0 || (size_t) len > sizeof(buf))
len = snprintf(pidbuf, sizeof(pidbuf), "%u\n", (unsigned) getpid());
if (len < 0 || (size_t) len > sizeof(pidbuf))
{
fatal("error formatting pidfile\n");
}
rs = write(pidfile_fd, buf, (size_t) len);
rs = write(pidfile_fd, pidbuf, (size_t) len);
if (rs == -1)
{
perror("write");