pping: Improve aggregated output format

Provide some statistics (min, mean, median, p95, max) instead of
dumping the raw bin counts.

While the raw bin counts provide more information and can be used for
further post processing, they are hard for a human to parse and make
sense of. Therefore, they are more suitable for a data-oriented
format, such as the JSON output.

Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
Simon Sundberg
2023-05-30 14:18:24 +02:00
parent a301900fbd
commit 989905e870
2 changed files with 130 additions and 13 deletions

122
pping/lhist.h Normal file
View File

@ -0,0 +1,122 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef LHIST_H
#define LHIST_H
#include <stdlib.h>
#include <math.h>
#include <linux/types.h>
#include <stdio.h>
/* Count the total number of instances in a histogram.
 *
 * bins: array of per-bin instance counts
 * size: number of entries in bins
 *
 * Returns the sum of all bin counts (0 if size is 0). */
static __u64 lhist_count(__u32 *bins, size_t size)
{
	__u64 count = 0;
	/* Same type as size: avoids a signed/unsigned comparison and signed
	 * overflow of the index for very large histograms. */
	size_t i;

	for (i = 0; i < size; i++)
		count += bins[i];

	return count;
}
/* Get the value at the middle of bin number bin_idx, for a histogram whose
 * first bin starts at left_edge and where each bin is bin_width wide. */
static double lhist_bin_midval(int bin_idx, double bin_width, double left_edge)
{
	double first_mid = left_edge + (bin_width / 2);
	double offset = bin_width * bin_idx;

	return first_mid + offset;
}
/* Calculate an approximate minimum value from a linear histogram.
 * The approximation is the middle of the first non-empty bin.
 *
 * bins:      array of per-bin instance counts
 * size:      number of entries in bins
 * bin_width: width of each bin
 * left_edge: value at the left edge of the first bin
 *
 * Returns NAN if the histogram has no bins or if all bins are empty. */
static double lhist_min(__u32 *bins, size_t size, double bin_width,
			double left_edge)
{
	size_t i;

	for (i = 0; i < size; i++) {
		if (bins[i] > 0)
			return lhist_bin_midval((int)i, bin_width, left_edge);
	}

	/* No non-empty bin was found. Returning here, rather than inspecting
	 * bins[i] after the loop, avoids reading one element past the end of
	 * the array when every bin is empty (i == size at that point). */
	return NAN;
}
/* Calculate an approximate maximum value from a linear histogram.
 * The approximation is the middle of the last non-empty bin.
 *
 * bins:      array of per-bin instance counts
 * size:      number of entries in bins
 * bin_width: width of each bin
 * left_edge: value at the left edge of the first bin
 *
 * Returns NAN if the histogram has no bins or if all bins are empty. */
static double lhist_max(__u32 *bins, size_t size, double bin_width,
			double left_edge)
{
	size_t i;

	/* Scan from the right so the search can stop at the last non-empty
	 * bin. i is one past the inspected bin, which keeps the unsigned
	 * index from wrapping below zero. */
	for (i = size; i > 0; i--) {
		if (bins[i - 1] > 0)
			return lhist_bin_midval((int)(i - 1), bin_width,
						left_edge);
	}

	return NAN;
}
/* Calculate an approximate arithmetic mean from a linear histogram.
 * The approximation is based on the assumption that all instances are located
 * in the middle of their respective bins.
 *
 * bins:      array of per-bin instance counts
 * size:      number of entries in bins
 * bin_width: width of each bin
 * left_edge: value at the left edge of the first bin
 *
 * Returns NAN if the histogram contains no instances. */
static double lhist_mean(__u32 *bins, size_t size, double bin_width,
			 double left_edge)
{
	double sum = 0, mid_val = left_edge + (bin_width / 2);
	__u64 count = 0;
	/* Same type as size: avoids a signed/unsigned comparison and signed
	 * overflow of the index for very large histograms. */
	size_t i;

	for (i = 0; i < size; i++) {
		count += bins[i];
		sum += bins[i] * mid_val;
		/* Step to the middle of the next bin */
		mid_val += bin_width;
	}

	return count ? sum / count : NAN;
}
/* Calculate an approximate percentile value from a linear histogram.
 * The approximation is based on the assumption that all instances are located
 * in the middle of their respective bins. Does linear interpolation for
 * percentiles located between bins (similar to ex. numpy.percentile)
 *
 * bins:       array of per-bin instance counts
 * percentile: requested percentile; values that map outside the range of
 *             instances are clamped to the approximate min/max
 * size:       number of entries in bins
 * bin_width:  width of each bin
 * left_edge:  value at the left edge of the first bin
 *
 * Returns NAN if the histogram contains no instances or if the percentile
 * does not map to a valid position (e.g. percentile is NaN). */
static double lhist_percentile(__u32 *bins, double percentile, size_t size,
			       double bin_width, double left_edge)
{
	__u64 n = lhist_count(bins, size);
	double virt_idx, ret;
	size_t i = 0, next_i;
	__u64 count = 0;

	if (n < 1)
		return NAN;

	/* 0-based (fractional) position of the percentile among n instances */
	virt_idx = percentile / 100 * (n - 1);

	/* All comparisons with NaN are false, so a NaN position would slip
	 * through the range checks below, skip the search loop and index
	 * before the first bin. */
	if (isnan(virt_idx))
		return NAN;

	/* Check for out of bounds percentiles or rounding errors */
	if (virt_idx <= 0)
		return lhist_min(bins, size, bin_width, left_edge);
	else if (virt_idx >= n - 1)
		return lhist_max(bins, size, bin_width, left_edge);

	/* Find the bin the virtual index lies in. As virt_idx < n - 1 here,
	 * the cumulative count exceeds virt_idx no later than at the last
	 * bin, so the loop stays within the array. */
	while (count <= virt_idx)
		count += bins[i++];
	i--;

	ret = lhist_bin_midval((int)i, bin_width, left_edge);

	/* virtual index is between current bin and next (non-empty) bin
	 * (count - 1 < virt_idx < count) */
	if (virt_idx > count - 1) {
		/* Find next non-empty bin to interpolate between. One exists
		 * because count - 1 < virt_idx < n - 1 implies count < n. */
		next_i = i + 1;
		while (bins[next_i] == 0)
			next_i++;

		ret += (virt_idx - (count - 1)) * (next_i - i) * bin_width;
	}

	return ret;
}
#endif

View File

@ -29,6 +29,7 @@ static const char *__doc__ =
#include "json_writer.h"
#include "pping.h" //common structs for user-space and BPF parts
#include "lhist.h"
// Maximum string length for IP prefix (including /xx[x] and '\0')
#define INET_PREFIXSTRLEN (INET_ADDRSTRLEN + 3)
@ -1074,32 +1075,26 @@ static void handle_missed_events(void *ctx, int cpu, __u64 lost_cnt)
fprintf(stderr, "Lost %llu events on CPU %d\n", lost_cnt, cpu);
}
/* Write the first n_bins bin counts of rtt_stats to stream as a
 * comma-separated list enclosed in square brackets, e.g. "[1,0,3]".
 * The first bin is always written, also when n_bins is less than 1. */
static void print_histogram(FILE *stream,
			    struct aggregated_rtt_stats *rtt_stats, int n_bins)
{
	int bin = 0;

	fprintf(stream, "[%u", rtt_stats->bins[bin]);

	/* Remaining bins are each preceded by a separating comma */
	while (++bin < n_bins)
		fprintf(stream, ",%u", rtt_stats->bins[bin]);

	fprintf(stream, "]");
}
/* Print a one-line summary of the aggregated RTT statistics for an IP prefix.
 * Writes to stream: the timestamp t (via print_ns_datetime), the prefix as
 * formatted by format_ipprefix, and statistics taken from rtt_stats using the
 * histogram layout (n_bins, bin_width) in agg_conf.
 * NOTE(review): this hunk shows both the earlier format string (ending in
 * "histogram=") together with its print_histogram() call, and the newer
 * count/mean/median/p95 format string; presumably only the latter remains
 * once the change is applied - confirm against the resulting file. */
static void print_aggregated_rtts(FILE *stream, __u64 t,
struct ipprefix_key *prefix, int af,
__u8 prefix_len,
struct aggregated_rtt_stats *rtt_stats,
struct aggregation_config *agg_conf)
{
/* Shorthands for the histogram layout passed to the lhist_* helpers */
__u64 nb = agg_conf->n_bins, bw = agg_conf->bin_width;
char prefixstr[INET6_PREFIXSTRLEN] = { 0 };
format_ipprefix(prefixstr, sizeof(prefixstr), af, prefix, prefix_len);
print_ns_datetime(stream, t);
fprintf(stream,
": %s -> min=%.6g ms, max=%.6g ms, histogram=", prefixstr,
": %s -> count=%llu, min=%.6g ms, mean=%g ms, median=%g ms, p95=%g ms, max=%.6g ms",
prefixstr, lhist_count(rtt_stats->bins, nb),
/* min and max are read directly from rtt_stats; mean, median and p95 are
 * approximated from the bins with a left edge of 0. Dividing by NS_PER_MS
 * presumably converts ns to ms, matching the "ms" labels - TODO confirm. */
(double)rtt_stats->min / NS_PER_MS,
lhist_mean(rtt_stats->bins, nb, bw, 0) / NS_PER_MS,
lhist_percentile(rtt_stats->bins, 50, nb, bw, 0) / NS_PER_MS,
lhist_percentile(rtt_stats->bins, 95, nb, bw, 0) / NS_PER_MS,
(double)rtt_stats->max / NS_PER_MS);
print_histogram(stream, rtt_stats, agg_conf->n_bins);
fprintf(stream, "\n");
}