pping: Add RTT-based sampling

Add an option (-R, --rtt-rate) to adapt the sampling rate to the RTT
of the flow. The minimum interval between samples will be C * RTT,
where C is a configurable constant (e.g. 1.0 to get one sample every
RTT), and RTT is either the current minimum (default) or the smoothed
RTT of the flow (chosen via the -t or --rtt-type option).

The smoothed RTT (sRTT) is updated for each calculated RTT, in a
similar manner to srtt in the kernel's TCP stack. The sRTT is an
exponentially weighted moving average of all RTTs, calculated
according to the formula:

  srtt = 7/8 * prev_srtt + 1/8 * rtt

To allow the user to pass a non-integer C (e.g. 0.1 to get 10 RTT
samples for every RTT-period), fixed-point arithmetic is used in the
eBPF programs (due to the lack of support for floats). The maximum
value for C is limited to 10000 to make it unlikely that the C * RTT
calculation will overflow (with C = 10000, overflow will only occur
if RTT > 28 seconds).

Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
Simon Sundberg
2022-02-10 16:11:21 +01:00
parent c79c4e8571
commit 928a4144a9
3 changed files with 68 additions and 8 deletions

View File

@@ -100,6 +100,8 @@ static const struct option long_options[] = {
{ "help", no_argument, NULL, 'h' },
{ "interface", required_argument, NULL, 'i' }, // Name of interface to run on
{ "rate-limit", required_argument, NULL, 'r' }, // Sampling rate-limit in ms
{ "rtt-rate", required_argument, NULL, 'R' }, // Sampling rate in terms of flow-RTT (ex 1 sample per RTT-interval)
{ "rtt-type", required_argument, NULL, 't' }, // What type of RTT the RTT-rate should be applied to ("min" or "smoothed"), only relevant if rtt-rate is provided
{ "force", no_argument, NULL, 'f' }, // Overwrite any existing XDP program on interface, remove qdisc on cleanup
{ "cleanup-interval", required_argument, NULL, 'c' }, // Map cleaning interval in s, 0 to disable
{ "format", required_argument, NULL, 'F' }, // Which format to output in (standard/json/ppviz)
@@ -167,14 +169,14 @@ static int parse_bounded_double(double *res, const char *str, double low,
static int parse_arguments(int argc, char *argv[], struct pping_config *config)
{
int err, opt;
double rate_limit_ms, cleanup_interval_s;
double rate_limit_ms, cleanup_interval_s, rtt_rate;
config->ifindex = 0;
config->force = false;
config->bpf_config.track_tcp = false;
config->bpf_config.track_icmp = false;
while ((opt = getopt_long(argc, argv, "hfTCi:r:c:F:I:", long_options,
while ((opt = getopt_long(argc, argv, "hfTCi:r:R:t:c:F:I:", long_options,
NULL)) != -1) {
switch (opt) {
case 'i':
@@ -203,6 +205,26 @@ static int parse_arguments(int argc, char *argv[], struct pping_config *config)
config->bpf_config.rate_limit =
rate_limit_ms * NS_PER_MS;
break;
case 'R':
err = parse_bounded_double(&rtt_rate, optarg, 0, 10000,
"rtt-rate");
if (err)
return -EINVAL;
config->bpf_config.rtt_rate =
DOUBLE_TO_FIXPOINT(rtt_rate);
break;
case 't':
if (strcmp(optarg, "min") == 0) {
config->bpf_config.use_srtt = false;
}
else if (strcmp(optarg, "smoothed") == 0) {
config->bpf_config.use_srtt = true;
} else {
fprintf(stderr,
"rtt-type must be \"min\" or \"smoothed\"\n");
return -EINVAL;
}
break;
case 'c':
err = parse_bounded_double(&cleanup_interval_s, optarg,
0, 7 * S_PER_DAY,
@@ -482,7 +504,7 @@ static bool flow_timeout(void *key_ptr, void *val_ptr, __u64 now)
if (print_event_func) {
fe.event_type = EVENT_TYPE_FLOW;
fe.timestamp = now;
memcpy(&fe.flow, key_ptr, sizeof(struct network_tuple));
fe.flow = *(struct network_tuple *)key_ptr;
fe.event_info.event = FLOW_EVENT_CLOSING;
fe.event_info.reason = EVENT_REASON_FLOW_TIMEOUT;
fe.source = EVENT_SOURCE_USERSPACE;
@@ -976,7 +998,9 @@ int main(int argc, char *argv[])
DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_egress_opts);
struct pping_config config = {
.bpf_config = { .rate_limit = 100 * NS_PER_MS },
.bpf_config = { .rate_limit = 100 * NS_PER_MS,
.rtt_rate = 0,
.use_srtt = false },
.cleanup_interval = 1 * NS_PER_SECOND,
.object_path = "pping_kern.o",
.ingress_prog = "pping_xdp_ingress",

View File

@@ -6,6 +6,11 @@
#include <linux/in6.h>
#include <stdbool.h>
/* Unsigned 64-bit fixed-point value with FIXPOINT_SHIFT fractional bits */
typedef __u64 fixpoint64;
#define FIXPOINT_SHIFT 16
/* Scale X by 2^FIXPOINT_SHIFT and truncate the result to a fixpoint64 */
#define DOUBLE_TO_FIXPOINT(X) ((fixpoint64)((X) * (1UL << FIXPOINT_SHIFT)))
/* Shift out the fractional bits of X, leaving only its integer part */
#define FIXPOINT_TO_UINT(X) ((X) >> FIXPOINT_SHIFT)
/* For the event_type members of rtt_event and flow_event */
#define EVENT_TYPE_FLOW 1
#define EVENT_TYPE_RTT 2
@@ -34,9 +39,11 @@ enum __attribute__((__packed__)) flow_event_source {
/* Settings shared with the BPF programs */
struct bpf_config {
/* Static minimum time between samples, used when no RTT-based limit applies */
__u64 rate_limit;
/* Fixed-point factor multiplied with the flow RTT to get the minimum time
 * between samples; 0 disables RTT-based rate limiting */
fixpoint64 rtt_rate;
/* If true the RTT-based limit uses the flow's srtt, otherwise its min_rtt */
bool use_srtt;
bool track_tcp;
bool track_icmp;
__u8 reserved[6];
__u8 reserved[5];
};
/*
@@ -67,6 +74,7 @@ struct network_tuple {
struct flow_state {
__u64 min_rtt;
__u64 srtt;
__u64 last_timestamp;
__u64 sent_pkts;
__u64 sent_bytes;

View File

@@ -324,6 +324,33 @@ static __u32 remaining_pkt_payload(struct parsing_context *ctx)
return parsed_bytes < ctx->pkt_len ? ctx->pkt_len - parsed_bytes : 0;
}
/*
 * Update a smoothed RTT estimate with a new RTT sample, similar to how the
 * TCP stack does it in net/ipv4/tcp_input.c/tcp_rtt_estimator():
 *
 *   srtt = 7/8 * prev_srtt + 1/8 * rtt
 *
 * A prev_srtt of 0 is treated as "no estimate yet", in which case the sample
 * itself becomes the new estimate.
 *
 * NOTE: The shifts discard the low bits, so the result carries roundoff
 * errors, but if RTTs > 1000ns the errors should be small.
 */
static __u64 calculate_srtt(__u64 prev_srtt, __u64 rtt)
{
	__u64 decayed, contribution;

	if (prev_srtt == 0)
		return rtt;

	decayed = prev_srtt - (prev_srtt >> 3); // 7/8 of the old estimate
	contribution = rtt >> 3; // 1/8 of the new sample

	return decayed + contribution;
}
/*
 * Decide whether a new sample should be suppressed because too little time
 * has passed since the previous one.
 *
 * now:     current timestamp
 * last_ts: timestamp of the previous sample for the flow
 * rtt:     RTT value the RTT-based limit is scaled by (0 if none is known)
 *
 * Returns true if the sample should be skipped. A last_ts that lies after
 * now is always treated as rate limited.
 */
static bool is_rate_limited(__u64 now, __u64 last_ts, __u64 rtt)
{
	__u64 elapsed, min_interval;

	if (last_ts > now)
		return true;

	elapsed = now - last_ts;

	if (config.rtt_rate && rtt)
		// RTT-based rate limit (the product is not checked for overflow)
		min_interval = FIXPOINT_TO_UINT(config.rtt_rate * rtt);
	else
		// Static rate limit
		min_interval = config.rate_limit;

	return elapsed < min_interval;
}
/*
* Fills in event_type, timestamp, flow, source and reserved.
* Does not fill in the flow_info.
@@ -403,8 +430,9 @@ static void pping_egress(void *ctx, struct parsing_context *pctx)
f_state->last_id = p_id.identifier;
// Check rate-limit
if (!new_flow && (now < f_state->last_timestamp ||
now - f_state->last_timestamp < config.rate_limit))
if (!new_flow &&
is_rate_limited(now, f_state->last_timestamp,
config.use_srtt ? f_state->srtt : f_state->min_rtt))
return;
/*
@@ -449,12 +477,12 @@ static void pping_ingress(void *ctx, struct parsing_context *pctx)
goto validflow_out;
re.rtt = now - *p_ts;
// Delete timestamp entry as soon as RTT is calculated
bpf_map_delete_elem(&packet_ts, &p_id);
if (f_state->min_rtt == 0 || re.rtt < f_state->min_rtt)
f_state->min_rtt = re.rtt;
f_state->srtt = calculate_srtt(f_state->srtt, re.rtt);
// Fill event and push to perf-buffer
re.event_type = EVENT_TYPE_RTT;