mirror of
https://github.com/xdp-project/bpf-examples.git
synced 2024-05-06 15:54:53 +00:00
pping: Add RTT-based sampling
Add an option (-R, --rtt-rate) to adapt the sampling rate based on the RTT of the flow. The sampling interval will be C * RTT, where C is a configurable constant (e.g. 1.0 to get one sample every RTT), and RTT is either the current minimum (default) or smoothed RTT of the flow (chosen via the -t or --rtt-type option). The smoothed RTT (sRTT) is updated for each calculated RTT, and is calculated in a similar manner to srtt in the kernel's TCP stack. The sRTT is a moving average of all RTTs, and is calculated according to the formula: srtt = 7/8 * prev_srtt + 1/8 * rtt. To allow the user to pass a non-integer C (e.g. 0.1 to get 10 RTT samples for every RTT-period), fixed-point arithmetic has been used in the eBPF programs (due to lack of support for floats). The maximum value for C has been limited to 10000 in order for it to be unlikely that the C * RTT calculation will overflow (with C = 10000, overflow will only occur if RTT > 28 seconds). Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
@@ -100,6 +100,8 @@ static const struct option long_options[] = {
|
||||
{ "help", no_argument, NULL, 'h' },
|
||||
{ "interface", required_argument, NULL, 'i' }, // Name of interface to run on
|
||||
{ "rate-limit", required_argument, NULL, 'r' }, // Sampling rate-limit in ms
|
||||
{ "rtt-rate", required_argument, NULL, 'R' }, // Sampling rate in terms of flow-RTT (ex 1 sample per RTT-interval)
|
||||
{ "rtt-type", required_argument, NULL, 't' }, // What type of RTT the RTT-rate should be applied to ("min" or "smoothed"), only relevant if rtt-rate is provided
|
||||
{ "force", no_argument, NULL, 'f' }, // Overwrite any existing XDP program on interface, remove qdisc on cleanup
|
||||
{ "cleanup-interval", required_argument, NULL, 'c' }, // Map cleaning interval in s, 0 to disable
|
||||
{ "format", required_argument, NULL, 'F' }, // Which format to output in (standard/json/ppviz)
|
||||
@@ -167,14 +169,14 @@ static int parse_bounded_double(double *res, const char *str, double low,
|
||||
static int parse_arguments(int argc, char *argv[], struct pping_config *config)
|
||||
{
|
||||
int err, opt;
|
||||
double rate_limit_ms, cleanup_interval_s;
|
||||
double rate_limit_ms, cleanup_interval_s, rtt_rate;
|
||||
|
||||
config->ifindex = 0;
|
||||
config->force = false;
|
||||
config->bpf_config.track_tcp = false;
|
||||
config->bpf_config.track_icmp = false;
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "hfTCi:r:c:F:I:", long_options,
|
||||
while ((opt = getopt_long(argc, argv, "hfTCi:r:R:t:c:F:I:", long_options,
|
||||
NULL)) != -1) {
|
||||
switch (opt) {
|
||||
case 'i':
|
||||
@@ -203,6 +205,26 @@ static int parse_arguments(int argc, char *argv[], struct pping_config *config)
|
||||
config->bpf_config.rate_limit =
|
||||
rate_limit_ms * NS_PER_MS;
|
||||
break;
|
||||
case 'R':
|
||||
err = parse_bounded_double(&rtt_rate, optarg, 0, 10000,
|
||||
"rtt-rate");
|
||||
if (err)
|
||||
return -EINVAL;
|
||||
config->bpf_config.rtt_rate =
|
||||
DOUBLE_TO_FIXPOINT(rtt_rate);
|
||||
break;
|
||||
case 't':
|
||||
if (strcmp(optarg, "min") == 0) {
|
||||
config->bpf_config.use_srtt = false;
|
||||
}
|
||||
else if (strcmp(optarg, "smoothed") == 0) {
|
||||
config->bpf_config.use_srtt = true;
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"rtt-type must be \"min\" or \"smoothed\"\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
break;
|
||||
case 'c':
|
||||
err = parse_bounded_double(&cleanup_interval_s, optarg,
|
||||
0, 7 * S_PER_DAY,
|
||||
@@ -482,7 +504,7 @@ static bool flow_timeout(void *key_ptr, void *val_ptr, __u64 now)
|
||||
if (print_event_func) {
|
||||
fe.event_type = EVENT_TYPE_FLOW;
|
||||
fe.timestamp = now;
|
||||
memcpy(&fe.flow, key_ptr, sizeof(struct network_tuple));
|
||||
fe.flow = *(struct network_tuple *)key_ptr;
|
||||
fe.event_info.event = FLOW_EVENT_CLOSING;
|
||||
fe.event_info.reason = EVENT_REASON_FLOW_TIMEOUT;
|
||||
fe.source = EVENT_SOURCE_USERSPACE;
|
||||
@@ -976,7 +998,9 @@ int main(int argc, char *argv[])
|
||||
DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_egress_opts);
|
||||
|
||||
struct pping_config config = {
|
||||
.bpf_config = { .rate_limit = 100 * NS_PER_MS },
|
||||
.bpf_config = { .rate_limit = 100 * NS_PER_MS,
|
||||
.rtt_rate = 0,
|
||||
.use_srtt = false },
|
||||
.cleanup_interval = 1 * NS_PER_SECOND,
|
||||
.object_path = "pping_kern.o",
|
||||
.ingress_prog = "pping_xdp_ingress",
|
||||
|
@@ -6,6 +6,11 @@
|
||||
#include <linux/in6.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
/* Unsigned 64-bit fixed-point value; the low FIXPOINT_SHIFT bits hold the fraction. */
typedef __u64 fixpoint64;
|
||||
/* Number of fractional bits in a fixpoint64. */
#define FIXPOINT_SHIFT 16
|
||||
/* Scale X by 2^FIXPOINT_SHIFT and cast the result to fixpoint64. */
#define DOUBLE_TO_FIXPOINT(X) ((fixpoint64)((X) * (1UL << FIXPOINT_SHIFT)))
|
||||
/* Shift out the FIXPOINT_SHIFT fractional bits of X, leaving the integer part. */
#define FIXPOINT_TO_UINT(X) ((X) >> FIXPOINT_SHIFT)
|
||||
|
||||
/* For the event_type members of rtt_event and flow_event */
|
||||
#define EVENT_TYPE_FLOW 1
|
||||
#define EVENT_TYPE_RTT 2
|
||||
@@ -34,9 +39,11 @@ enum __attribute__((__packed__)) flow_event_source {
|
||||
|
||||
struct bpf_config {
|
||||
__u64 rate_limit;
|
||||
fixpoint64 rtt_rate;
|
||||
bool use_srtt;
|
||||
bool track_tcp;
|
||||
bool track_icmp;
|
||||
__u8 reserved[6];
|
||||
__u8 reserved[5];
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -67,6 +74,7 @@ struct network_tuple {
|
||||
|
||||
struct flow_state {
|
||||
__u64 min_rtt;
|
||||
__u64 srtt;
|
||||
__u64 last_timestamp;
|
||||
__u64 sent_pkts;
|
||||
__u64 sent_bytes;
|
||||
|
@@ -324,6 +324,33 @@ static __u32 remaining_pkt_payload(struct parsing_context *ctx)
|
||||
return parsed_bytes < ctx->pkt_len ? ctx->pkt_len - parsed_bytes : 0;
|
||||
}
|
||||
|
||||
/*
 * Calculate a smoothed RTT similar to how the TCP stack does it in
 * net/ipv4/tcp_input.c/tcp_rtt_estimator().
 *
 * prev_srtt: the previous smoothed RTT, 0 if there is none yet
 * rtt:       the new RTT sample
 *
 * Returns rtt unchanged when prev_srtt is 0, otherwise
 * 7/8 * prev_srtt + 1/8 * rtt computed with right shifts.
 *
 * NOTE: Will cause roundoff errors, but if RTTs > 1000ns errors should be small
 */
static __u64 calculate_srtt(__u64 prev_srtt, __u64 rtt)
{
	// No previous estimate, so the first sample is the estimate
	if (!prev_srtt)
		return rtt;

	// srtt = 7/8*prev_srtt + 1/8*rtt
	return prev_srtt - (prev_srtt >> 3) + (rtt >> 3);
}
|
||||
|
||||
static bool is_rate_limited(__u64 now, __u64 last_ts, __u64 rtt)
|
||||
{
|
||||
if (now < last_ts)
|
||||
return true;
|
||||
|
||||
// RTT-based rate limit
|
||||
if (config.rtt_rate && rtt)
|
||||
return now - last_ts < FIXPOINT_TO_UINT(config.rtt_rate * rtt);
|
||||
|
||||
// Static rate limit
|
||||
return now - last_ts < config.rate_limit;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fills in event_type, timestamp, flow, source and reserved.
|
||||
* Does not fill in the flow_info.
|
||||
@@ -403,8 +430,9 @@ static void pping_egress(void *ctx, struct parsing_context *pctx)
|
||||
f_state->last_id = p_id.identifier;
|
||||
|
||||
// Check rate-limit
|
||||
if (!new_flow && (now < f_state->last_timestamp ||
|
||||
now - f_state->last_timestamp < config.rate_limit))
|
||||
if (!new_flow &&
|
||||
is_rate_limited(now, f_state->last_timestamp,
|
||||
config.use_srtt ? f_state->srtt : f_state->min_rtt))
|
||||
return;
|
||||
|
||||
/*
|
||||
@@ -449,12 +477,12 @@ static void pping_ingress(void *ctx, struct parsing_context *pctx)
|
||||
goto validflow_out;
|
||||
|
||||
re.rtt = now - *p_ts;
|
||||
|
||||
// Delete timestamp entry as soon as RTT is calculated
|
||||
bpf_map_delete_elem(&packet_ts, &p_id);
|
||||
|
||||
if (f_state->min_rtt == 0 || re.rtt < f_state->min_rtt)
|
||||
f_state->min_rtt = re.rtt;
|
||||
f_state->srtt = calculate_srtt(f_state->srtt, re.rtt);
|
||||
|
||||
// Fill event and push to perf-buffer
|
||||
re.event_type = EVENT_TYPE_RTT;
|
||||
|
Reference in New Issue
Block a user