pping: Inital rate limit implementation

Add a per-flow rate limit, limiting how often new timestamp entries
can be created. As part of this, add per-flow state keeping track
of when last timestamp was created and last seen identifier for each
flow.

Additionally, remove timestamp entry as soon as RTT is
calculated, as last seen identifier is used to find first unique value
instead. Furthermore, remove packet_timestamp struct and only use
__u64 as timestamp, as used memeber is no longer needed.

This initial commit lacks cleanup of flow-state, user-configuration of
rate limit, mechanism to handle bursts etc.

Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
Simon Sundberg
2021-03-01 18:16:48 +01:00
parent 6e5136092d
commit cecd6b54f2
4 changed files with 102 additions and 31 deletions

View File

@@ -188,7 +188,7 @@ static int clean_map(int map_fd, __u64 max_age)
{
int removed = 0;
struct packet_id key, prev_key = { 0 };
struct packet_timestamp value;
__u64 value;
bool delete_prev = false;
__u64 now_nsec = get_time_ns();
@@ -207,8 +207,8 @@ static int clean_map(int map_fd, __u64 max_age)
}
if (bpf_map_lookup_elem(map_fd, &key, &value) == 0) {
if (now_nsec > value.timestamp &&
now_nsec - value.timestamp > max_age) {
if (now_nsec > value &&
now_nsec - value > max_age) {
delete_prev = true;
}
}

View File

@@ -34,17 +34,17 @@ struct network_tuple {
__u8 reserved;
};
struct flow_state {
__u64 last_timestamp;
__u32 last_id;
__u32 reserved;
};
struct packet_id {
struct network_tuple flow;
__u32 identifier; //tsval for TCP packets
};
struct packet_timestamp {
__u64 timestamp;
__u8 used;
__u8 reserved[7];
};
struct rtt_event {
__u64 rtt;
struct network_tuple flow;

View File

@@ -6,25 +6,46 @@
#include "pping.h"
#include "pping_helpers.h"
#define RATE_LIMIT \
100000000UL // 100ms. Temporary solution, should be set by userspace
#define BURST_DURATION \
4000000 // 4ms, duration for when it may burst packet timestamps
#define BURST_SIZE \
3 // Number of packets it may create timestamps for in a burst
char _license[] SEC("license") = "GPL";
#ifdef HAVE_TC_LIBBPF /* detected by configure script in config.mk */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(struct packet_id));
__uint(value_size, sizeof(struct packet_timestamp));
__uint(value_size, sizeof(__u64));
__uint(max_entries, 16384);
__uint(pinning, LIBBPF_PIN_BY_NAME);
} ts_start SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(struct network_tuple));
__uint(value_size, sizeof(struct flow_state));
__uint(max_entries, 16384);
} flow_state SEC(".maps");
#else
struct bpf_elf_map SEC("maps") ts_start = {
.type = BPF_MAP_TYPE_HASH,
.size_key = sizeof(struct packet_id),
.size_value = sizeof(struct packet_timestamp),
.size_value = sizeof(__u64),
.max_elem = 16384,
.pinning = PIN_GLOBAL_NS,
};
struct bpf_elf_map SEC("maps") flow_state = {
.type = BPF_MAP_TYPE_HASH,
.size_key = sizeof(struct network_tuple),
.size_value = sizeof(struct flow_state),
.max_elem = 16384,
};
#endif
// TC-BFP for parsing packet identifier from egress traffic and add to map
@@ -32,18 +53,72 @@ SEC(TCBPF_PROG_SEC)
int tc_bpf_prog_egress(struct __sk_buff *skb)
{
struct packet_id p_id = { 0 };
struct packet_timestamp p_ts = { 0 };
__u64 p_ts;
struct parsing_context pctx = {
.data = (void *)(long)skb->data,
.data_end = (void *)(long)skb->data_end,
.pkt_len = skb->len,
.nh = { .pos = pctx.data },
};
struct flow_state *f_state;
struct flow_state new_state = { 0 }; // Rarely-ish used, but can't really dynamically allocate memory or?
if (parse_packet_identifier(&pctx, true, &p_id) < 0)
goto end;
p_ts.timestamp = bpf_ktime_get_ns(); // or bpf_ktime_get_boot_ns
// Check flow state
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
if (!f_state) { // No previous state - attempt to create it
bpf_map_update_elem(&flow_state, &p_id.flow, &new_state,
BPF_NOEXIST);
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
if (!f_state)
goto end;
}
// Check if identfier is new
/* The gap between checking and updating last_id may cause concurrency
* issues where multiple packets may simultaneously think they are the
* first with a new identifier. As long as all of the identifiers are
* the same though, only one should be able to create a timestamp entry.
* A bigger issue is that older identifiers (for example due to
* out-of-order packets) may pass this check and update the current
* identifier to an old one. This means that both the packet with the
* old identifier itself, as well the next packet with the current
* identifier, may be considered packets with new identifiers (even if
* both have been seen before). For TCP timestamps this could be
* prevented by changing the check to '>=' instead, but it may not be
* suitable for other protocols, such as QUIC and its spinbit.
*
* For now, just hope that the rate limit saves us from creating an
* incorrect timestamp. That may however also fail, either due to the
* to it happening in a time it's not limited by rate sampling, or
* because of rate check failing due to concurrency issues.
*/
if (f_state->last_id == p_id.identifier)
goto end;
f_state->last_id = p_id.identifier;
// Check rate-limit
/*
* The window between checking and updating last_timestamp may cause
* concurrency issues, where multiple packets simultaneously pass the
* rate limit. However, as long as they have the same identifier, only
* a single timestamp entry should successfully be created.
*/
p_ts = bpf_ktime_get_ns(); // or bpf_ktime_get_boot_ns
if (p_ts < f_state->last_timestamp ||
p_ts - f_state->last_timestamp < RATE_LIMIT)
goto end;
/*
* Updates attempt at creating timestamp, even if creation of timestamp
* fails (due to map being full). This should make the competition for
* the next available map slot somewhat fairer between heavy and sparse
* flows.
*/
f_state->last_timestamp = p_ts;
bpf_map_update_elem(&ts_start, &p_id, &p_ts, BPF_NOEXIST);
end:

View File

@@ -10,7 +10,7 @@ char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(struct packet_id));
__uint(value_size, sizeof(struct packet_timestamp));
__uint(value_size, sizeof(__u64));
__uint(max_entries, 16384);
__uint(pinning, LIBBPF_PIN_BY_NAME);
} ts_start SEC(".maps");
@@ -26,7 +26,7 @@ SEC(XDP_PROG_SEC)
int xdp_prog_ingress(struct xdp_md *ctx)
{
struct packet_id p_id = { 0 };
struct packet_timestamp *p_ts;
__u64 *p_ts;
struct rtt_event event = { 0 };
struct parsing_context pctx = {
.data = (void *)(long)ctx->data,
@@ -39,24 +39,20 @@ int xdp_prog_ingress(struct xdp_md *ctx)
goto end;
p_ts = bpf_map_lookup_elem(&ts_start, &p_id);
if (!p_ts)
goto end;
// Only calculate RTT for first packet with matching identifer
if (p_ts && p_ts->used == 0) {
event.rtt = bpf_ktime_get_ns() - *p_ts;
/*
* As used is not set atomically with the lookup, could
* potentially have multiple "first" packets (on different
* CPUs), but all those should then also have very similar RTT,
* so don't consider it a significant issue
* Attempt to delete timestamp entry as soon as RTT is calculated.
* But could have potential concurrency issue where multiple packets
* manage to match against the identifier before it can be deleted.
*/
p_ts->used = 1;
// TODO - Optional delete of entry (if identifier is garantued unique)
bpf_map_delete_elem(&ts_start, &p_id);
__builtin_memcpy(&event.flow, &p_id.flow,
sizeof(struct network_tuple));
event.rtt = bpf_ktime_get_ns() - p_ts->timestamp;
bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU,
&event, sizeof(event));
}
__builtin_memcpy(&event.flow, &p_id.flow, sizeof(struct network_tuple));
bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU, &event,
sizeof(event));
end:
return XDP_PASS;