mirror of
https://github.com/xdp-project/bpf-examples.git
synced 2024-05-06 15:54:53 +00:00
pping: Format code and add SPDX lincense tags
Format the code using the .clang-format from the kernel source tree, with a few manual tweaks here and there. Also, remove the TODO list from comment of pping.c and instead put it in TODO.md. Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
17
pping/TODO.md
Normal file
17
pping/TODO.md
Normal file
@@ -0,0 +1,17 @@
|
||||
# TODO
|
||||
|
||||
## For initial merge
|
||||
- [x] Clean up commits and add signed-off-by tags
|
||||
- [x] Add SPDX-license-identifier tags
|
||||
- [x] Format C-code in kernel style
|
||||
- [] Use existing funcionality to reuse maps by using BTF-defined maps
|
||||
|
||||
## Future
|
||||
- [] Use libxdp to load XDP program
|
||||
- [] Cleanup: Unload TC-BPF at program shutdown, and unpin and delete map - In userspace part
|
||||
- [] Add IPv6 support - In TC-BPF, XDP and userspace part
|
||||
- [] Check for existance of reverse flow before adding to hash-map (to avoid adding timestamps for flows that we can't see the reverse traffic for) - In TC-BPF part
|
||||
- This could miss the first few packets, would not be ideal for short flows
|
||||
- [] Keep track of minimum RTT for each flow (done by Pollere's pping, and helps identify buffer bloat) - In XDP part
|
||||
- [] Add configurable rate-limit for how often each flow can add entries to the map (prevent high-rate flows from quickly filling up the map) - In TCP-BPF part
|
||||
- [] Improve map cleaning: Use a dynamic time to live for hash map entries based on flow's RTT, instead of static 10s limit - In TC-BPF, XDP and userspace
|
545
pping/pping.c
545
pping/pping.c
@@ -1,3 +1,4 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
#include <bpf/bpf.h>
|
||||
#include <bpf/libbpf.h>
|
||||
#include <linux/if_link.h>
|
||||
@@ -27,7 +28,7 @@
|
||||
#define TCBPF_PROG_SEC "pping_egress"
|
||||
#define XDP_FLAGS XDP_FLAGS_UPDATE_IF_NOEXIST
|
||||
#define MAP_NAME "ts_start"
|
||||
#define MAP_CLEANUP_INTERVAL 1*BILLION // Clean timestamp map once per second
|
||||
#define MAP_CLEANUP_INTERVAL 1 * BILLION // Clean timestamp map once per second
|
||||
#define PERF_BUFFER_NAME "rtt_events"
|
||||
#define PERF_BUFFER_PAGES 64 // Related to the perf-buffer size?
|
||||
#define PERF_POLL_TIMEOUT_MS 100
|
||||
@@ -35,334 +36,362 @@
|
||||
#define MAX_COMMAND_LEN 1024
|
||||
#define ERROR_MSG_MAX 1024
|
||||
#define MAX_PATH_LEN 1024
|
||||
#define TIMESTAMP_LIFETIME 10*BILLION // Clear out entries from ts_start if they're over 10 seconds
|
||||
#define TIMESTAMP_LIFETIME 10 * BILLION // Clear out entries from ts_start if they're over 10 seconds
|
||||
|
||||
/* BPF implementation of pping using libbpf
|
||||
/*
|
||||
* BPF implementation of pping using libbpf
|
||||
* Uses TC-BPF for egress and XDP for ingress
|
||||
* On egrees, packets are parsed for TCP TSval, if found added to hashmap using flow+TSval as key, and current time as value
|
||||
* On ingress, packets are parsed for TCP TSecr, if found looksup hashmap using reverse-flow+TSecr as key, and calculates RTT as different between now map value
|
||||
* Calculated RTTs are pushed to userspace (together with the related flow) and printed out
|
||||
*
|
||||
* TODOs:
|
||||
* - Cleanup: Unload TC-BPF at program shutdown, and unpin and delete map - In userspace part
|
||||
* - Add IPv6 support - In TC-BPF, XDP and userspace part
|
||||
* - Check for existance of reverse flow before adding to hash-map (to avoid adding timestamps for flows that we can't see the reverse traffic for) - In TC-BPF part
|
||||
* - This could miss the first few packets, and would not be ideal for short flows
|
||||
* - Keep track of minimum RTT for each flow (done by Pollere's pping, and helps identify buffer bloat) - In XDP part
|
||||
* - Add configurable rate-limit for how often each flow can add entries to the map (prevent high-rate flows from quickly filling up the map) - In TCP-BPF part
|
||||
* - Improve map cleaning: Use a dynamic time to live for hash map entries based on flow's RTT, instead of static 10s limit - In TC-BPF, XDP and userspace
|
||||
* - On egrees, packets are parsed for TCP TSval,
|
||||
* if found added to hashmap using flow+TSval as key,
|
||||
* and current time as value
|
||||
* - On ingress, packets are parsed for TCP TSecr,
|
||||
* if found looksup hashmap using reverse-flow+TSecr as key,
|
||||
* and calculates RTT as different between now map value
|
||||
* - Calculated RTTs are pushed to userspace
|
||||
* (together with the related flow) and printed out
|
||||
*/
|
||||
|
||||
|
||||
struct map_cleanup_args {
|
||||
int map_fd;
|
||||
__u64 max_age_ns;
|
||||
int map_fd;
|
||||
__u64 max_age_ns;
|
||||
};
|
||||
|
||||
static volatile int keep_running = 1;
|
||||
|
||||
void abort_program(int sig)
|
||||
{
|
||||
keep_running = 0;
|
||||
keep_running = 0;
|
||||
}
|
||||
|
||||
static int set_rlimit(long int lim)
|
||||
{
|
||||
struct rlimit rlim = {
|
||||
.rlim_cur = lim,
|
||||
.rlim_max = lim,
|
||||
};
|
||||
struct rlimit rlim = {
|
||||
.rlim_cur = lim,
|
||||
.rlim_max = lim,
|
||||
};
|
||||
|
||||
return !setrlimit(RLIMIT_MEMLOCK, &rlim) ? 0 : -errno;
|
||||
return !setrlimit(RLIMIT_MEMLOCK, &rlim) ? 0 : -errno;
|
||||
}
|
||||
|
||||
static int bpf_obj_open(struct bpf_object **obj, const char *obj_path, enum bpf_prog_type prog_type)
|
||||
static int bpf_obj_open(struct bpf_object **obj, const char *obj_path,
|
||||
enum bpf_prog_type prog_type)
|
||||
{
|
||||
struct bpf_object_open_attr attr = {
|
||||
.prog_type = prog_type,
|
||||
.file = obj_path,
|
||||
};
|
||||
*obj = bpf_object__open_xattr(&attr);
|
||||
return libbpf_get_error(*obj);
|
||||
struct bpf_object_open_attr attr = {
|
||||
.prog_type = prog_type,
|
||||
.file = obj_path,
|
||||
};
|
||||
*obj = bpf_object__open_xattr(&attr);
|
||||
return libbpf_get_error(*obj);
|
||||
}
|
||||
|
||||
static int bpf_obj_load(struct bpf_object *obj, enum bpf_prog_type prog_type)
|
||||
{
|
||||
struct bpf_program *prog;
|
||||
bpf_object__for_each_program(prog, obj) {
|
||||
bpf_program__set_type(prog, prog_type);
|
||||
}
|
||||
struct bpf_program *prog;
|
||||
bpf_object__for_each_program(prog, obj)
|
||||
{
|
||||
bpf_program__set_type(prog, prog_type);
|
||||
}
|
||||
|
||||
return bpf_object__load(obj);
|
||||
return bpf_object__load(obj);
|
||||
}
|
||||
|
||||
static int reuse_pinned_map(int *map_fd, const char *map_name, const char *pinned_dir, struct bpf_object *obj, struct bpf_map_info *expec_map_info)
|
||||
static int reuse_pinned_map(int *map_fd, const char *map_name,
|
||||
const char *pinned_dir, struct bpf_object *obj,
|
||||
struct bpf_map_info *expec_map_info)
|
||||
{
|
||||
struct bpf_map *map;
|
||||
struct bpf_map_info map_info = {0};
|
||||
__u32 info_len = sizeof(map_info);
|
||||
char pinned_map_path[MAX_PATH_LEN];
|
||||
int err;
|
||||
struct bpf_map *map;
|
||||
struct bpf_map_info map_info = { 0 };
|
||||
__u32 info_len = sizeof(map_info);
|
||||
char pinned_map_path[MAX_PATH_LEN];
|
||||
int err;
|
||||
|
||||
// Find map in object file
|
||||
map = bpf_object__find_map_by_name(obj, map_name);
|
||||
err = libbpf_get_error(map);
|
||||
if (err) {
|
||||
fprintf(stderr, "Could not find map %s in object\n", map_name);
|
||||
return err;
|
||||
}
|
||||
// Find map in object file
|
||||
map = bpf_object__find_map_by_name(obj, map_name);
|
||||
err = libbpf_get_error(map);
|
||||
if (err) {
|
||||
fprintf(stderr, "Could not find map %s in object\n", map_name);
|
||||
return err;
|
||||
}
|
||||
|
||||
// Find pinned map
|
||||
snprintf(pinned_map_path, sizeof(pinned_map_path), "%s/%s", pinned_dir, map_name);
|
||||
*map_fd = bpf_obj_get(pinned_map_path);
|
||||
if (*map_fd < 0) {
|
||||
fprintf(stderr, "Could not find map %s in path %s\n", map_name, pinned_dir);
|
||||
return *map_fd;
|
||||
}
|
||||
// Find pinned map
|
||||
snprintf(pinned_map_path, sizeof(pinned_map_path), "%s/%s", pinned_dir,
|
||||
map_name);
|
||||
*map_fd = bpf_obj_get(pinned_map_path);
|
||||
if (*map_fd < 0) {
|
||||
fprintf(stderr, "Could not find map %s in path %s\n", map_name,
|
||||
pinned_dir);
|
||||
return *map_fd;
|
||||
}
|
||||
|
||||
// Verify map has expected format
|
||||
err = bpf_obj_get_info_by_fd(*map_fd, &map_info, &info_len);
|
||||
if (err) {
|
||||
fprintf(stderr, "Could not get map info from %s\n", pinned_map_path);
|
||||
return err;
|
||||
}
|
||||
// Verify map has expected format
|
||||
err = bpf_obj_get_info_by_fd(*map_fd, &map_info, &info_len);
|
||||
if (err) {
|
||||
fprintf(stderr, "Could not get map info from %s\n",
|
||||
pinned_map_path);
|
||||
return err;
|
||||
}
|
||||
|
||||
if (map_info.type != expec_map_info->type ||
|
||||
map_info.key_size != expec_map_info->key_size ||
|
||||
map_info.value_size != expec_map_info->value_size ||
|
||||
map_info.max_entries != expec_map_info->max_entries) {
|
||||
fprintf(stderr, "Pinned map at %s does not match expected format\n", pinned_map_path);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
// Try reusing map
|
||||
err = bpf_map__reuse_fd(map, *map_fd);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed reusing map fd\n");
|
||||
return err;
|
||||
}
|
||||
return 0;
|
||||
if (map_info.type != expec_map_info->type ||
|
||||
map_info.key_size != expec_map_info->key_size ||
|
||||
map_info.value_size != expec_map_info->value_size ||
|
||||
map_info.max_entries != expec_map_info->max_entries) {
|
||||
fprintf(stderr,
|
||||
"Pinned map at %s does not match expected format\n",
|
||||
pinned_map_path);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
// Try reusing map
|
||||
err = bpf_map__reuse_fd(map, *map_fd);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed reusing map fd\n");
|
||||
return err;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int xdp_detach(int ifindex, __u32 xdp_flags) {
|
||||
return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
|
||||
}
|
||||
|
||||
static int xdp_attach(struct bpf_object *obj, const char *sec, int ifindex, __u32 xdp_flags, bool force)
|
||||
static int xdp_detach(int ifindex, __u32 xdp_flags)
|
||||
{
|
||||
struct bpf_program *prog;
|
||||
int prog_fd;
|
||||
int err;
|
||||
return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
|
||||
}
|
||||
|
||||
if (sec)
|
||||
prog = bpf_object__find_program_by_title(obj, sec);
|
||||
else
|
||||
prog = bpf_program__next(NULL, obj);
|
||||
prog_fd = bpf_program__fd(prog);
|
||||
if (prog_fd < 0) {
|
||||
fprintf(stderr, "Could not find program to attach\n");
|
||||
return prog_fd;
|
||||
}
|
||||
static int xdp_attach(struct bpf_object *obj, const char *sec, int ifindex,
|
||||
__u32 xdp_flags, bool force)
|
||||
{
|
||||
struct bpf_program *prog;
|
||||
int prog_fd;
|
||||
int err;
|
||||
|
||||
if (force) // detach current (if any) xdp-program first
|
||||
xdp_detach(ifindex, xdp_flags);
|
||||
err = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
|
||||
if (err < 0) {
|
||||
fprintf(stderr, "Failed loading xdp-program on interface %d\n", ifindex);
|
||||
return err;
|
||||
}
|
||||
return 0;
|
||||
if (sec)
|
||||
prog = bpf_object__find_program_by_title(obj, sec);
|
||||
else
|
||||
prog = bpf_program__next(NULL, obj);
|
||||
prog_fd = bpf_program__fd(prog);
|
||||
if (prog_fd < 0) {
|
||||
fprintf(stderr, "Could not find program to attach\n");
|
||||
return prog_fd;
|
||||
}
|
||||
|
||||
if (force) // detach current (if any) xdp-program first
|
||||
xdp_detach(ifindex, xdp_flags);
|
||||
err = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
|
||||
if (err < 0) {
|
||||
fprintf(stderr, "Failed loading xdp-program on interface %d\n",
|
||||
ifindex);
|
||||
return err;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __u64 get_time_ns(clockid_t clockid)
|
||||
{
|
||||
struct timespec t;
|
||||
if (clock_gettime(clockid, &t) != 0) // CLOCK_BOOTTIME if using bpf_get_ktime_boot_ns
|
||||
return 0;
|
||||
return (__u64)t.tv_sec * BILLION + (__u64)t.tv_nsec;
|
||||
struct timespec t;
|
||||
if (clock_gettime(clockid, &t) != 0) // CLOCK_BOOTTIME if using bpf_get_ktime_boot_ns
|
||||
return 0;
|
||||
return (__u64)t.tv_sec * BILLION + (__u64)t.tv_nsec;
|
||||
}
|
||||
|
||||
static int remove_old_entries_from_map(int map_fd, __u64 max_age)
|
||||
{
|
||||
int removed = 0, entries = 0;
|
||||
struct ts_key key, prev_key = {0};
|
||||
struct ts_timestamp value;
|
||||
bool delete_prev = false;
|
||||
__u64 now_nsec = get_time_ns(CLOCK_MONOTONIC);
|
||||
if (now_nsec == 0)
|
||||
return -errno;
|
||||
int removed = 0, entries = 0;
|
||||
struct ts_key key, prev_key = { 0 };
|
||||
struct ts_timestamp value;
|
||||
bool delete_prev = false;
|
||||
__u64 now_nsec = get_time_ns(CLOCK_MONOTONIC);
|
||||
if (now_nsec == 0)
|
||||
return -errno;
|
||||
|
||||
// Cannot delete current key because then loop will reset, see https://www.bouncybouncy.net/blog/bpf_map_get_next_key-pitfalls/
|
||||
while(bpf_map_get_next_key(map_fd, &prev_key, &key) == 0) {
|
||||
if (delete_prev) {
|
||||
bpf_map_delete_elem(map_fd, &prev_key);
|
||||
removed++;
|
||||
delete_prev = false;
|
||||
}
|
||||
// Cannot delete current key because then loop will reset, see https://www.bouncybouncy.net/blog/bpf_map_get_next_key-pitfalls/
|
||||
while (bpf_map_get_next_key(map_fd, &prev_key, &key) == 0) {
|
||||
if (delete_prev) {
|
||||
bpf_map_delete_elem(map_fd, &prev_key);
|
||||
removed++;
|
||||
delete_prev = false;
|
||||
}
|
||||
|
||||
if (bpf_map_lookup_elem(map_fd, &key, &value) == 0) {
|
||||
if (now_nsec > value.timestamp && now_nsec - value.timestamp > max_age) {
|
||||
delete_prev = true;
|
||||
}
|
||||
}
|
||||
entries++;
|
||||
prev_key = key;
|
||||
}
|
||||
if (delete_prev) {
|
||||
bpf_map_delete_elem(map_fd, &prev_key);
|
||||
removed++;
|
||||
}
|
||||
__u64 duration = get_time_ns(CLOCK_MONOTONIC) - now_nsec;
|
||||
printf("Gone through %d entries and removed %d of them in %llu.%09llu s\n", entries, removed, duration / BILLION, duration % BILLION);
|
||||
return removed;
|
||||
if (bpf_map_lookup_elem(map_fd, &key, &value) == 0) {
|
||||
if (now_nsec > value.timestamp &&
|
||||
now_nsec - value.timestamp > max_age) {
|
||||
delete_prev = true;
|
||||
}
|
||||
}
|
||||
entries++;
|
||||
prev_key = key;
|
||||
}
|
||||
if (delete_prev) {
|
||||
bpf_map_delete_elem(map_fd, &prev_key);
|
||||
removed++;
|
||||
}
|
||||
__u64 duration = get_time_ns(CLOCK_MONOTONIC) - now_nsec;
|
||||
printf("Gone through %d entries and removed %d of them in %llu.%09llu s\n",
|
||||
entries, removed, duration / BILLION, duration % BILLION);
|
||||
return removed;
|
||||
}
|
||||
|
||||
static void *periodic_map_cleanup(void *args)
|
||||
{
|
||||
struct map_cleanup_args *argp = args;
|
||||
struct timespec interval;
|
||||
interval.tv_sec = MAP_CLEANUP_INTERVAL / BILLION;
|
||||
interval.tv_nsec = MAP_CLEANUP_INTERVAL % BILLION;
|
||||
while (keep_running) {
|
||||
remove_old_entries_from_map(argp->map_fd, argp->max_age_ns);
|
||||
nanosleep(&interval, NULL);
|
||||
}
|
||||
pthread_exit(NULL);
|
||||
struct map_cleanup_args *argp = args;
|
||||
struct timespec interval;
|
||||
interval.tv_sec = MAP_CLEANUP_INTERVAL / BILLION;
|
||||
interval.tv_nsec = MAP_CLEANUP_INTERVAL % BILLION;
|
||||
while (keep_running) {
|
||||
remove_old_entries_from_map(argp->map_fd, argp->max_age_ns);
|
||||
nanosleep(&interval, NULL);
|
||||
}
|
||||
pthread_exit(NULL);
|
||||
}
|
||||
|
||||
static void handle_rtt_event(void *ctx, int cpu, void *data, __u32 data_size)
|
||||
{
|
||||
const struct rtt_event *e = data;
|
||||
struct in_addr saddr, daddr;
|
||||
saddr.s_addr = e->flow.saddr;
|
||||
daddr.s_addr = e->flow.daddr;
|
||||
printf("%llu.%06llu ms %s:%d+%s:%d\n", e->rtt / MILLION, e->rtt % MILLION,
|
||||
inet_ntoa(saddr), ntohs(e->flow.sport),
|
||||
inet_ntoa(daddr), ntohs(e->flow.dport));
|
||||
const struct rtt_event *e = data;
|
||||
struct in_addr saddr, daddr;
|
||||
saddr.s_addr = e->flow.saddr;
|
||||
daddr.s_addr = e->flow.daddr;
|
||||
printf("%llu.%06llu ms %s:%d+%s:%d\n", e->rtt / MILLION,
|
||||
e->rtt % MILLION, inet_ntoa(saddr), ntohs(e->flow.sport),
|
||||
inet_ntoa(daddr), ntohs(e->flow.dport));
|
||||
}
|
||||
|
||||
static void handle_missed_rtt_event(void *ctx, int cpu, __u64 lost_cnt)
|
||||
{
|
||||
fprintf(stderr, "Lost %llu RTT events on CPU %d\n", lost_cnt, cpu);
|
||||
fprintf(stderr, "Lost %llu RTT events on CPU %d\n", lost_cnt, cpu);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 2) {
|
||||
printf("Usage: ./pping_user <dev>\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
if (argc < 2) {
|
||||
printf("Usage: ./pping_user <dev>\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
int err = 0, ifindex = 0;
|
||||
bool xdp_attached = false;
|
||||
struct perf_buffer *pb = NULL;
|
||||
int err = 0, ifindex = 0;
|
||||
bool xdp_attached = false;
|
||||
struct perf_buffer *pb = NULL;
|
||||
|
||||
// Increase rlimit
|
||||
err = set_rlimit(RMEMLIM);
|
||||
if (err) {
|
||||
fprintf(stderr, "Could not set rlimit to %ld bytes: %s\n", RMEMLIM, strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
// Increase rlimit
|
||||
err = set_rlimit(RMEMLIM);
|
||||
if (err) {
|
||||
fprintf(stderr, "Could not set rlimit to %ld bytes: %s\n",
|
||||
RMEMLIM, strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// Get index of interface
|
||||
ifindex = if_nametoindex(argv[1]);
|
||||
if (ifindex == 0) {
|
||||
err = -errno;
|
||||
fprintf(stderr, "Could not get index of interface %s: %s\n", argv[1], strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
// Get index of interface
|
||||
ifindex = if_nametoindex(argv[1]);
|
||||
if (ifindex == 0) {
|
||||
err = -errno;
|
||||
fprintf(stderr, "Could not get index of interface %s: %s\n",
|
||||
argv[1], strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
//Load tc-bpf section on interface egress
|
||||
char tc_bpf_load[MAX_COMMAND_LEN];
|
||||
snprintf(tc_bpf_load, MAX_COMMAND_LEN, "%s --dev %s --obj %s --sec %s",
|
||||
TCBPF_LOADER_SCRIPT, argv[1], PPING_TCBPF_OBJ, TCBPF_PROG_SEC);
|
||||
err = system(tc_bpf_load);
|
||||
if (err) {
|
||||
fprintf(stderr, "Could not load section %s of %s on interface %s: %s\n",
|
||||
TCBPF_PROG_SEC, PPING_TCBPF_OBJ, argv[1], strerror(err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// Reuse map pinned by tc for the xpd-program
|
||||
struct bpf_object *obj;
|
||||
int map_fd = 0;
|
||||
struct bpf_map_info expected_map_info = {
|
||||
.type = BPF_MAP_TYPE_HASH,
|
||||
.key_size = sizeof(struct ts_key),
|
||||
.value_size = sizeof(struct ts_timestamp),
|
||||
.max_entries = 16384,
|
||||
};
|
||||
//Load tc-bpf section on interface egress
|
||||
char tc_bpf_load[MAX_COMMAND_LEN];
|
||||
snprintf(tc_bpf_load, MAX_COMMAND_LEN, "%s --dev %s --obj %s --sec %s",
|
||||
TCBPF_LOADER_SCRIPT, argv[1], PPING_TCBPF_OBJ, TCBPF_PROG_SEC);
|
||||
err = system(tc_bpf_load);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
"Could not load section %s of %s on interface %s: %s\n",
|
||||
TCBPF_PROG_SEC, PPING_TCBPF_OBJ, argv[1],
|
||||
strerror(err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
err = bpf_obj_open(&obj, PPING_XDP_OBJ, BPF_PROG_TYPE_XDP);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed opening object file %s: %s\n", PPING_XDP_OBJ, strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
// Reuse map pinned by tc for the xpd-program
|
||||
struct bpf_object *obj;
|
||||
int map_fd = 0;
|
||||
struct bpf_map_info expected_map_info = {
|
||||
.type = BPF_MAP_TYPE_HASH,
|
||||
.key_size = sizeof(struct ts_key),
|
||||
.value_size = sizeof(struct ts_timestamp),
|
||||
.max_entries = 16384,
|
||||
};
|
||||
|
||||
err = reuse_pinned_map(&map_fd, MAP_NAME, PINNED_DIR, obj, &expected_map_info);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed reusing fd for map %s: %s\n", MAP_NAME, strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
err = bpf_obj_open(&obj, PPING_XDP_OBJ, BPF_PROG_TYPE_XDP);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed opening object file %s: %s\n",
|
||||
PPING_XDP_OBJ, strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// Load and attach XDP program
|
||||
err = bpf_obj_load(obj, BPF_PROG_TYPE_XDP);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed loading XDP program: %s\n", strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
err = xdp_attach(obj, XDP_PROG_SEC, ifindex, XDP_FLAGS, false);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed attaching XDP program to %s: %s\n", argv[1], strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
xdp_attached = true;
|
||||
err = reuse_pinned_map(&map_fd, MAP_NAME, PINNED_DIR, obj,
|
||||
&expected_map_info);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed reusing fd for map %s: %s\n", MAP_NAME,
|
||||
strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// Setup periodic cleanup of ts_start
|
||||
pthread_t tid;
|
||||
struct map_cleanup_args args = {.map_fd = map_fd, .max_age_ns = TIMESTAMP_LIFETIME};
|
||||
err = pthread_create(&tid, NULL, periodic_map_cleanup, &args);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed starting thread to perform periodic map cleanup: %s\n", strerror(err));
|
||||
goto cleanup;
|
||||
}
|
||||
// Load and attach XDP program
|
||||
err = bpf_obj_load(obj, BPF_PROG_TYPE_XDP);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed loading XDP program: %s\n",
|
||||
strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
err = xdp_attach(obj, XDP_PROG_SEC, ifindex, XDP_FLAGS, false);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed attaching XDP program to %s: %s\n",
|
||||
argv[1], strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
xdp_attached = true;
|
||||
|
||||
// Set up perf buffer
|
||||
struct perf_buffer_opts pb_opts;
|
||||
pb_opts.sample_cb = handle_rtt_event;
|
||||
pb_opts.lost_cb = handle_missed_rtt_event;
|
||||
|
||||
pb = perf_buffer__new(bpf_object__find_map_fd_by_name(obj, PERF_BUFFER_NAME), PERF_BUFFER_PAGES, &pb_opts);
|
||||
err = libbpf_get_error(pb);
|
||||
if (err) {
|
||||
pb = NULL;
|
||||
fprintf(stderr, "Failed to open perf buffer %s: %s\n", PERF_BUFFER_NAME, strerror(err));
|
||||
goto cleanup;
|
||||
}
|
||||
// Setup periodic cleanup of ts_start
|
||||
pthread_t tid;
|
||||
struct map_cleanup_args args = { .map_fd = map_fd,
|
||||
.max_age_ns = TIMESTAMP_LIFETIME };
|
||||
err = pthread_create(&tid, NULL, periodic_map_cleanup, &args);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
"Failed starting thread to perform periodic map cleanup: %s\n",
|
||||
strerror(err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// Clean exit on Ctrl-C
|
||||
signal(SIGINT, abort_program);
|
||||
// Set up perf buffer
|
||||
struct perf_buffer_opts pb_opts;
|
||||
pb_opts.sample_cb = handle_rtt_event;
|
||||
pb_opts.lost_cb = handle_missed_rtt_event;
|
||||
|
||||
// Main loop
|
||||
while(keep_running) {
|
||||
if ((err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS)) < 0) {
|
||||
if (keep_running) // Only print polling error if it wasn't caused by program termination
|
||||
fprintf(stderr, "Error polling perf buffer: %s\n", strerror(-err));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cleanup:
|
||||
perf_buffer__free(pb);
|
||||
if (xdp_attached) {
|
||||
err = xdp_detach(ifindex, XDP_FLAGS);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed deatching program from ifindex %d: %s\n", ifindex, strerror(-err));
|
||||
}
|
||||
}
|
||||
// TODO: Unload TC-BPF program
|
||||
// TODO: Unpin ts_start map
|
||||
|
||||
return err != 0;
|
||||
pb = perf_buffer__new(bpf_object__find_map_fd_by_name(obj,
|
||||
PERF_BUFFER_NAME),
|
||||
PERF_BUFFER_PAGES, &pb_opts);
|
||||
err = libbpf_get_error(pb);
|
||||
if (err) {
|
||||
pb = NULL;
|
||||
fprintf(stderr, "Failed to open perf buffer %s: %s\n",
|
||||
PERF_BUFFER_NAME, strerror(err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// Clean exit on Ctrl-C
|
||||
signal(SIGINT, abort_program);
|
||||
|
||||
// Main loop
|
||||
while (keep_running) {
|
||||
if ((err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS)) < 0) {
|
||||
if (keep_running) // Only print polling error if it wasn't caused by program termination
|
||||
fprintf(stderr,
|
||||
"Error polling perf buffer: %s\n",
|
||||
strerror(-err));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cleanup:
|
||||
perf_buffer__free(pb);
|
||||
if (xdp_attached) {
|
||||
err = xdp_detach(ifindex, XDP_FLAGS);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
"Failed deatching program from ifindex %d: %s\n",
|
||||
ifindex, strerror(-err));
|
||||
}
|
||||
}
|
||||
// TODO: Unload TC-BPF program
|
||||
// TODO: Unpin ts_start map
|
||||
|
||||
return err != 0;
|
||||
}
|
||||
|
||||
|
@@ -1,3 +1,4 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
#ifndef TIMESTAMP_MAP_H
|
||||
#define TIMESTAMP_MAP_H
|
||||
#include <linux/types.h>
|
||||
|
@@ -1,3 +1,4 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
#ifndef PPING_HELPERS_H
|
||||
#define PPING_HELPERS_H
|
||||
|
||||
|
@@ -1,3 +1,4 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <iproute2/bpf_elf.h>
|
||||
@@ -13,57 +14,57 @@
|
||||
#include "pping.h"
|
||||
#include "pping_helpers.h"
|
||||
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
struct bpf_elf_map SEC("maps") ts_start = {
|
||||
.type = BPF_MAP_TYPE_HASH,
|
||||
.size_key = sizeof(struct ts_key),
|
||||
.size_value = sizeof(struct ts_timestamp),
|
||||
.max_elem = 16384,
|
||||
.pinning = PIN_GLOBAL_NS,
|
||||
.type = BPF_MAP_TYPE_HASH,
|
||||
.size_key = sizeof(struct ts_key),
|
||||
.size_value = sizeof(struct ts_timestamp),
|
||||
.max_elem = 16384,
|
||||
.pinning = PIN_GLOBAL_NS,
|
||||
};
|
||||
|
||||
// TC-BFP for parsing TSVAL from egress traffic and add to map
|
||||
SEC("pping_egress")
|
||||
int tc_bpf_prog_egress(struct __sk_buff *skb)
|
||||
{
|
||||
void *data = (void *)(long)skb->data;
|
||||
void *data_end = (void *)(long)skb->data_end;
|
||||
void *data = (void *)(long)skb->data;
|
||||
void *data_end = (void *)(long)skb->data_end;
|
||||
|
||||
// bpf_printk("Sent packet of size %d bytes\n", data_end - data);
|
||||
|
||||
int proto = -1;
|
||||
struct hdr_cursor nh = {.pos = data};
|
||||
struct ethhdr *eth;
|
||||
struct iphdr *iph;
|
||||
struct tcphdr *tcph;
|
||||
//bpf_printk("Sent packet of size %d bytes\n", data_end - data);
|
||||
|
||||
proto = parse_ethhdr(&nh, data_end, ð);
|
||||
if (bpf_ntohs(proto) != ETH_P_IP)
|
||||
goto end; // Not IPv4 packet (or failed to parse ethernet header)
|
||||
proto = parse_iphdr(&nh, data_end, &iph);
|
||||
if (proto != IPPROTO_TCP)
|
||||
goto end; // Not a TCP packet (or failed to parse ethernet header)
|
||||
proto = parse_tcphdr(&nh, data_end, &tcph);
|
||||
if (proto < 0)
|
||||
goto end; // Failed parsing TCP-header
|
||||
int proto = -1;
|
||||
struct hdr_cursor nh = { .pos = data };
|
||||
struct ethhdr *eth;
|
||||
struct iphdr *iph;
|
||||
struct tcphdr *tcph;
|
||||
|
||||
//bpf_printk("TCP-packet with %d byte header and %lu bytes of data\n", proto, data_end - nh.pos);
|
||||
proto = parse_ethhdr(&nh, data_end, ð);
|
||||
if (bpf_ntohs(proto) != ETH_P_IP)
|
||||
goto end;
|
||||
proto = parse_iphdr(&nh, data_end, &iph);
|
||||
if (proto != IPPROTO_TCP)
|
||||
goto end;
|
||||
proto = parse_tcphdr(&nh, data_end, &tcph);
|
||||
if (proto < 0)
|
||||
goto end;
|
||||
|
||||
__u32 tsval, tsecr;
|
||||
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0) // No TCP timestamp
|
||||
goto end;
|
||||
// We have a TCP timestamp, try adding it to the map
|
||||
//bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", bpf_ntohl(tsval), bpf_ntohl(tsecr));
|
||||
struct ts_key key;
|
||||
fill_ipv4_flow(&(key.flow), iph->saddr, iph->daddr, tcph->source, tcph->dest);
|
||||
key.tsval = tsval;
|
||||
//bpf_printk("TCP-packet with %d byte header and %lu bytes of data\n", proto, data_end - nh.pos);
|
||||
|
||||
struct ts_timestamp ts = {0};
|
||||
ts.timestamp = bpf_ktime_get_ns(); // Consider using bpf_ktime_get_boot_ns if kernel supports it
|
||||
bpf_map_update_elem(&ts_start, &key, &ts, BPF_NOEXIST);
|
||||
__u32 tsval, tsecr;
|
||||
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0)
|
||||
goto end;
|
||||
// We have a TCP timestamp, try adding it to the map
|
||||
//bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", bpf_ntohl(tsval), bpf_ntohl(tsecr));
|
||||
struct ts_key key;
|
||||
fill_ipv4_flow(&(key.flow), iph->saddr, iph->daddr,
|
||||
tcph->source, tcph->dest);
|
||||
key.tsval = tsval;
|
||||
|
||||
end:
|
||||
return BPF_OK;
|
||||
struct ts_timestamp ts = { 0 };
|
||||
ts.timestamp = bpf_ktime_get_ns(); // or bpf_ktime_get_boot_ns
|
||||
bpf_map_update_elem(&ts_start, &key, &ts, BPF_NOEXIST);
|
||||
|
||||
end:
|
||||
return BPF_OK;
|
||||
}
|
||||
|
@@ -1,3 +1,4 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <xdp/parsing_helpers.h>
|
||||
@@ -15,63 +16,72 @@
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
struct bpf_map_def SEC("maps") ts_start = {
|
||||
.type = BPF_MAP_TYPE_HASH,
|
||||
.key_size = sizeof(struct ts_key),
|
||||
.value_size = sizeof(struct ts_timestamp),
|
||||
.max_entries = 16384,
|
||||
.type = BPF_MAP_TYPE_HASH,
|
||||
.key_size = sizeof(struct ts_key),
|
||||
.value_size = sizeof(struct ts_timestamp),
|
||||
.max_entries = 16384,
|
||||
};
|
||||
|
||||
struct bpf_map_def SEC("maps") rtt_events = {
|
||||
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
|
||||
.key_size = sizeof(__u32), // CPU ID
|
||||
.value_size = sizeof(__u32), // perf file descriptor?
|
||||
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
|
||||
.key_size = sizeof(__u32), // CPU ID
|
||||
.value_size = sizeof(__u32), // perf file descriptor?
|
||||
};
|
||||
|
||||
|
||||
// XDP program for parsing TSECR-val from ingress traffic and check for match in map
|
||||
SEC("pping_ingress")
|
||||
int xdp_prog_ingress(struct xdp_md *ctx)
|
||||
{
|
||||
void *data = (void *)(long)ctx->data;
|
||||
void *data_end = (void *)(long)ctx->data_end;
|
||||
int proto = -1;
|
||||
struct hdr_cursor nh = {.pos = data };
|
||||
struct ethhdr *eth;
|
||||
struct iphdr *iph;
|
||||
struct tcphdr *tcph;
|
||||
void *data = (void *)(long)ctx->data;
|
||||
void *data_end = (void *)(long)ctx->data_end;
|
||||
int proto = -1;
|
||||
struct hdr_cursor nh = { .pos = data };
|
||||
struct ethhdr *eth;
|
||||
struct iphdr *iph;
|
||||
struct tcphdr *tcph;
|
||||
|
||||
//bpf_printk("Received packet of length %d\n", (int)(data_end - data));
|
||||
proto = parse_ethhdr(&nh, data_end, ð);
|
||||
if (bpf_ntohs(proto) != ETH_P_IP)
|
||||
goto end; // Not IPv4 packet (or failed to parse ethernet header)
|
||||
proto = parse_iphdr(&nh, data_end, &iph);
|
||||
if (proto != IPPROTO_TCP)
|
||||
goto end; // Not a TCP packet (or failed to parse ethernet header)
|
||||
proto = parse_tcphdr(&nh, data_end, &tcph);
|
||||
if (proto < 0)
|
||||
goto end; // Failed parsing TCP-header
|
||||
//bpf_printk("Received packet of length %d\n", (int)(data_end - data));
|
||||
proto = parse_ethhdr(&nh, data_end, ð);
|
||||
if (bpf_ntohs(proto) != ETH_P_IP)
|
||||
goto end;
|
||||
proto = parse_iphdr(&nh, data_end, &iph);
|
||||
if (proto != IPPROTO_TCP)
|
||||
goto end;
|
||||
proto = parse_tcphdr(&nh, data_end, &tcph);
|
||||
if (proto < 0)
|
||||
goto end;
|
||||
|
||||
//bpf_printk("TCP-packet with %d byte header and %lu bytes of data\n", proto, data_end - nh.pos);
|
||||
//bpf_printk("TCP-packet with %d byte header and %lu bytes of data\n", proto, data_end - nh.pos);
|
||||
|
||||
__u32 tsval, tsecr;
|
||||
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0) // No TCP timestamp
|
||||
goto end;
|
||||
__u32 tsval, tsecr;
|
||||
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0)
|
||||
goto end;
|
||||
|
||||
// We have a TCP-timestamp - now we can check if it's in the map
|
||||
//bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", bpf_ntohl(tsval), bpf_ntohl(tsecr));
|
||||
struct ts_key key;
|
||||
fill_ipv4_flow(&(key.flow), iph->daddr, iph->saddr, tcph->dest, tcph->source); // Fill in reverse order of egress (dest <--> source)
|
||||
key.tsval = tsecr;
|
||||
struct ts_timestamp *ts = bpf_map_lookup_elem(&ts_start, &key);
|
||||
if (ts && ts->used == 0) { // Only calculate RTT for first packet with matching TSecr
|
||||
// As used is not set atomically with the lookup, could potentially have multiple "first" packets (on different CPUs), but all those should then also have very similar RTT, so don't consider it a significant issue
|
||||
ts->used = 1;
|
||||
// We have a TCP-timestamp - now we can check if it's in the map
|
||||
//bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", bpf_ntohl(tsval), bpf_ntohl(tsecr));
|
||||
struct ts_key key;
|
||||
// Fill in reverse order of egress (dest <--> source)
|
||||
fill_ipv4_flow(&(key.flow), iph->daddr, iph->saddr,
|
||||
tcph->dest, tcph->source);
|
||||
key.tsval = tsecr;
|
||||
struct ts_timestamp *ts = bpf_map_lookup_elem(&ts_start, &key);
|
||||
// Only calculate RTT for first packet with matching TSecr
|
||||
if (ts && ts->used == 0) {
|
||||
/*
|
||||
* As used is not set atomically with the lookup, could
|
||||
* potentially have multiple "first" packets (on different
|
||||
* CPUs), but all those should then also have very similar RTT,
|
||||
* so don't consider it a significant issue
|
||||
*/
|
||||
ts->used = 1;
|
||||
|
||||
struct rtt_event event = {0};
|
||||
memcpy(&(event.flow), &(key.flow), sizeof(struct ipv4_flow));
|
||||
event.rtt = bpf_ktime_get_ns() - ts->timestamp;
|
||||
bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU, &event, sizeof(event));
|
||||
//bpf_printk("Pushed rtt event with RTT: %llu\n", event.rtt);
|
||||
}
|
||||
end:
|
||||
return XDP_PASS;
|
||||
struct rtt_event event = { 0 };
|
||||
memcpy(&(event.flow), &(key.flow), sizeof(struct ipv4_flow));
|
||||
event.rtt = bpf_ktime_get_ns() - ts->timestamp;
|
||||
bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU,
|
||||
&event, sizeof(event));
|
||||
//bpf_printk("Pushed rtt event with RTT: %llu\n", event.rtt);
|
||||
}
|
||||
end:
|
||||
return XDP_PASS;
|
||||
}
|
||||
|
Reference in New Issue
Block a user