mirror of
https://github.com/xdp-project/bpf-examples.git
synced 2024-05-06 15:54:53 +00:00
pping: Major refactor and add -f and -c options
Merge the pping_kern_tc.c, pping_kern_xdp.c and pping_helpers.h into the single file pping_kern.c. Do not change any of the BPF code, except renaming the map ts_start to packet_ts. To handle both BPF programs kept in single ELF-file, change loading mechanism to extract and attach both tc and XDP programs from it. Also refactor main-method into several smaller functions to reduce its size. Finally, added the --force (-f) and --cleanup-interval (-c) options to the argument parsing, and improved the parsing of the --rate-limit (-r) option. NOTE: The verifier rejects program in it's current state as too large (over 1 million instructions). Setting the TCP_MAX_OPTIONS in pping_kern.c to 5 (or less) solves this. Unsure at the moment what causes the verifier to think the program is so large, as the code in pping_kern.c is identical to the one from the three files it was merged from. Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
@@ -1,12 +1,10 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
|
||||
|
||||
USER_TARGETS := pping
|
||||
TC_BPF_TARGETS := pping_kern_tc
|
||||
BPF_TARGETS := pping_kern_xdp
|
||||
BPF_TARGETS += $(TC_BPF_TARGETS)
|
||||
BPF_TARGETS := pping_kern
|
||||
|
||||
LDFLAGS += -pthread
|
||||
EXTRA_DEPS += pping.h pping_helpers.h
|
||||
EXTRA_DEPS += pping.h
|
||||
|
||||
LIB_DIR = ../lib
|
||||
|
||||
|
509
pping/pping.c
509
pping/pping.c
@@ -23,31 +23,18 @@ static const char *__doc__ =
|
||||
#include <time.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "pping.h" //key and value structs for the ts_start map
|
||||
#include "pping.h" //common structs for user-space and BPF parts
|
||||
|
||||
#define NS_PER_SECOND 1000000000UL
|
||||
#define NS_PER_MS 1000000UL
|
||||
|
||||
#define TCBPF_LOADER_SCRIPT "./bpf_egress_loader.sh"
|
||||
#define PINNED_DIR "/sys/fs/bpf/pping"
|
||||
#define PPING_XDP_OBJ "pping_kern_xdp.o"
|
||||
#define PPING_TCBPF_OBJ "pping_kern_tc.o"
|
||||
|
||||
#define XDP_FLAGS XDP_FLAGS_UPDATE_IF_NOEXIST
|
||||
|
||||
#define TS_MAP "ts_start"
|
||||
#define FLOW_MAP "flow_state"
|
||||
#define MAP_CLEANUP_INTERVAL \
|
||||
(1 * NS_PER_SECOND) // Clean timestamp map once per second
|
||||
#define TIMESTAMP_LIFETIME \
|
||||
(10 * NS_PER_SECOND) // Clear out packet timestamps if they're over 10 seconds
|
||||
#define FLOW_LIFETIME \
|
||||
#define FLOW_LIFETIME \
|
||||
(300 * NS_PER_SECOND) // Clear out flows if they're inactive over 300 seconds
|
||||
|
||||
#define DEFAULT_RATE_LIMIT \
|
||||
(100 * NS_PER_MS) // Allow one timestamp entry per flow every 100 ms
|
||||
|
||||
#define PERF_BUFFER "rtt_events"
|
||||
#define PERF_BUFFER_PAGES 64 // Related to the perf-buffer size?
|
||||
#define PERF_POLL_TIMEOUT_MS 100
|
||||
|
||||
@@ -68,16 +55,36 @@ static const char *__doc__ =
|
||||
|
||||
// Structure to contain arguments for clean_map (for passing to pthread_create)
|
||||
struct map_cleanup_args {
|
||||
__u64 cleanup_interval;
|
||||
int packet_map_fd;
|
||||
int flow_map_fd;
|
||||
};
|
||||
|
||||
// Store configuration values in struct to easily pass around
|
||||
struct pping_config {
|
||||
struct bpf_config bpf_config;
|
||||
__u64 cleanup_interval;
|
||||
int xdp_flags;
|
||||
int ifindex;
|
||||
char ifname[IF_NAMESIZE];
|
||||
bool force;
|
||||
char *object_path;
|
||||
char *ingress_sec;
|
||||
char *egress_sec;
|
||||
char *pin_dir;
|
||||
char *packet_map;
|
||||
char *flow_map;
|
||||
char *rtt_map;
|
||||
};
|
||||
|
||||
static volatile int keep_running = 1;
|
||||
|
||||
static const struct option long_options[] = {
|
||||
{ "help", no_argument, NULL, 'h' },
|
||||
{ "interface", required_argument, NULL, 'i' },
|
||||
{ "rate-limit", required_argument, NULL, 'r' },
|
||||
{ "help", no_argument, NULL, 'h' },
|
||||
{ "interface", required_argument, NULL, 'i' }, // Name of interface to run on
|
||||
{ "rate-limit", required_argument, NULL, 'r' }, // Sampling rate-limit in ms
|
||||
{ "force", no_argument, NULL, 'f' }, // Detach any existing XDP program on interface
|
||||
{ "cleanup-interval", required_argument, NULL, 'c' }, // Map cleaning interval in s
|
||||
{ 0, 0, NULL, 0 }
|
||||
};
|
||||
|
||||
@@ -96,15 +103,98 @@ static void print_usage(char *argv[])
|
||||
printf(" --%-12s", long_options[i].name);
|
||||
if (long_options[i].flag != NULL)
|
||||
printf(" flag (internal value:%d)",
|
||||
*long_options[i].flag);
|
||||
*long_options[i].flag);
|
||||
else
|
||||
printf(" short-option: -%c",
|
||||
long_options[i].val);
|
||||
printf(" short-option: -%c", long_options[i].val);
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static double parse_positive_double_argument(const char *str,
|
||||
const char *parname)
|
||||
{
|
||||
char *endptr;
|
||||
double val;
|
||||
val = strtod(str, &endptr);
|
||||
if (strlen(str) != endptr - str) {
|
||||
fprintf(stderr, "%s %s is not a valid number\n", parname, str);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (val < 0) {
|
||||
fprintf(stderr, "%s must be positive\n", parname);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static int parse_arguments(int argc, char *argv[], struct pping_config *config)
|
||||
{
|
||||
int err, opt;
|
||||
double rate_limit_ms, cleanup_interval_s;
|
||||
|
||||
config->ifindex = 0;
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "hfi:r:c:", long_options,
|
||||
NULL)) != -1) {
|
||||
switch (opt) {
|
||||
case 'i':
|
||||
if (strlen(optarg) > IF_NAMESIZE) {
|
||||
fprintf(stderr, "interface name too long\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
strncpy(config->ifname, optarg, IF_NAMESIZE);
|
||||
|
||||
config->ifindex = if_nametoindex(config->ifname);
|
||||
if (config->ifindex == 0) {
|
||||
err = -errno;
|
||||
fprintf(stderr,
|
||||
"Could not get index of interface %s: %s\n",
|
||||
config->ifname, strerror(err));
|
||||
return err;
|
||||
}
|
||||
break;
|
||||
case 'r':
|
||||
rate_limit_ms = parse_positive_double_argument(
|
||||
optarg, "rate-limit");
|
||||
if (rate_limit_ms < 0)
|
||||
return -EINVAL;
|
||||
|
||||
config->bpf_config.rate_limit =
|
||||
rate_limit_ms * NS_PER_MS;
|
||||
break;
|
||||
case 'c':
|
||||
cleanup_interval_s = parse_positive_double_argument(
|
||||
optarg, "cleanup-interval");
|
||||
if (cleanup_interval_s < 0)
|
||||
return -EINVAL;
|
||||
|
||||
config->cleanup_interval =
|
||||
cleanup_interval_s * NS_PER_SECOND;
|
||||
break;
|
||||
case 'f':
|
||||
config->force = true;
|
||||
break;
|
||||
case 'h':
|
||||
printf("HELP:\n");
|
||||
print_usage(argv);
|
||||
exit(0);
|
||||
default:
|
||||
fprintf(stderr, "Unknown option %s\n", argv[optind]);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
if (config->ifindex == 0) {
|
||||
fprintf(stderr,
|
||||
"An interface (-i or --interface) must be provided\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void abort_program(int sig)
|
||||
{
|
||||
keep_running = 0;
|
||||
@@ -121,7 +211,7 @@ static int set_rlimit(long int lim)
|
||||
}
|
||||
|
||||
static int bpf_obj_open(struct bpf_object **obj, const char *obj_path,
|
||||
char *map_path)
|
||||
const char *map_path)
|
||||
{
|
||||
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
|
||||
.pin_root_path = map_path);
|
||||
@@ -129,6 +219,50 @@ static int bpf_obj_open(struct bpf_object **obj, const char *obj_path,
|
||||
return libbpf_get_error(*obj);
|
||||
}
|
||||
|
||||
static int
|
||||
bpf_obj_run_prog_pindir_func(struct bpf_object *obj, const char *prog_title,
|
||||
const char *pin_dir,
|
||||
int (*func)(struct bpf_program *, const char *))
|
||||
{
|
||||
int len;
|
||||
struct bpf_program *prog;
|
||||
char path[MAX_PATH_LEN];
|
||||
|
||||
len = snprintf(path, MAX_PATH_LEN, "%s/%s", pin_dir, prog_title);
|
||||
if (len < 0)
|
||||
return len;
|
||||
if (len > MAX_PATH_LEN)
|
||||
return -ENAMETOOLONG;
|
||||
|
||||
prog = bpf_object__find_program_by_title(obj, prog_title);
|
||||
if (!prog || libbpf_get_error(prog))
|
||||
return prog ? libbpf_get_error(prog) : -EINVAL;
|
||||
|
||||
return func(prog, path);
|
||||
}
|
||||
|
||||
/*
|
||||
* Similar to bpf_object__pin_programs, but only attemps to pin a
|
||||
* single program prog_title at path pin_dir/prog_title
|
||||
*/
|
||||
static int bpf_obj_pin_program(struct bpf_object *obj, const char *prog_title,
|
||||
const char *pin_dir)
|
||||
{
|
||||
return bpf_obj_run_prog_pindir_func(obj, prog_title, pin_dir,
|
||||
bpf_program__pin);
|
||||
}
|
||||
|
||||
/*
|
||||
* Similar to bpf_object__unpin_programs, but only attempts to unpin a
|
||||
* single program prog_title at path pin_dir/prog_title.
|
||||
*/
|
||||
static int bpf_obj_unpin_program(struct bpf_object *obj, const char *prog_title,
|
||||
const char *pin_dir)
|
||||
{
|
||||
return bpf_obj_run_prog_pindir_func(obj, prog_title, pin_dir,
|
||||
bpf_program__unpin);
|
||||
}
|
||||
|
||||
static int xdp_detach(int ifindex, __u32 xdp_flags)
|
||||
{
|
||||
return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
|
||||
@@ -139,7 +273,6 @@ static int xdp_attach(struct bpf_object *obj, const char *sec, int ifindex,
|
||||
{
|
||||
struct bpf_program *prog;
|
||||
int prog_fd;
|
||||
int err;
|
||||
|
||||
if (sec)
|
||||
prog = bpf_object__find_program_by_title(obj, sec);
|
||||
@@ -147,21 +280,13 @@ static int xdp_attach(struct bpf_object *obj, const char *sec, int ifindex,
|
||||
prog = bpf_program__next(NULL, obj);
|
||||
|
||||
prog_fd = bpf_program__fd(prog);
|
||||
if (prog_fd < 0) {
|
||||
fprintf(stderr, "Could not find program to attach\n");
|
||||
if (prog_fd < 0)
|
||||
return prog_fd;
|
||||
}
|
||||
|
||||
if (force) // detach current (if any) xdp-program first
|
||||
xdp_detach(ifindex, xdp_flags);
|
||||
|
||||
err = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
|
||||
if (err < 0) {
|
||||
fprintf(stderr, "Failed loading xdp-program on interface %d\n",
|
||||
ifindex);
|
||||
return err;
|
||||
}
|
||||
return 0;
|
||||
return bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
|
||||
}
|
||||
|
||||
static int init_rodata(struct bpf_object *obj, void *src, size_t size)
|
||||
@@ -176,7 +301,7 @@ static int init_rodata(struct bpf_object *obj, void *src, size_t size)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int run_program(const char *path, char *const argv[])
|
||||
static int run_external_program(const char *path, char *const argv[])
|
||||
{
|
||||
int status;
|
||||
int ret = -1;
|
||||
@@ -196,22 +321,24 @@ static int run_program(const char *path, char *const argv[])
|
||||
}
|
||||
}
|
||||
|
||||
static int tc_bpf_attach(char *pin_dir, char *section, char *interface)
|
||||
static int tc_bpf_attach(const char *pin_dir, const char *section,
|
||||
char *interface)
|
||||
{
|
||||
char prog_path[MAX_PATH_LEN];
|
||||
char *const argv[] = { TCBPF_LOADER_SCRIPT, "--dev", interface, "--pinned", prog_path, NULL };
|
||||
char *const argv[] = { TCBPF_LOADER_SCRIPT, "--dev", interface,
|
||||
"--pinned", prog_path, NULL };
|
||||
|
||||
if(snprintf(prog_path, sizeof(prog_path), "%s/%s", pin_dir, section) < 0)
|
||||
if (snprintf(prog_path, sizeof(prog_path), "%s/%s", pin_dir, section) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
return run_program(TCBPF_LOADER_SCRIPT, argv);
|
||||
return run_external_program(TCBPF_LOADER_SCRIPT, argv);
|
||||
}
|
||||
|
||||
static int tc_bpf_clear(char *interface)
|
||||
{
|
||||
char *const argv[] = { TCBPF_LOADER_SCRIPT, "--dev", interface,
|
||||
"--remove", NULL };
|
||||
return run_program(TCBPF_LOADER_SCRIPT, argv);
|
||||
return run_external_program(TCBPF_LOADER_SCRIPT, argv);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -300,27 +427,18 @@ static int clean_map(int map_fd, size_t key_size, size_t value_size,
|
||||
duration % NS_PER_SECOND);
|
||||
#endif
|
||||
cleanup:
|
||||
if (key)
|
||||
free(key);
|
||||
if (prev_key)
|
||||
free(prev_key);
|
||||
if (value)
|
||||
free(value);
|
||||
free(key);
|
||||
free(prev_key);
|
||||
free(value);
|
||||
return removed;
|
||||
}
|
||||
|
||||
/*
|
||||
* Periodically cleans out entries from both the packet timestamp map and the
|
||||
* flow state map. Maybe better to split up the cleaning of the maps into two
|
||||
* separate threads instead, to better utilize multi-threading and allow for
|
||||
* maps to be cleaned up at different intervals?
|
||||
*/
|
||||
static void *periodic_map_cleanup(void *args)
|
||||
{
|
||||
struct map_cleanup_args *argp = args;
|
||||
struct timespec interval;
|
||||
interval.tv_sec = MAP_CLEANUP_INTERVAL / NS_PER_SECOND;
|
||||
interval.tv_nsec = MAP_CLEANUP_INTERVAL % NS_PER_SECOND;
|
||||
interval.tv_sec = argp->cleanup_interval / NS_PER_SECOND;
|
||||
interval.tv_nsec = argp->cleanup_interval % NS_PER_SECOND;
|
||||
|
||||
while (keep_running) {
|
||||
clean_map(argp->packet_map_fd, sizeof(struct packet_id),
|
||||
@@ -366,23 +484,125 @@ static void handle_missed_rtt_event(void *ctx, int cpu, __u64 lost_cnt)
|
||||
fprintf(stderr, "Lost %llu RTT events on CPU %d\n", lost_cnt, cpu);
|
||||
}
|
||||
|
||||
static int load_attach_bpfprogs(struct bpf_object **obj,
|
||||
struct pping_config *config, bool *tc_attached,
|
||||
bool *xdp_attached)
|
||||
{
|
||||
// Open and load ELF file
|
||||
int err = bpf_obj_open(obj, config->object_path, config->pin_dir);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed opening object file %s: %s\n",
|
||||
config->object_path, strerror(-err));
|
||||
return err;
|
||||
}
|
||||
|
||||
err = init_rodata(*obj, &config->bpf_config,
|
||||
sizeof(config->bpf_config));
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed pushing user-configration to %s: %s\n",
|
||||
config->object_path, strerror(-err));
|
||||
return err;
|
||||
}
|
||||
|
||||
err = bpf_object__load(*obj);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed loading bpf program in %s: %s\n",
|
||||
config->object_path, strerror(-err));
|
||||
return err;
|
||||
}
|
||||
|
||||
// Attach tc program
|
||||
err = bpf_obj_pin_program(*obj, config->egress_sec, config->pin_dir);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed pinning tc program to %s/%s: %s\n",
|
||||
config->pin_dir, config->egress_sec, strerror(-err));
|
||||
return err;
|
||||
}
|
||||
|
||||
err = tc_bpf_attach(config->pin_dir, config->egress_sec,
|
||||
config->ifname);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
"Failed attaching tc program on interface %s: %s\n",
|
||||
config->ifname, strerror(-err));
|
||||
return err;
|
||||
}
|
||||
*tc_attached = true;
|
||||
|
||||
// Attach XDP program
|
||||
err = xdp_attach(*obj, config->ingress_sec, config->ifindex,
|
||||
config->xdp_flags, config->force);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed attaching XDP program to %s%s: %s\n",
|
||||
config->ifname,
|
||||
config->force ? "" : ", ensure no other XDP program is already running on interface",
|
||||
strerror(-err));
|
||||
return err;
|
||||
}
|
||||
*xdp_attached = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int setup_periodical_map_cleaning(struct bpf_object *obj,
|
||||
struct pping_config *config)
|
||||
{
|
||||
pthread_t tid;
|
||||
struct map_cleanup_args clean_args = {
|
||||
.cleanup_interval = config->cleanup_interval
|
||||
};
|
||||
int err;
|
||||
|
||||
clean_args.packet_map_fd =
|
||||
bpf_object__find_map_fd_by_name(obj, config->packet_map);
|
||||
if (clean_args.packet_map_fd < 0) {
|
||||
fprintf(stderr, "Could not get file descriptor of map %s: %s\n",
|
||||
config->packet_map,
|
||||
strerror(-clean_args.packet_map_fd));
|
||||
return clean_args.packet_map_fd;
|
||||
}
|
||||
|
||||
clean_args.flow_map_fd =
|
||||
bpf_object__find_map_fd_by_name(obj, config->flow_map);
|
||||
if (clean_args.flow_map_fd < 0) {
|
||||
fprintf(stderr, "Could not get file descriptor of map %s: %s\n",
|
||||
config->flow_map, strerror(-clean_args.flow_map_fd));
|
||||
return clean_args.packet_map_fd;
|
||||
}
|
||||
|
||||
err = pthread_create(&tid, NULL, periodic_map_cleanup, &clean_args);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
"Failed starting thread to perform periodic map cleanup: %s\n",
|
||||
strerror(-err));
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int err = 0;
|
||||
int ifindex = 0;
|
||||
int opt, longindex = 0;
|
||||
char ifname[IF_NAMESIZE];
|
||||
unsigned long rate_limit_ms = -1;
|
||||
bool xdp_attached = false;
|
||||
|
||||
bool tc_attached = false;
|
||||
bool xdp_attached = false;
|
||||
|
||||
struct bpf_object *xdp_obj = NULL;
|
||||
struct bpf_object *tc_obj = NULL;
|
||||
struct bpf_object *obj = NULL;
|
||||
|
||||
struct user_config config = { .rate_limit = DEFAULT_RATE_LIMIT };
|
||||
|
||||
pthread_t tid;
|
||||
struct map_cleanup_args clean_args;
|
||||
struct pping_config config = {
|
||||
.bpf_config = { .rate_limit = 100 * NS_PER_MS },
|
||||
.cleanup_interval = 1 * NS_PER_SECOND,
|
||||
.object_path = "pping_kern.o",
|
||||
.ingress_sec = INGRESS_PROG_SEC,
|
||||
.egress_sec = EGRESS_PROG_SEC,
|
||||
.pin_dir = "/sys/fs/bpf/pping",
|
||||
.packet_map = "packet_ts",
|
||||
.flow_map = "flow_state",
|
||||
.rtt_map = "rtt_events",
|
||||
.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST,
|
||||
.force = false,
|
||||
};
|
||||
|
||||
struct perf_buffer *pb = NULL;
|
||||
struct perf_buffer_opts pb_opts = {
|
||||
@@ -404,150 +624,38 @@ int main(int argc, char *argv[])
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "hi:r:", long_options,
|
||||
&longindex)) != -1) {
|
||||
switch (opt) {
|
||||
case 'i':
|
||||
if (strlen(optarg) > IF_NAMESIZE) {
|
||||
fprintf(stderr, "interface name too long\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
strncpy(ifname, optarg, IF_NAMESIZE);
|
||||
ifindex = if_nametoindex(ifname);
|
||||
if (ifindex == 0) {
|
||||
err = -errno;
|
||||
fprintf(stderr,
|
||||
"Could not get index of interface %s: %s\n",
|
||||
ifname, strerror(-err));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
break;
|
||||
case 'r':
|
||||
rate_limit_ms = strtoul(optarg, NULL, 10);
|
||||
if (rate_limit_ms == ULONG_MAX) {
|
||||
fprintf(stderr,
|
||||
"rate-limit \"%s\" ms is invalid\n",
|
||||
optarg);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
config.rate_limit = rate_limit_ms * NS_PER_MS;
|
||||
break;
|
||||
case 'h':
|
||||
print_usage(argv);
|
||||
return 0;
|
||||
default:
|
||||
print_usage(argv);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
if (ifindex == 0) {
|
||||
fprintf(stderr,
|
||||
"An interface (-i or --interface) must be provided\n");
|
||||
err = parse_arguments(argc, argv, &config);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed parsing arguments: %s\n",
|
||||
strerror(-err));
|
||||
print_usage(argv);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Load and attach the XDP program
|
||||
err = bpf_obj_open(&xdp_obj, PPING_XDP_OBJ, PINNED_DIR);
|
||||
err = load_attach_bpfprogs(&obj, &config, &tc_attached, &xdp_attached);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed opening object file %s: %s\n",
|
||||
PPING_XDP_OBJ, strerror(-err));
|
||||
fprintf(stderr,
|
||||
"Failed loading and attaching BPF programs in %s\n",
|
||||
config.object_path);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
err = bpf_object__load(xdp_obj);
|
||||
err = setup_periodical_map_cleaning(obj, &config);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed loading XDP program: %s\n",
|
||||
fprintf(stderr, "Failed setting up map cleaning: %s\n",
|
||||
strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
err = xdp_attach(xdp_obj, XDP_PROG_SEC, ifindex, XDP_FLAGS, false);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed attaching XDP program to %s: %s\n",
|
||||
ifname, strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
xdp_attached = true;
|
||||
|
||||
// Load, pin and attach tc program on egress
|
||||
err = bpf_obj_open(&tc_obj, PPING_TCBPF_OBJ, PINNED_DIR);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed opening object file %s: %s\n",
|
||||
PPING_TCBPF_OBJ, strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
err = init_rodata(tc_obj, &config, sizeof(config));
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed pushing user-configration to %s: %s\n",
|
||||
PPING_TCBPF_OBJ, strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
err = bpf_object__load(tc_obj);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed loading tc program: %s\n",
|
||||
strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
err = bpf_object__pin_programs(tc_obj, PINNED_DIR);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed pinning tc program to %s: %s\n",
|
||||
PINNED_DIR, strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
err = tc_bpf_attach(PINNED_DIR, TCBPF_PROG_SEC, ifname);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
"Failed attaching tc program on interface %s: %s\n",
|
||||
ifname, strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
tc_attached = true;
|
||||
|
||||
// Set up the periodical map cleaning
|
||||
clean_args.packet_map_fd =
|
||||
bpf_object__find_map_fd_by_name(xdp_obj, TS_MAP);
|
||||
if (clean_args.packet_map_fd < 0) {
|
||||
fprintf(stderr,
|
||||
"Could not get file descriptor of map %s in object %s: %s\n",
|
||||
TS_MAP, PPING_XDP_OBJ,
|
||||
strerror(-clean_args.packet_map_fd));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
clean_args.flow_map_fd =
|
||||
bpf_object__find_map_fd_by_name(tc_obj, FLOW_MAP);
|
||||
if (clean_args.flow_map_fd < 0) {
|
||||
fprintf(stderr,
|
||||
"Could not get file descriptor of map %s in object %s: %s\n",
|
||||
FLOW_MAP, PPING_TCBPF_OBJ,
|
||||
strerror(-clean_args.flow_map_fd));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
err = pthread_create(&tid, NULL, periodic_map_cleanup, &clean_args);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
"Failed starting thread to perform periodic map cleanup: %s\n",
|
||||
strerror(err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// Set up perf buffer
|
||||
pb = perf_buffer__new(bpf_object__find_map_fd_by_name(xdp_obj,
|
||||
PERF_BUFFER),
|
||||
pb = perf_buffer__new(bpf_object__find_map_fd_by_name(obj,
|
||||
config.rtt_map),
|
||||
PERF_BUFFER_PAGES, &pb_opts);
|
||||
err = libbpf_get_error(pb);
|
||||
if (err) {
|
||||
pb = NULL;
|
||||
fprintf(stderr, "Failed to open perf buffer %s: %s\n",
|
||||
PERF_BUFFER, strerror(err));
|
||||
config.rtt_map, strerror(err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@@ -569,31 +677,30 @@ cleanup:
|
||||
perf_buffer__free(pb);
|
||||
|
||||
if (xdp_attached) {
|
||||
err = xdp_detach(ifindex, XDP_FLAGS);
|
||||
err = xdp_detach(config.ifindex, config.xdp_flags);
|
||||
if (err)
|
||||
fprintf(stderr,
|
||||
"Failed deatching program from ifindex %d: %s\n",
|
||||
ifindex, strerror(-err));
|
||||
"Failed deatching program from ifindex %s: %s\n",
|
||||
config.ifname, strerror(-err));
|
||||
}
|
||||
|
||||
if (tc_attached) {
|
||||
err = tc_bpf_clear(ifname);
|
||||
err = tc_bpf_clear(config.ifname);
|
||||
if (err)
|
||||
fprintf(stderr,
|
||||
"Failed removing tc-bpf program from interface %s: %s\n",
|
||||
argv[1], strerror(-err));
|
||||
config.ifname, strerror(-err));
|
||||
}
|
||||
|
||||
if (tc_obj) {
|
||||
err = bpf_object__unpin_programs(tc_obj, PINNED_DIR);
|
||||
if (obj && !libbpf_get_error(obj)) {
|
||||
err = bpf_obj_unpin_program(obj, config.egress_sec,
|
||||
config.pin_dir);
|
||||
if (err)
|
||||
fprintf(stderr,
|
||||
"Failed unpinning tc program from %s: %s\n",
|
||||
PINNED_DIR, strerror(-err));
|
||||
}
|
||||
config.pin_dir, strerror(-err));
|
||||
|
||||
if (xdp_obj) {
|
||||
err = bpf_object__unpin_maps(xdp_obj, NULL);
|
||||
err = bpf_object__unpin_maps(obj, NULL);
|
||||
if (err)
|
||||
fprintf(stderr, "Failed unpinning maps: %s\n",
|
||||
strerror(-err));
|
||||
|
@@ -5,10 +5,10 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/in6.h>
|
||||
|
||||
#define XDP_PROG_SEC "xdp"
|
||||
#define TCBPF_PROG_SEC "classifier"
|
||||
#define INGRESS_PROG_SEC "xdp"
|
||||
#define EGRESS_PROG_SEC "classifier"
|
||||
|
||||
struct user_config {
|
||||
struct bpf_config {
|
||||
__u64 rate_limit;
|
||||
};
|
||||
|
||||
|
@@ -1,8 +1,6 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
#ifndef PPING_HELPERS_H
|
||||
#define PPING_HELPERS_H
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <xdp/parsing_helpers.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/in6.h>
|
||||
@@ -10,8 +8,8 @@
|
||||
#include <linux/ip.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include <linux/tcp.h>
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "pping.h"
|
||||
|
||||
#define AF_INET 2
|
||||
@@ -26,25 +24,26 @@
|
||||
* header encloses.
|
||||
*/
|
||||
struct parsing_context {
|
||||
void *data; //Start of eth hdr
|
||||
void *data_end; //End of safe acessible area
|
||||
void *data; //Start of eth hdr
|
||||
void *data_end; //End of safe acessible area
|
||||
struct hdr_cursor nh; //Position to parse next
|
||||
__u32 pkt_len; //Full packet length (headers+data)
|
||||
bool is_egress; //Is packet on egress or ingress?
|
||||
};
|
||||
|
||||
static volatile const struct user_config config = {};
|
||||
char _license[] SEC("license") = "GPL";
|
||||
// Global config struct - set from userspace
|
||||
static volatile const struct bpf_config config = {};
|
||||
|
||||
// Timestamp map
|
||||
// Map definitions
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__type(key, struct packet_id);
|
||||
__type(value, __u64);
|
||||
__uint(max_entries, 16384);
|
||||
__uint(pinning, LIBBPF_PIN_BY_NAME);
|
||||
} ts_start SEC(".maps");
|
||||
} packet_ts SEC(".maps");
|
||||
|
||||
// Flow state map
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__type(key, struct network_tuple);
|
||||
@@ -53,6 +52,14 @@ struct {
|
||||
__uint(pinning, LIBBPF_PIN_BY_NAME);
|
||||
} flow_state SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(__u32));
|
||||
__uint(value_size, sizeof(__u32));
|
||||
} rtt_events SEC(".maps");
|
||||
|
||||
// Help functions
|
||||
|
||||
/*
|
||||
* Maps an IPv4 address into an IPv6 address according to RFC 4291 sec 2.5.5.2
|
||||
*/
|
||||
@@ -76,7 +83,8 @@ static int parse_tcp_ts(struct tcphdr *tcph, void *data_end, __u32 *tsval,
|
||||
void *opt_end = (void *)tcph + len;
|
||||
__u8 *pos = (__u8 *)(tcph + 1); //Current pos in TCP options
|
||||
__u8 i, opt;
|
||||
volatile __u8 opt_size; // Seems to ensure it's always read of from stack as u8
|
||||
volatile __u8
|
||||
opt_size; // Seems to ensure it's always read of from stack as u8
|
||||
|
||||
if (tcph + 1 > data_end || len <= sizeof(struct tcphdr))
|
||||
return -1;
|
||||
@@ -218,4 +226,132 @@ static int parse_packet_identifier(struct parsing_context *ctx,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
// Programs
|
||||
|
||||
// TC-BFP for parsing packet identifier from egress traffic and add to map
|
||||
SEC(EGRESS_PROG_SEC)
|
||||
int pping_egress(struct __sk_buff *skb)
|
||||
{
|
||||
struct packet_id p_id = { 0 };
|
||||
__u64 p_ts;
|
||||
struct parsing_context pctx = {
|
||||
.data = (void *)(long)skb->data,
|
||||
.data_end = (void *)(long)skb->data_end,
|
||||
.pkt_len = skb->len,
|
||||
.nh = { .pos = pctx.data },
|
||||
.is_egress = true,
|
||||
};
|
||||
bool flow_closing = false;
|
||||
struct flow_state *f_state;
|
||||
struct flow_state new_state = { 0 };
|
||||
|
||||
if (parse_packet_identifier(&pctx, &p_id, &flow_closing) < 0)
|
||||
goto out;
|
||||
|
||||
// Delete flow and create no timestamp entry if flow is closing
|
||||
if (flow_closing) {
|
||||
bpf_map_delete_elem(&flow_state, &p_id.flow);
|
||||
goto out;
|
||||
}
|
||||
|
||||
// Check flow state
|
||||
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
|
||||
if (!f_state) { // No previous state - attempt to create it
|
||||
bpf_map_update_elem(&flow_state, &p_id.flow, &new_state,
|
||||
BPF_NOEXIST);
|
||||
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
|
||||
if (!f_state)
|
||||
goto out;
|
||||
}
|
||||
|
||||
// Check if identfier is new
|
||||
/* The gap between checking and updating last_id may cause concurrency
|
||||
* issues where multiple packets may simultaneously think they are the
|
||||
* first with a new identifier. As long as all of the identifiers are
|
||||
* the same though, only one should be able to create a timestamp entry.
|
||||
|
||||
* A bigger issue is that older identifiers (for example due to
|
||||
* out-of-order packets) may pass this check and update the current
|
||||
* identifier to an old one. This means that both the packet with the
|
||||
* old identifier itself as well the next packet with the current
|
||||
* identifier may be considered packets with new identifiers (even if
|
||||
* both have been seen before). For TCP timestamps this could be
|
||||
* prevented by changing the check to '>=' instead, but it may not be
|
||||
* suitable for other protocols, such as QUIC and its spinbit.
|
||||
*
|
||||
* For now, just hope that the rate limit saves us from creating an
|
||||
* incorrect timestamp. That may however also fail, either due to the
|
||||
* to it happening in a time it's not limited by rate sampling, or
|
||||
* because of rate check failing due to concurrency issues.
|
||||
*/
|
||||
if (f_state->last_id == p_id.identifier)
|
||||
goto out;
|
||||
f_state->last_id = p_id.identifier;
|
||||
|
||||
// Check rate-limit
|
||||
/*
|
||||
* The window between checking and updating last_timestamp may cause
|
||||
* concurrency issues, where multiple packets simultaneously pass the
|
||||
* rate limit. However, as long as they have the same identifier, only
|
||||
* a single timestamp entry should successfully be created.
|
||||
*/
|
||||
p_ts = bpf_ktime_get_ns(); // or bpf_ktime_get_boot_ns
|
||||
if (p_ts < f_state->last_timestamp ||
|
||||
p_ts - f_state->last_timestamp < config.rate_limit)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Updates attempt at creating timestamp, even if creation of timestamp
|
||||
* fails (due to map being full). This should make the competition for
|
||||
* the next available map slot somewhat fairer between heavy and sparse
|
||||
* flows.
|
||||
*/
|
||||
f_state->last_timestamp = p_ts;
|
||||
bpf_map_update_elem(&packet_ts, &p_id, &p_ts, BPF_NOEXIST);
|
||||
|
||||
out:
|
||||
return BPF_OK;
|
||||
}
|
||||
|
||||
// XDP program for parsing identifier in ingress traffic and check for match in map
|
||||
SEC(INGRESS_PROG_SEC)
|
||||
int pping_ingress(struct xdp_md *ctx)
|
||||
{
|
||||
struct packet_id p_id = { 0 };
|
||||
__u64 *p_ts;
|
||||
struct rtt_event event = { 0 };
|
||||
struct parsing_context pctx = {
|
||||
.data = (void *)(long)ctx->data,
|
||||
.data_end = (void *)(long)ctx->data_end,
|
||||
.pkt_len = pctx.data_end - pctx.data,
|
||||
.nh = { .pos = pctx.data },
|
||||
.is_egress = false,
|
||||
};
|
||||
bool flow_closing = false;
|
||||
|
||||
if (parse_packet_identifier(&pctx, &p_id, &flow_closing) < 0)
|
||||
goto out;
|
||||
|
||||
// Delete flow, but allow final attempt at RTT calculation
|
||||
if (flow_closing)
|
||||
bpf_map_delete_elem(&flow_state, &p_id.flow);
|
||||
|
||||
p_ts = bpf_map_lookup_elem(&packet_ts, &p_id);
|
||||
if (!p_ts)
|
||||
goto out;
|
||||
|
||||
event.rtt = bpf_ktime_get_ns() - *p_ts;
|
||||
/*
|
||||
* Attempt to delete timestamp entry as soon as RTT is calculated.
|
||||
* But could have potential concurrency issue where multiple packets
|
||||
* manage to match against the identifier before it can be deleted.
|
||||
*/
|
||||
bpf_map_delete_elem(&packet_ts, &p_id);
|
||||
|
||||
__builtin_memcpy(&event.flow, &p_id.flow, sizeof(struct network_tuple));
|
||||
bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU, &event,
|
||||
sizeof(event));
|
||||
|
||||
out:
|
||||
return XDP_PASS;
|
||||
}
|
@@ -1,97 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <iproute2/bpf_elf.h>
|
||||
|
||||
#include "pping.h"
|
||||
#include "pping_helpers.h"
|
||||
|
||||
#define RATE_LIMIT \
|
||||
100000000UL // 100ms. Temporary solution, should be set by userspace
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
// TC-BFP for parsing packet identifier from egress traffic and add to map
|
||||
SEC(TCBPF_PROG_SEC)
|
||||
int pping_egress(struct __sk_buff *skb)
|
||||
{
|
||||
struct packet_id p_id = { 0 };
|
||||
__u64 p_ts;
|
||||
struct parsing_context pctx = {
|
||||
.data = (void *)(long)skb->data,
|
||||
.data_end = (void *)(long)skb->data_end,
|
||||
.pkt_len = skb->len,
|
||||
.nh = { .pos = pctx.data },
|
||||
.is_egress = true,
|
||||
};
|
||||
bool flow_closing = false;
|
||||
struct flow_state *f_state;
|
||||
struct flow_state new_state = { 0 };
|
||||
|
||||
if (parse_packet_identifier(&pctx, &p_id, &flow_closing) < 0)
|
||||
goto out;
|
||||
|
||||
// Delete flow and create no timestamp entry if flow is closing
|
||||
if (flow_closing) {
|
||||
bpf_map_delete_elem(&flow_state, &p_id.flow);
|
||||
goto out;
|
||||
}
|
||||
|
||||
// Check flow state
|
||||
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
|
||||
if (!f_state) { // No previous state - attempt to create it
|
||||
bpf_map_update_elem(&flow_state, &p_id.flow, &new_state,
|
||||
BPF_NOEXIST);
|
||||
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
|
||||
if (!f_state)
|
||||
goto out;
|
||||
}
|
||||
|
||||
// Check if identfier is new
|
||||
/* The gap between checking and updating last_id may cause concurrency
|
||||
* issues where multiple packets may simultaneously think they are the
|
||||
* first with a new identifier. As long as all of the identifiers are
|
||||
* the same though, only one should be able to create a timestamp entry.
|
||||
|
||||
* A bigger issue is that older identifiers (for example due to
|
||||
* out-of-order packets) may pass this check and update the current
|
||||
* identifier to an old one. This means that both the packet with the
|
||||
* old identifier itself, as well the next packet with the current
|
||||
* identifier, may be considered packets with new identifiers (even if
|
||||
* both have been seen before). For TCP timestamps this could be
|
||||
* prevented by changing the check to '>=' instead, but it may not be
|
||||
* suitable for other protocols, such as QUIC and its spinbit.
|
||||
*
|
||||
* For now, just hope that the rate limit saves us from creating an
|
||||
* incorrect timestamp. That may however also fail, either due to the
|
||||
* to it happening in a time it's not limited by rate sampling, or
|
||||
* because of rate check failing due to concurrency issues.
|
||||
*/
|
||||
if (f_state->last_id == p_id.identifier)
|
||||
goto out;
|
||||
f_state->last_id = p_id.identifier;
|
||||
|
||||
// Check rate-limit
|
||||
/*
|
||||
* The window between checking and updating last_timestamp may cause
|
||||
* concurrency issues, where multiple packets simultaneously pass the
|
||||
* rate limit. However, as long as they have the same identifier, only
|
||||
* a single timestamp entry should successfully be created.
|
||||
*/
|
||||
p_ts = bpf_ktime_get_ns(); // or bpf_ktime_get_boot_ns
|
||||
if (p_ts < f_state->last_timestamp ||
|
||||
p_ts - f_state->last_timestamp < config.rate_limit)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Updates attempt at creating timestamp, even if creation of timestamp
|
||||
* fails (due to map being full). This should make the competition for
|
||||
* the next available map slot somewhat fairer between heavy and sparse
|
||||
* flows.
|
||||
*/
|
||||
f_state->last_timestamp = p_ts;
|
||||
bpf_map_update_elem(&ts_start, &p_id, &p_ts, BPF_NOEXIST);
|
||||
|
||||
out:
|
||||
return BPF_OK;
|
||||
}
|
@@ -1,57 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
#include "pping.h"
|
||||
#include "pping_helpers.h"
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(__u32));
|
||||
__uint(value_size, sizeof(__u32));
|
||||
} rtt_events SEC(".maps");
|
||||
|
||||
// XDP program for parsing identifier in ingress traffic and check for match in map
|
||||
SEC(XDP_PROG_SEC)
|
||||
int pping_ingress(struct xdp_md *ctx)
|
||||
{
|
||||
struct packet_id p_id = { 0 };
|
||||
__u64 *p_ts;
|
||||
struct rtt_event event = { 0 };
|
||||
struct parsing_context pctx = {
|
||||
.data = (void *)(long)ctx->data,
|
||||
.data_end = (void *)(long)ctx->data_end,
|
||||
.pkt_len = pctx.data_end - pctx.data,
|
||||
.nh = { .pos = pctx.data },
|
||||
.is_egress = false,
|
||||
};
|
||||
bool flow_closing = false;
|
||||
|
||||
if (parse_packet_identifier(&pctx, &p_id, &flow_closing) < 0)
|
||||
goto out;
|
||||
|
||||
// Delete flow, but allow final attempt at RTT calculation
|
||||
if (flow_closing)
|
||||
bpf_map_delete_elem(&flow_state, &p_id.flow);
|
||||
|
||||
p_ts = bpf_map_lookup_elem(&ts_start, &p_id);
|
||||
if (!p_ts)
|
||||
goto out;
|
||||
|
||||
event.rtt = bpf_ktime_get_ns() - *p_ts;
|
||||
/*
|
||||
* Attempt to delete timestamp entry as soon as RTT is calculated.
|
||||
* But could have potential concurrency issue where multiple packets
|
||||
* manage to match against the identifier before it can be deleted.
|
||||
*/
|
||||
bpf_map_delete_elem(&ts_start, &p_id);
|
||||
|
||||
__builtin_memcpy(&event.flow, &p_id.flow, sizeof(struct network_tuple));
|
||||
bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU, &event,
|
||||
sizeof(event));
|
||||
|
||||
out:
|
||||
return XDP_PASS;
|
||||
}
|
Reference in New Issue
Block a user