diff --git a/pping/Makefile b/pping/Makefile index f323ee9..a8ffd91 100644 --- a/pping/Makefile +++ b/pping/Makefile @@ -6,29 +6,8 @@ BPF_TARGETS := pping_kern_xdp BPF_TARGETS += $(TC_BPF_TARGETS) LDFLAGS += -pthread -EXTRA_DEPS += config.mk pping.h pping_helpers.h +EXTRA_DEPS += pping.h pping_helpers.h LIB_DIR = ../lib include $(LIB_DIR)/common.mk -include config.mk - -all: config.mk - -config.mk: configure - @sh configure - -ifndef HAVE_TC_LIBBPF -# If the iproute2 'tc' tool doesn't understand BTF debug info -# use llvm-strip to remove this debug info from object file -# -# *BUT* cannot strip everything as it removes ELF elems needed for -# creating maps -# -.PHONY: strip_tc_obj -strip_tc_obj: ${TC_BPF_TARGETS:=.o} - $(Q) echo "TC don't support libbpf - strip BTF info" - $(Q) llvm-strip --no-strip-all --remove-section .BTF $? - -all: strip_tc_obj -endif diff --git a/pping/bpf_egress_loader.sh b/pping/bpf_egress_loader.sh index 9332ad7..f15de65 100755 --- a/pping/bpf_egress_loader.sh +++ b/pping/bpf_egress_loader.sh @@ -4,7 +4,7 @@ # License: GPLv2 # # Modified by Simon Sundberg to add support -# of optional section (--sec) option and changed default BPF_OBJ +# of optional section (--sec) option or attaching a pinned program # basedir=`dirname $0` source ${basedir}/functions.sh @@ -64,6 +64,16 @@ function tc_egress_bpf_attach() egress bpf da obj "$objfile" sec "$section" } +function tc_egress_bpf_attach_pinned() +{ + local device=${1:-$DEV} + local pinprog=${2:-$PIN_PROG} + shift 2 + + call_tc filter add dev "$device" pref 2 handle 2 \ + egress bpf da pinned "$pinprog" +} + function tc_egress_list() { local device=${1:-$DEV} @@ -77,7 +87,12 @@ if [[ -n $REMOVE ]]; then fi tc_init_clsact $DEV -tc_egress_bpf_attach $DEV $BPF_OBJ $SEC + +if [[ -n $PIN_PROG ]]; then + tc_egress_bpf_attach_pinned $DEV $PIN_PROG +else + tc_egress_bpf_attach $DEV $BPF_OBJ $SEC +fi # Practical to list egress filters after setup. # (It's a common mistake to have several progs loaded) diff --git a/pping/configure b/pping/configure deleted file mode 100644 index 2f4c54b..0000000 --- a/pping/configure +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) -# This is not an autoconf generated configure -# - -# Output file which is input to Makefile -CONFIG=config.mk - -# Assume tc is in $PATH -TC=tc - -check_tc_libbpf() -{ - tc_version=$($TC -V) - if echo $tc_version | grep -q libbpf; then - libbpf_version=${tc_version##*libbpf } - echo "HAVE_TC_LIBBPF:=y" >> $CONFIG - echo "BPF_CFLAGS += -DHAVE_TC_LIBBPF" >> $CONFIG - echo "yes ($libbpf_version)" - else - echo "no" - fi -} - -echo "# Generated config" > $CONFIG -echo "Detecting available features on system" - -echo -n " - libbpf support in tc tool: " -check_tc_libbpf \ No newline at end of file diff --git a/pping/parameters.sh b/pping/parameters.sh index c947e27..1a1a49a 100644 --- a/pping/parameters.sh +++ b/pping/parameters.sh @@ -6,7 +6,7 @@ # License: GPLv2 # # Modified by Simon Sundberg to add support -# of optional section (--sec) option +# of optional section (--sec) option or attaching a pinned program # function usage() { @@ -20,12 +20,13 @@ function usage() { echo " -l | --list : (\$LIST) List setup after setup" echo " --file | --obj : (\$BPF_OBJ) BPF-object file to load" echo " --sec : (\$SEC) Section of BPF-object to load" + echo " --pinned : (\$PIN_PROG) Path to pinned program to attach" echo "" } # Using external program "getopt" to get --long-options OPTIONS=$(getopt -o vshd:l \ - --long verbose,dry-run,remove,stats,list,help,dev:,file:,obj:,sec: -- "$@") + --long verbose,dry-run,remove,stats,list,help,dev:,file:,obj:,sec:,pinned: -- "$@") if (( $? != 0 )); then usage err 2 "Error calling getopt" @@ -50,6 +51,11 @@ while true; do info "Section to load: $SEC" >&2 shift 2 ;; + --pinned ) + export PIN_PROG=$2 + info "Pinned program path: $PIN_PROG" >&2 + shift 2 + ;; -v | --verbose) export VERBOSE=yes # info "Verbose mode: VERBOSE=$VERBOSE" >&2 diff --git a/pping/pping.c b/pping/pping.c index 8745104..0c059ba 100644 --- a/pping/pping.c +++ b/pping/pping.c @@ -24,7 +24,7 @@ #define NS_PER_MS 1000000UL #define TCBPF_LOADER_SCRIPT "./bpf_egress_loader.sh" -#define PINNED_DIR "/sys/fs/bpf/tc/globals" +#define PINNED_DIR "/sys/fs/bpf/pping" #define PPING_XDP_OBJ "pping_kern_xdp.o" #define PPING_TCBPF_OBJ "pping_kern_tc.o" @@ -78,20 +78,20 @@ static int set_rlimit(long int lim) return !setrlimit(RLIMIT_MEMLOCK, &rlim) ? 0 : -errno; } -static int mkdir_if_noexist(const char *path) -{ - int ret; - struct stat st = { 0 }; +/* static int mkdir_if_noexist(const char *path) */ +/* { */ +/* int ret; */ +/* struct stat st = { 0 }; */ - ret = stat(path, &st); - if (ret) { - if (errno != ENOENT) - return -errno; +/* ret = stat(path, &st); */ +/* if (ret) { */ +/* if (errno != ENOENT) */ +/* return -errno; */ - return mkdir(path, 0700) ? -errno : 0; - } - return S_ISDIR(st.st_mode) ? 0 : -EEXIST; -} +/* return mkdir(path, 0700) ? -errno : 0; */ +/* } */ +/* return S_ISDIR(st.st_mode) ? 0 : -EEXIST; */ +/* } */ static int bpf_obj_open(struct bpf_object **obj, const char *obj_path, char *map_path) @@ -157,10 +157,14 @@ static int run_program(const char *path, char *const argv[]) } } -static int tc_bpf_load(char *bpf_object, char *section, char *interface) +static int tc_bpf_attach(char *pin_dir, char *section, char *interface) { - char *const argv[] = { TCBPF_LOADER_SCRIPT, "--dev", interface, "--obj", - bpf_object, "--sec", section, NULL }; + char prog_path[MAX_PATH_LEN]; + char *const argv[] = { TCBPF_LOADER_SCRIPT, "--dev", interface, "--pinned", prog_path, NULL }; + + if(snprintf(prog_path, sizeof(prog_path), "%s/%s", pin_dir, section) < 0) + return -EINVAL; + return run_program(TCBPF_LOADER_SCRIPT, argv); } @@ -184,6 +188,7 @@ static __u64 get_time_ns(void) return (__u64)t.tv_sec * NS_PER_SECOND + (__u64)t.tv_nsec; } +// TODO - generalize mechanic so it can be used for cleaning both ts_start and flow_state maps static int clean_map(int map_fd, __u64 max_age) { int removed = 0; @@ -280,9 +285,10 @@ int main(int argc, char *argv[]) int ifindex = 0; bool xdp_attached = false; bool tc_attached = false; - char map_path[MAX_PATH_LEN]; + char path_buffer[MAX_PATH_LEN]; - struct bpf_object *obj = NULL; + struct bpf_object *xdp_obj = NULL; + struct bpf_object *tc_obj = NULL; struct bpf_map *map = NULL; pthread_t tid; @@ -321,38 +327,21 @@ int main(int argc, char *argv[]) } // Load and attach the XDP program - err = mkdir_if_noexist("/sys/fs/bpf/tc"); - if (err) { - fprintf(stderr, - "Failed creating directory %s in which to pin map: %s\n", - "/sys/fs/bpf/tc", strerror(-err)); - goto cleanup; - } - - err = bpf_obj_open(&obj, PPING_XDP_OBJ, PINNED_DIR); + err = bpf_obj_open(&xdp_obj, PPING_XDP_OBJ, PINNED_DIR); if (err) { fprintf(stderr, "Failed opening object file %s: %s\n", PPING_XDP_OBJ, strerror(-err)); goto cleanup; } - // Get map here to allow for unpinning at cleanup - map = bpf_object__find_map_by_name(obj, TS_MAP); - err = libbpf_get_error(map); - if (err) { - fprintf(stderr, "Could not find map %s in %s: %s\n", TS_MAP, - PPING_XDP_OBJ, strerror(err)); - map = NULL; - } - - err = bpf_object__load(obj); + err = bpf_object__load(xdp_obj); if (err) { fprintf(stderr, "Failed loading XDP program: %s\n", strerror(-err)); goto cleanup; } - err = xdp_attach(obj, XDP_PROG_SEC, ifindex, XDP_FLAGS, false); + err = xdp_attach(xdp_obj, XDP_PROG_SEC, ifindex, XDP_FLAGS, false); if (err) { fprintf(stderr, "Failed attaching XDP program to %s: %s\n", argv[1], strerror(-err)); @@ -360,20 +349,40 @@ int main(int argc, char *argv[]) } xdp_attached = true; - // Load tc-bpf section on interface egress - err = tc_bpf_load(PPING_TCBPF_OBJ, TCBPF_PROG_SEC, argv[1]); + // Load, pin and attach tc program on egress + err = bpf_obj_open(&tc_obj, PPING_TCBPF_OBJ, PINNED_DIR); + if (err) { + fprintf(stderr, "Failed opening object file %s: %s\n", + PPING_TCBPF_OBJ, strerror(-err)); + goto cleanup; + } + + err = bpf_object__load(tc_obj); + if (err) { + fprintf(stderr, "Failed loading tc program: %s\n", + strerror(-err)); + goto cleanup; + } + + err = bpf_object__pin_programs(tc_obj, PINNED_DIR); + if (err) { + fprintf(stderr, "Failed pinning tc program to %s: %s\n", + PINNED_DIR, strerror(-err)); + goto cleanup; + } + + err = tc_bpf_attach(PINNED_DIR, TCBPF_PROG_SEC, argv[1]); if (err) { fprintf(stderr, - "Could not load section %s of %s on interface %s: %s\n", - TCBPF_PROG_SEC, PPING_TCBPF_OBJ, argv[1], - strerror(-err)); + "Failed attaching tc program on interface %s: %s\n", + argv[1], strerror(-err)); goto cleanup; } tc_attached = true; // Set up the periodical map cleaning clean_args.max_age_ns = TIMESTAMP_LIFETIME; - clean_args.map_fd = bpf_map__fd(map); + clean_args.map_fd = bpf_object__find_map_fd_by_name(xdp_obj, TS_MAP); if (clean_args.map_fd < 0) { fprintf(stderr, "Could not get file descriptor of map %s in object %s: %s\n", @@ -393,7 +402,8 @@ int main(int argc, char *argv[]) pb_opts.sample_cb = handle_rtt_event; pb_opts.lost_cb = handle_missed_rtt_event; - pb = perf_buffer__new(bpf_object__find_map_fd_by_name(obj, PERF_BUFFER), + pb = perf_buffer__new(bpf_object__find_map_fd_by_name(xdp_obj, + PERF_BUFFER), PERF_BUFFER_PAGES, &pb_opts); err = libbpf_get_error(pb); if (err) { @@ -419,29 +429,47 @@ int main(int argc, char *argv[]) cleanup: perf_buffer__free(pb); - if (map && bpf_map__is_pinned(map)) { - snprintf(map_path, sizeof(map_path), "%s/%s", PINNED_DIR, - TS_MAP); - err = bpf_map__unpin(map, map_path); - if (err) { - fprintf(stderr, "Failed unpinning map from %s: %s\n", - map_path, strerror(-err)); - } - } + if (xdp_attached) { err = xdp_detach(ifindex, XDP_FLAGS); - if (err) { + if (err) fprintf(stderr, "Failed deatching program from ifindex %d: %s\n", ifindex, strerror(-err)); - } } + if (tc_attached) { - err = tc_bpf_clear(argv[1]); //system(tc_cmd); - if (err) { + err = tc_bpf_clear(argv[1]); + if (err) fprintf(stderr, "Failed removing tc-bpf program from interface %s: %s\n", argv[1], strerror(-err)); + } + + if (tc_obj) { + err = bpf_object__unpin_programs(tc_obj, PINNED_DIR); + if (err) + fprintf(stderr, + "Failed unpinning tc program from %s: %s\n", + PINNED_DIR, strerror(-err)); + } + + /* + * Could use bpf_obj__unpin_maps(obj, PINNED_DIR) if it only tried + * unpinning pinned maps. But as it also attempts (and fails) to unpin + * maps that aren't pinned, will instead manually unpin the one pinned + * map for now. + */ + if (xdp_obj) { + if ((map = bpf_object__find_map_by_name(xdp_obj, TS_MAP)) && + bpf_map__is_pinned(map)) { + snprintf(path_buffer, sizeof(path_buffer), "%s/%s", + PINNED_DIR, TS_MAP); + err = bpf_map__unpin(map, path_buffer); + if (err) + fprintf(stderr, + "Failed unpinning map from %s: %s\n", + path_buffer, strerror(-err)); } } diff --git a/pping/pping.h b/pping/pping.h index b5edb0e..ded8410 100644 --- a/pping/pping.h +++ b/pping/pping.h @@ -6,7 +6,7 @@ #include #define XDP_PROG_SEC "xdp" -#define TCBPF_PROG_SEC "pping_egress" +#define TCBPF_PROG_SEC "classifier" /* * Struct that can hold the source or destination address for a flow (l3+l4). diff --git a/pping/pping_helpers.h b/pping/pping_helpers.h index 83d1078..640f718 100644 --- a/pping/pping_helpers.h +++ b/pping/pping_helpers.h @@ -32,6 +32,15 @@ struct parsing_context { __u32 pkt_len; //Full packet length (headers+data) }; +// Timestamp map +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct packet_id); + __type(value, __u64); + __uint(max_entries, 16384); + __uint(pinning, LIBBPF_PIN_BY_NAME); +} ts_start SEC(".maps"); + /* * Maps an IPv4 address into an IPv6 address according to RFC 4291 sec 2.5.5.2 */ diff --git a/pping/pping_kern_tc.c b/pping/pping_kern_tc.c index a72b675..2df3032 100644 --- a/pping/pping_kern_tc.c +++ b/pping/pping_kern_tc.c @@ -8,46 +8,16 @@ #define RATE_LIMIT \ 100000000UL // 100ms. Temporary solution, should be set by userspace -#define BURST_DURATION \ - 4000000 // 4ms, duration for when it may burst packet timestamps -#define BURST_SIZE \ - 3 // Number of packets it may create timestamps for in a burst char _license[] SEC("license") = "GPL"; -#ifdef HAVE_TC_LIBBPF /* detected by configure script in config.mk */ struct { __uint(type, BPF_MAP_TYPE_HASH); - __uint(key_size, sizeof(struct packet_id)); - __uint(value_size, sizeof(__u64)); - __uint(max_entries, 16384); - __uint(pinning, LIBBPF_PIN_BY_NAME); -} ts_start SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __uint(key_size, sizeof(struct network_tuple)); - __uint(value_size, sizeof(struct flow_state)); + __type(key, struct network_tuple); + __type(value, struct flow_state); __uint(max_entries, 16384); } flow_state SEC(".maps"); -#else -struct bpf_elf_map SEC("maps") ts_start = { - .type = BPF_MAP_TYPE_HASH, - .size_key = sizeof(struct packet_id), - .size_value = sizeof(__u64), - .max_elem = 16384, - .pinning = PIN_GLOBAL_NS, -}; - -struct bpf_elf_map SEC("maps") flow_state = { - .type = BPF_MAP_TYPE_HASH, - .size_key = sizeof(struct network_tuple), - .size_value = sizeof(struct flow_state), - .max_elem = 16384, -}; -#endif - // TC-BFP for parsing packet identifier from egress traffic and add to map SEC(TCBPF_PROG_SEC) int tc_bpf_prog_egress(struct __sk_buff *skb) @@ -61,10 +31,10 @@ int tc_bpf_prog_egress(struct __sk_buff *skb) .nh = { .pos = pctx.data }, }; struct flow_state *f_state; - struct flow_state new_state = { 0 }; // Rarely-ish used, but can't really dynamically allocate memory or? + struct flow_state new_state = { 0 }; if (parse_packet_identifier(&pctx, true, &p_id) < 0) - goto end; + goto out; // Check flow state f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow); @@ -73,7 +43,7 @@ int tc_bpf_prog_egress(struct __sk_buff *skb) BPF_NOEXIST); f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow); if (!f_state) - goto end; + goto out; } // Check if identfier is new @@ -97,7 +67,7 @@ int tc_bpf_prog_egress(struct __sk_buff *skb) * because of rate check failing due to concurrency issues. */ if (f_state->last_id == p_id.identifier) - goto end; + goto out; f_state->last_id = p_id.identifier; // Check rate-limit @@ -110,7 +80,7 @@ int tc_bpf_prog_egress(struct __sk_buff *skb) p_ts = bpf_ktime_get_ns(); // or bpf_ktime_get_boot_ns if (p_ts < f_state->last_timestamp || p_ts - f_state->last_timestamp < RATE_LIMIT) - goto end; + goto out; /* * Updates attempt at creating timestamp, even if creation of timestamp @@ -121,6 +91,6 @@ int tc_bpf_prog_egress(struct __sk_buff *skb) f_state->last_timestamp = p_ts; bpf_map_update_elem(&ts_start, &p_id, &p_ts, BPF_NOEXIST); -end: +out: return BPF_OK; } diff --git a/pping/pping_kern_xdp.c b/pping/pping_kern_xdp.c index 4cdf068..00fe8ad 100644 --- a/pping/pping_kern_xdp.c +++ b/pping/pping_kern_xdp.c @@ -7,14 +7,6 @@ char _license[] SEC("license") = "GPL"; -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __uint(key_size, sizeof(struct packet_id)); - __uint(value_size, sizeof(__u64)); - __uint(max_entries, 16384); - __uint(pinning, LIBBPF_PIN_BY_NAME); -} ts_start SEC(".maps"); - struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); __uint(key_size, sizeof(__u32)); @@ -36,11 +28,11 @@ int xdp_prog_ingress(struct xdp_md *ctx) }; if (parse_packet_identifier(&pctx, false, &p_id) < 0) - goto end; + goto out; p_ts = bpf_map_lookup_elem(&ts_start, &p_id); if (!p_ts) - goto end; + goto out; event.rtt = bpf_ktime_get_ns() - *p_ts; /* @@ -54,6 +46,6 @@ int xdp_prog_ingress(struct xdp_md *ctx) bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU, &event, sizeof(event)); -end: +out: return XDP_PASS; }