pping: Load tc-bpf program with libbpf

Load and pin the tc-bpf program in pping.c using libbpf, and only
attach the pinned program using iproute. That way, pping can use
features that are not supported by the old iproute loader, even if
iproute does not have libbpf support.

To support this change, extend bpf_egress_loader with an option to
attach a pinned program. Additionally, remove the configure script and
the parts of the Makefile that are no longer needed. Furthermore, remove
the multiple definitions of the ts_start map, and place a single
definition in pping_helpers.h, which is included by both BPF programs.

Also, some minor fixes based on Toke's review.

Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
Simon Sundberg
2021-03-02 17:40:51 +01:00
parent 1282bce7d8
commit 1446e6edec
9 changed files with 133 additions and 163 deletions

View File

@@ -6,29 +6,8 @@ BPF_TARGETS := pping_kern_xdp
BPF_TARGETS += $(TC_BPF_TARGETS)
LDFLAGS += -pthread
EXTRA_DEPS += config.mk pping.h pping_helpers.h
EXTRA_DEPS += pping.h pping_helpers.h
LIB_DIR = ../lib
include $(LIB_DIR)/common.mk
include config.mk
all: config.mk
config.mk: configure
@sh configure
ifndef HAVE_TC_LIBBPF
# If the iproute2 'tc' tool doesn't understand BTF debug info
# use llvm-strip to remove this debug info from object file
#
# *BUT* cannot strip everything as it removes ELF elems needed for
# creating maps
#
.PHONY: strip_tc_obj
strip_tc_obj: ${TC_BPF_TARGETS:=.o}
$(Q) echo "TC don't support libbpf - strip BTF info"
$(Q) llvm-strip --no-strip-all --remove-section .BTF $?
all: strip_tc_obj
endif

View File

@@ -4,7 +4,7 @@
# License: GPLv2
#
# Modified by Simon Sundberg <simon.sundberg@kau.se> to add support
# of optional section (--sec) option and changed default BPF_OBJ
# of optional section (--sec) option or attaching a pinned program
#
basedir=`dirname $0`
source ${basedir}/functions.sh
@@ -64,6 +64,16 @@ function tc_egress_bpf_attach()
egress bpf da obj "$objfile" sec "$section"
}
# Attach an already pinned BPF program as an egress filter on a device.
#
# Arguments:
#   $1 - network device to attach to (falls back to $DEV)
#   $2 - path of the pinned BPF program (falls back to $PIN_PROG)
#
# The filter is added through call_tc with "pref 2 handle 2" in "da" mode,
# referencing the program by its pinned path instead of an object file.
function tc_egress_bpf_attach_pinned()
{
local device=${1:-$DEV}
local pinprog=${2:-$PIN_PROG}
# Drop the two positional arguments consumed above; nothing in this
# function reads the remaining arguments afterwards.
shift 2
call_tc filter add dev "$device" pref 2 handle 2 \
egress bpf da pinned "$pinprog"
}
function tc_egress_list()
{
local device=${1:-$DEV}
@@ -77,7 +87,12 @@ if [[ -n $REMOVE ]]; then
fi
tc_init_clsact $DEV
tc_egress_bpf_attach $DEV $BPF_OBJ $SEC
if [[ -n $PIN_PROG ]]; then
tc_egress_bpf_attach_pinned $DEV $PIN_PROG
else
tc_egress_bpf_attach $DEV $BPF_OBJ $SEC
fi
# Practical to list egress filters after setup.
# (It's a common mistake to have several progs loaded)

29
pping/configure vendored
View File

@@ -1,29 +0,0 @@
#!/bin/bash
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
# This is not an autoconf generated configure
#
# Output file which is input to Makefile
CONFIG=config.mk
# Assume tc is in $PATH
TC=tc
# Detect whether the tc tool ($TC) reports libbpf support in its version
# output. If it does, append the feature flags to $CONFIG and print
# "yes (<version>)"; otherwise print "no" and leave $CONFIG untouched.
check_tc_libbpf()
{
    tc_version=$($TC -V)
    case $tc_version in
    *libbpf*)
        # Whatever follows the last "libbpf " is taken as the version
        libbpf_version=${tc_version##*libbpf }
        {
            echo "HAVE_TC_LIBBPF:=y"
            echo "BPF_CFLAGS += -DHAVE_TC_LIBBPF"
        } >> $CONFIG
        echo "yes ($libbpf_version)"
        ;;
    *)
        echo "no"
        ;;
    esac
}
echo "# Generated config" > $CONFIG
echo "Detecting available features on system"
echo -n " - libbpf support in tc tool: "
check_tc_libbpf

View File

@@ -6,7 +6,7 @@
# License: GPLv2
#
# Modified by Simon Sundberg <simon.sundberg@kau.se> to add support
# of optional section (--sec) option
# of optional section (--sec) option or attaching a pinned program
#
function usage() {
@@ -20,12 +20,13 @@ function usage() {
echo " -l | --list : (\$LIST) List setup after setup"
echo " --file | --obj : (\$BPF_OBJ) BPF-object file to load"
echo " --sec : (\$SEC) Section of BPF-object to load"
echo " --pinned : (\$PIN_PROG) Path to pinned program to attach"
echo ""
}
# Using external program "getopt" to get --long-options
OPTIONS=$(getopt -o vshd:l \
--long verbose,dry-run,remove,stats,list,help,dev:,file:,obj:,sec: -- "$@")
--long verbose,dry-run,remove,stats,list,help,dev:,file:,obj:,sec:,pinned: -- "$@")
if (( $? != 0 )); then
usage
err 2 "Error calling getopt"
@@ -50,6 +51,11 @@ while true; do
info "Section to load: $SEC" >&2
shift 2
;;
--pinned )
export PIN_PROG=$2
info "Pinned program path: $PIN_PROG" >&2
shift 2
;;
-v | --verbose)
export VERBOSE=yes
# info "Verbose mode: VERBOSE=$VERBOSE" >&2

View File

@@ -24,7 +24,7 @@
#define NS_PER_MS 1000000UL
#define TCBPF_LOADER_SCRIPT "./bpf_egress_loader.sh"
#define PINNED_DIR "/sys/fs/bpf/tc/globals"
#define PINNED_DIR "/sys/fs/bpf/pping"
#define PPING_XDP_OBJ "pping_kern_xdp.o"
#define PPING_TCBPF_OBJ "pping_kern_tc.o"
@@ -78,20 +78,20 @@ static int set_rlimit(long int lim)
return !setrlimit(RLIMIT_MEMLOCK, &rlim) ? 0 : -errno;
}
static int mkdir_if_noexist(const char *path)
{
int ret;
struct stat st = { 0 };
/* static int mkdir_if_noexist(const char *path) */
/* { */
/* int ret; */
/* struct stat st = { 0 }; */
ret = stat(path, &st);
if (ret) {
if (errno != ENOENT)
return -errno;
/* ret = stat(path, &st); */
/* if (ret) { */
/* if (errno != ENOENT) */
/* return -errno; */
return mkdir(path, 0700) ? -errno : 0;
}
return S_ISDIR(st.st_mode) ? 0 : -EEXIST;
}
/* return mkdir(path, 0700) ? -errno : 0; */
/* } */
/* return S_ISDIR(st.st_mode) ? 0 : -EEXIST; */
/* } */
static int bpf_obj_open(struct bpf_object **obj, const char *obj_path,
char *map_path)
@@ -157,10 +157,14 @@ static int run_program(const char *path, char *const argv[])
}
}
static int tc_bpf_load(char *bpf_object, char *section, char *interface)
static int tc_bpf_attach(char *pin_dir, char *section, char *interface)
{
char *const argv[] = { TCBPF_LOADER_SCRIPT, "--dev", interface, "--obj",
bpf_object, "--sec", section, NULL };
char prog_path[MAX_PATH_LEN];
char *const argv[] = { TCBPF_LOADER_SCRIPT, "--dev", interface, "--pinned", prog_path, NULL };
if(snprintf(prog_path, sizeof(prog_path), "%s/%s", pin_dir, section) < 0)
return -EINVAL;
return run_program(TCBPF_LOADER_SCRIPT, argv);
}
@@ -184,6 +188,7 @@ static __u64 get_time_ns(void)
return (__u64)t.tv_sec * NS_PER_SECOND + (__u64)t.tv_nsec;
}
// TODO - generalize mechanic so it can be used for cleaning both ts_start and flow_state maps
static int clean_map(int map_fd, __u64 max_age)
{
int removed = 0;
@@ -280,9 +285,10 @@ int main(int argc, char *argv[])
int ifindex = 0;
bool xdp_attached = false;
bool tc_attached = false;
char map_path[MAX_PATH_LEN];
char path_buffer[MAX_PATH_LEN];
struct bpf_object *obj = NULL;
struct bpf_object *xdp_obj = NULL;
struct bpf_object *tc_obj = NULL;
struct bpf_map *map = NULL;
pthread_t tid;
@@ -321,38 +327,21 @@ int main(int argc, char *argv[])
}
// Load and attach the XDP program
err = mkdir_if_noexist("/sys/fs/bpf/tc");
if (err) {
fprintf(stderr,
"Failed creating directory %s in which to pin map: %s\n",
"/sys/fs/bpf/tc", strerror(-err));
goto cleanup;
}
err = bpf_obj_open(&obj, PPING_XDP_OBJ, PINNED_DIR);
err = bpf_obj_open(&xdp_obj, PPING_XDP_OBJ, PINNED_DIR);
if (err) {
fprintf(stderr, "Failed opening object file %s: %s\n",
PPING_XDP_OBJ, strerror(-err));
goto cleanup;
}
// Get map here to allow for unpinning at cleanup
map = bpf_object__find_map_by_name(obj, TS_MAP);
err = libbpf_get_error(map);
if (err) {
fprintf(stderr, "Could not find map %s in %s: %s\n", TS_MAP,
PPING_XDP_OBJ, strerror(err));
map = NULL;
}
err = bpf_object__load(obj);
err = bpf_object__load(xdp_obj);
if (err) {
fprintf(stderr, "Failed loading XDP program: %s\n",
strerror(-err));
goto cleanup;
}
err = xdp_attach(obj, XDP_PROG_SEC, ifindex, XDP_FLAGS, false);
err = xdp_attach(xdp_obj, XDP_PROG_SEC, ifindex, XDP_FLAGS, false);
if (err) {
fprintf(stderr, "Failed attaching XDP program to %s: %s\n",
argv[1], strerror(-err));
@@ -360,20 +349,40 @@ int main(int argc, char *argv[])
}
xdp_attached = true;
// Load tc-bpf section on interface egress
err = tc_bpf_load(PPING_TCBPF_OBJ, TCBPF_PROG_SEC, argv[1]);
// Load, pin and attach tc program on egress
err = bpf_obj_open(&tc_obj, PPING_TCBPF_OBJ, PINNED_DIR);
if (err) {
fprintf(stderr, "Failed opening object file %s: %s\n",
PPING_TCBPF_OBJ, strerror(-err));
goto cleanup;
}
err = bpf_object__load(tc_obj);
if (err) {
fprintf(stderr, "Failed loading tc program: %s\n",
strerror(-err));
goto cleanup;
}
err = bpf_object__pin_programs(tc_obj, PINNED_DIR);
if (err) {
fprintf(stderr, "Failed pinning tc program to %s: %s\n",
PINNED_DIR, strerror(-err));
goto cleanup;
}
err = tc_bpf_attach(PINNED_DIR, TCBPF_PROG_SEC, argv[1]);
if (err) {
fprintf(stderr,
"Could not load section %s of %s on interface %s: %s\n",
TCBPF_PROG_SEC, PPING_TCBPF_OBJ, argv[1],
strerror(-err));
"Failed attaching tc program on interface %s: %s\n",
argv[1], strerror(-err));
goto cleanup;
}
tc_attached = true;
// Set up the periodical map cleaning
clean_args.max_age_ns = TIMESTAMP_LIFETIME;
clean_args.map_fd = bpf_map__fd(map);
clean_args.map_fd = bpf_object__find_map_fd_by_name(xdp_obj, TS_MAP);
if (clean_args.map_fd < 0) {
fprintf(stderr,
"Could not get file descriptor of map %s in object %s: %s\n",
@@ -393,7 +402,8 @@ int main(int argc, char *argv[])
pb_opts.sample_cb = handle_rtt_event;
pb_opts.lost_cb = handle_missed_rtt_event;
pb = perf_buffer__new(bpf_object__find_map_fd_by_name(obj, PERF_BUFFER),
pb = perf_buffer__new(bpf_object__find_map_fd_by_name(xdp_obj,
PERF_BUFFER),
PERF_BUFFER_PAGES, &pb_opts);
err = libbpf_get_error(pb);
if (err) {
@@ -419,29 +429,47 @@ int main(int argc, char *argv[])
cleanup:
perf_buffer__free(pb);
if (map && bpf_map__is_pinned(map)) {
snprintf(map_path, sizeof(map_path), "%s/%s", PINNED_DIR,
TS_MAP);
err = bpf_map__unpin(map, map_path);
if (err) {
fprintf(stderr, "Failed unpinning map from %s: %s\n",
map_path, strerror(-err));
}
}
if (xdp_attached) {
err = xdp_detach(ifindex, XDP_FLAGS);
if (err) {
if (err)
fprintf(stderr,
"Failed deatching program from ifindex %d: %s\n",
ifindex, strerror(-err));
}
}
if (tc_attached) {
err = tc_bpf_clear(argv[1]); //system(tc_cmd);
if (err) {
err = tc_bpf_clear(argv[1]);
if (err)
fprintf(stderr,
"Failed removing tc-bpf program from interface %s: %s\n",
argv[1], strerror(-err));
}
if (tc_obj) {
err = bpf_object__unpin_programs(tc_obj, PINNED_DIR);
if (err)
fprintf(stderr,
"Failed unpinning tc program from %s: %s\n",
PINNED_DIR, strerror(-err));
}
/*
* Could use bpf_object__unpin_maps(xdp_obj, PINNED_DIR) if it only tried
* unpinning pinned maps. But as it also attempts (and fails) to unpin
* maps that aren't pinned, manually unpin the one pinned map (TS_MAP)
* instead for now.
*/
if (xdp_obj) {
if ((map = bpf_object__find_map_by_name(xdp_obj, TS_MAP)) &&
bpf_map__is_pinned(map)) {
snprintf(path_buffer, sizeof(path_buffer), "%s/%s",
PINNED_DIR, TS_MAP);
err = bpf_map__unpin(map, path_buffer);
if (err)
fprintf(stderr,
"Failed unpinning map from %s: %s\n",
path_buffer, strerror(-err));
}
}

View File

@@ -6,7 +6,7 @@
#include <linux/in6.h>
#define XDP_PROG_SEC "xdp"
#define TCBPF_PROG_SEC "pping_egress"
#define TCBPF_PROG_SEC "classifier"
/*
* Struct that can hold the source or destination address for a flow (l3+l4).

View File

@@ -32,6 +32,15 @@ struct parsing_context {
__u32 pkt_len; //Full packet length (headers+data)
};
/*
 * Timestamp map: hash map keyed by packet identifier (struct packet_id)
 * holding a __u64 timestamp per entry, with room for 16384 entries.
 *
 * LIBBPF_PIN_BY_NAME requests that libbpf pins the map under its own name.
 * NOTE(review): presumably this is what lets the tc and XDP objects share
 * one map instance through the common pin directory - confirm against the
 * loader in pping.c.
 */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, struct packet_id);
__type(value, __u64);
__uint(max_entries, 16384);
__uint(pinning, LIBBPF_PIN_BY_NAME);
} ts_start SEC(".maps");
/*
* Maps an IPv4 address into an IPv6 address according to RFC 4291 sec 2.5.5.2
*/

View File

@@ -8,46 +8,16 @@
#define RATE_LIMIT \
100000000UL // 100ms. Temporary solution, should be set by userspace
#define BURST_DURATION \
4000000 // 4ms, duration for when it may burst packet timestamps
#define BURST_SIZE \
3 // Number of packets it may create timestamps for in a burst
char _license[] SEC("license") = "GPL";
#ifdef HAVE_TC_LIBBPF /* detected by configure script in config.mk */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(struct packet_id));
__uint(value_size, sizeof(__u64));
__uint(max_entries, 16384);
__uint(pinning, LIBBPF_PIN_BY_NAME);
} ts_start SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(struct network_tuple));
__uint(value_size, sizeof(struct flow_state));
__type(key, struct network_tuple);
__type(value, struct flow_state);
__uint(max_entries, 16384);
} flow_state SEC(".maps");
#else
struct bpf_elf_map SEC("maps") ts_start = {
.type = BPF_MAP_TYPE_HASH,
.size_key = sizeof(struct packet_id),
.size_value = sizeof(__u64),
.max_elem = 16384,
.pinning = PIN_GLOBAL_NS,
};
struct bpf_elf_map SEC("maps") flow_state = {
.type = BPF_MAP_TYPE_HASH,
.size_key = sizeof(struct network_tuple),
.size_value = sizeof(struct flow_state),
.max_elem = 16384,
};
#endif
// TC-BPF program for parsing the packet identifier from egress traffic and adding it to the map
SEC(TCBPF_PROG_SEC)
int tc_bpf_prog_egress(struct __sk_buff *skb)
@@ -61,10 +31,10 @@ int tc_bpf_prog_egress(struct __sk_buff *skb)
.nh = { .pos = pctx.data },
};
struct flow_state *f_state;
struct flow_state new_state = { 0 }; // Rarely-ish used, but can't really dynamically allocate memory or?
struct flow_state new_state = { 0 };
if (parse_packet_identifier(&pctx, true, &p_id) < 0)
goto end;
goto out;
// Check flow state
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
@@ -73,7 +43,7 @@ int tc_bpf_prog_egress(struct __sk_buff *skb)
BPF_NOEXIST);
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
if (!f_state)
goto end;
goto out;
}
// Check if identifier is new
@@ -97,7 +67,7 @@ int tc_bpf_prog_egress(struct __sk_buff *skb)
* because of rate check failing due to concurrency issues.
*/
if (f_state->last_id == p_id.identifier)
goto end;
goto out;
f_state->last_id = p_id.identifier;
// Check rate-limit
@@ -110,7 +80,7 @@ int tc_bpf_prog_egress(struct __sk_buff *skb)
p_ts = bpf_ktime_get_ns(); // or bpf_ktime_get_boot_ns
if (p_ts < f_state->last_timestamp ||
p_ts - f_state->last_timestamp < RATE_LIMIT)
goto end;
goto out;
/*
* Updates attempt at creating timestamp, even if creation of timestamp
@@ -121,6 +91,6 @@ int tc_bpf_prog_egress(struct __sk_buff *skb)
f_state->last_timestamp = p_ts;
bpf_map_update_elem(&ts_start, &p_id, &p_ts, BPF_NOEXIST);
end:
out:
return BPF_OK;
}

View File

@@ -7,14 +7,6 @@
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(struct packet_id));
__uint(value_size, sizeof(__u64));
__uint(max_entries, 16384);
__uint(pinning, LIBBPF_PIN_BY_NAME);
} ts_start SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(__u32));
@@ -36,11 +28,11 @@ int xdp_prog_ingress(struct xdp_md *ctx)
};
if (parse_packet_identifier(&pctx, false, &p_id) < 0)
goto end;
goto out;
p_ts = bpf_map_lookup_elem(&ts_start, &p_id);
if (!p_ts)
goto end;
goto out;
event.rtt = bpf_ktime_get_ns() - *p_ts;
/*
@@ -54,6 +46,6 @@ int xdp_prog_ingress(struct xdp_md *ctx)
bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU, &event,
sizeof(event));
end:
out:
return XDP_PASS;
}