pping: Use libbpf to attach tc program

libbpf v0.4 added an API for attaching/detaching TC-BPF programs. So
use the new API to attach the tc program instead of calling on an
external script (which uses the tc command line utility).

Avoid removing the clsact qdisc on program shutdown or error, as
there's currently no convenient way to ensure the qdisc isn't used by
other programs as well. This means pping will not completely clean up
after itself, but this is a safer alternative than always destroying
the qdisc as done by the external script, which may pull the rug out
underneath other programs using the qdisc.

Finally, remove the pin_dir member from the configuration as pping no
longer pins any programs or maps, and remove deleted tc loading
scripts from README.

Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
Simon Sundberg
2021-06-11 14:44:19 +02:00
parent 2465e3e0df
commit bd29a246b9
5 changed files with 34 additions and 387 deletions

View File

@@ -132,14 +132,6 @@ An example of a (pretty-printed) RTT-event is provided below:
calculated RTT (together with the flow-tuple) is pushed to the perf-buffer
`events`. Both `pping_egress()` and `pping_ingress()` can also push flow-events
to the `events` buffer.
- **bpf_egress_loader.sh:** A shell script that's used by `pping.c` to setup a
clsact qdisc and attach the `pping_egress()` program to egress using
tc. **Note**: Unless your iproute2 comes with libbpf support, tc will use
iproute's own loading mechanism when loading and attaching object files
directly through the tc command line. To ensure that libbpf is always used to
load `pping_egress()`, `pping.c` actually loads the program and pins it to
`/sys/fs/bpf/pping/classifier`, and tc only attaches the pinned program.
- **functions.sh and parameters.sh:** Imported by `bpf_egress_loader.sh`.
- **pping.h:** Common header file included by `pping.c` and
`pping_kern.c`. Contains some common structs used by both (are part of the
maps).

View File

@@ -1,102 +0,0 @@
#!/bin/bash
#
# Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
# License: GPLv2
#
# Modified by Simon Sundberg <simon.sundberg@kau.se> to add support
# of optional section (--sec) option or attaching a pinned program
#
# Locate the directory this script lives in so the helper files can be
# sourced regardless of the caller's working directory. Everything is
# quoted so a path containing whitespace or glob characters stays one word.
basedir=$(dirname -- "$0")
source "${basedir}/functions.sh"

# Re-runs this script through sudo when it was not started as root.
root_check_run_with_sudo "$@"

# Use common parameters
source "${basedir}/parameters.sh"

export TC=/sbin/tc

# This can be changed via --file or --obj
if [[ -z ${BPF_OBJ} ]]; then
    # Fallback default
    BPF_OBJ=pping_kern_tc.o
fi

# This can be changed via --sec
if [[ -z ${SEC} ]]; then
    # Fallback default
    SEC=pping_egress
fi

info "Applying TC-BPF egress setup on device: $DEV with object file: $BPF_OBJ"
# Remove the clsact qdisc from a network device.
#
# Arguments:
#   $1 - network device (optional; falls back to $DEV)
#
# Deleting the clsact qdisc also deletes every filter attached to it.
# The deletion goes through call_tc_allow_fail and tc's stderr is discarded
# on purpose, so a device without a clsact qdisc is not treated as an error.
function tc_remove_clsact()
{
    # No positional "shift" here: the parameters are local to the function,
    # and shifting with zero arguments fails (fatal under "set -e").
    local device=${1:-$DEV}

    # Removing qdisc clsact, also deletes all filters
    call_tc_allow_fail qdisc del dev "$device" clsact 2> /dev/null
}
# (Re)create the clsact qdisc on a network device.
#
# Arguments:
#   $1 - network device (optional; falls back to $DEV)
#
# Any existing clsact qdisc is deleted first (failure tolerated, stderr
# discarded), then a fresh one is added through call_tc.
function tc_init_clsact()
{
    # No positional "shift" here: the parameters are local to the function,
    # and shifting with zero arguments fails (fatal under "set -e").
    local device=${1:-$DEV}

    # TODO: find method that avoids flushing (all users)
    # Also deletes all filters
    call_tc_allow_fail qdisc del dev "$device" clsact 2> /dev/null

    # Load qdisc clsact which allow us to attach BPF-progs as TC filters
    call_tc qdisc add dev "$device" clsact
}
# Attach a section of a BPF object file as an egress TC filter.
#
# Arguments (all optional, each falls back to the named global):
#   $1 - network device        ($DEV)
#   $2 - BPF object file       ($BPF_OBJ)
#   $3 - section in the object ($SEC)
#
# The filter is added in direct-action mode ("da") with pref 2, handle 2.
function tc_egress_bpf_attach()
{
    # No "shift 3" here: every argument is optional, and shifting more
    # positions than were passed fails (fatal under "set -e").
    local device=${1:-$DEV}
    local objfile=${2:-$BPF_OBJ}
    local section=${3:-$SEC}

    call_tc filter add dev "$device" pref 2 handle 2 \
        egress bpf da obj "$objfile" sec "$section"
}
# Attach an already pinned BPF program as an egress TC filter.
#
# Arguments (both optional, each falls back to the named global):
#   $1 - network device             ($DEV)
#   $2 - path to the pinned program ($PIN_PROG)
#
# The filter is added in direct-action mode ("da") with pref 2, handle 2.
function tc_egress_bpf_attach_pinned()
{
    # No "shift 2" here: both arguments are optional, and shifting more
    # positions than were passed fails (fatal under "set -e").
    local device=${1:-$DEV}
    local pinprog=${2:-$PIN_PROG}

    call_tc filter add dev "$device" pref 2 handle 2 \
        egress bpf da pinned "$pinprog"
}
# Show the egress TC filters installed on a device.
#
# Arguments:
#   $1 - network device (optional; an empty or missing value means $DEV)
function tc_egress_list()
{
    local iface=$DEV
    if [[ -n $1 ]]; then
        iface=$1
    fi

    call_tc filter show dev "$iface" egress
}
# --remove: delete the clsact qdisc (and with it all filters), then stop.
if [[ -n $REMOVE ]]; then
    tc_remove_clsact "$DEV"
    exit 0
fi

tc_init_clsact "$DEV"

# Arguments are quoted so device names, object paths and pin paths that
# contain whitespace or glob characters reach the helpers as single words.
if [[ -n $PIN_PROG ]]; then
    tc_egress_bpf_attach_pinned "$DEV" "$PIN_PROG"
else
    tc_egress_bpf_attach "$DEV" "$BPF_OBJ" "$SEC"
fi

# Practical to list egress filters after setup.
# (It's a common mistake to have several progs loaded)
if [[ -n $LIST ]]; then
    info "Listing egress filter on device"
    tc_egress_list "$DEV"
fi

View File

@@ -1,64 +0,0 @@
#
# Common functions used by scripts in this directory
# - Depending on bash 3 (or higher) syntax
#
# Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
# License: GPLv2
## -- sudo trick --
# Re-run the calling script through sudo when it was started without root
# privileges, so the program can be launched as a normal user.
#
# Call as: root_check_run_with_sudo "$@"
#
# Arguments:
#   $@ - the script's original arguments, forwarded to the sudo re-run
#
# Returns normally only when already running as root. Otherwise it never
# returns: it exits with sudo's exit status, or with 1 when the script file
# is not directly executable.
function root_check_run_with_sudo() {
    if [ "$EUID" -ne 0 ]; then
        # Quoted so a script path containing spaces is tested as one word.
        if [ -x "$0" ]; then # Directly executable use sudo
            echo "# (Not root, running with sudo)" >&2
            sudo "$0" "$@"
            exit $?
        fi
        # Diagnostics go to stderr, like the notice above.
        echo "cannot perform sudo run of $0" >&2
        exit 1
    fi
}
## -- General shell logging cmds --
# Print an error message to stderr and terminate the script.
#
# Arguments:
#   $1  - exit status to terminate with
#   $2+ - message words; backslash escapes are interpreted (echo -e)
function err() {
    local status=$1
    local -a message=("${@:2}")

    echo -e "ERROR: ${message[@]}" >&2
    exit $status
}
# Print a warning to stderr; the script keeps running.
#
# Arguments:
#   $@ - message words; backslash escapes are interpreted (echo -e)
function warn() {
    local -a words=("$@")

    echo -e "WARN : ${words[@]}" >&2
}
# Print an informational line prefixed with "# " to stdout, but only when
# $VERBOSE is non-empty; otherwise do nothing and succeed.
#
# Arguments:
#   $@ - message words
function info() {
    [[ -z "$VERBOSE" ]] && return 0

    echo "# $@"
}
## -- Wrapper calls for TC --
# Run the tc binary named by $TC with the given arguments.
#
# Arguments:
#   $1  - failure policy: empty string means a failing tc call is fatal
#         (err with exit status 3); any non-empty value tolerates failure
#   $2+ - arguments handed to tc
#
# Globals read:
#   VERBOSE - when non-empty, echo the tc command line before running it
#   DRYRUN  - when non-empty, return without executing tc
#   TC      - command used to invoke tc
function _call_tc() {
    local tolerate_failure="$1"
    shift

    [[ -n "$VERBOSE" ]] && echo "tc $@"

    # Dry-run: the command was (possibly) echoed above, nothing is executed.
    [[ -n "$DRYRUN" ]] && return

    $TC "$@"
    local rc=$?

    if (( rc == 0 )) || [[ "$tolerate_failure" != "" ]]; then
        return 0
    fi

    err 3 "Exec error($rc) occurred cmd: \"$TC $@\""
}
# Run tc with the given arguments; a failing tc call is fatal (the empty
# first argument selects _call_tc's strict mode).
function call_tc() {
    local -r failure_policy=""

    _call_tc "$failure_policy" "$@"
}
# Run tc with the given arguments, tolerating a non-zero exit status (the
# non-empty first argument selects _call_tc's lenient mode).
function call_tc_allow_fail() {
    local -r failure_policy="allow_fail"

    _call_tc "$failure_policy" "$@"
}

View File

@@ -1,100 +0,0 @@
#
# Common parameter parsing used by scripts in this directory
# - Depending on bash 3 (or higher) syntax
#
# Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
# License: GPLv2
#
# Modified by Simon Sundberg <simon.sundberg@kau.se> to add support
# of optional section (--sec) option or attaching a pinned program
#
# Print the command line help text to stdout, framed by one empty line
# before and after. Each option line names the variable the option sets.
function usage() {
    local -a help_lines=(
        ""
        "Usage: $0 [-vh] --dev ethX"
        " -d | --dev : (\$DEV) Interface/device (required)"
        " -v | --verbose : (\$VERBOSE) verbose"
        " --remove : (\$REMOVE) Remove the rules"
        " --dry-run : (\$DRYRUN) Dry-run only (echo tc commands)"
        " -s | --stats : (\$STATS_ONLY) Call statistics command"
        " -l | --list : (\$LIST) List setup after setup"
        " --file | --obj : (\$BPF_OBJ) BPF-object file to load"
        " --sec : (\$SEC) Section of BPF-object to load"
        " --pinned : (\$PIN_PROG) Path to pinned program to attach"
        ""
    )

    printf '%s\n' "${help_lines[@]}"
}
# Using external program "getopt" to get --long-options.
# getopt normalises the command line; a non-zero status means it rejected
# the arguments (or could not be run), which is fatal.
if ! OPTIONS=$(getopt -o vshd:l \
    --long verbose,dry-run,remove,stats,list,help,dev:,file:,obj:,sec:,pinned: -- "$@"); then
    usage
    err 2 "Error calling getopt"
fi

# Replace the positional parameters with getopt's normalised list.
eval set -- "$OPTIONS"

## --- Parse command line arguments / parameters ---
# Every recognised option is exported. Parsing stops at "--" or at the
# first word that matches no option.
while :; do
    case "$1" in
        # --- flags without a value ---
        -v | --verbose)
            export VERBOSE=yes
            # info "Verbose mode: VERBOSE=$VERBOSE" >&2
            shift
            ;;
        --dry-run )
            export DRYRUN=yes
            export VERBOSE=yes
            info "Dry-run mode: enable VERBOSE and don't call TC" >&2
            shift
            ;;
        --remove )
            export REMOVE=yes
            shift
            ;;
        -s | --stats )
            export STATS_ONLY=yes
            shift
            ;;
        -l | --list )
            export LIST=yes
            shift
            ;;
        -h | --help )
            usage;
            exit 0
            ;;
        # --- options that take a value in $2 ---
        -d | --dev ) # device
            export DEV=$2
            info "Device set to: DEV=$DEV" >&2
            shift 2
            ;;
        --file | --obj )
            export BPF_OBJ=$2
            info "BPF-object file: $BPF_OBJ" >&2
            shift 2
            ;;
        --sec )
            export SEC=$2
            info "Section to load: $SEC" >&2
            shift 2
            ;;
        --pinned )
            export PIN_PROG=$2
            info "Pinned program path: $PIN_PROG" >&2
            shift 2
            ;;
        # --- end of options ---
        -- )
            shift
            break
            ;;
        * )
            shift
            break
            ;;
    esac
done

# The device is the only mandatory parameter.
if [[ -z "$DEV" ]]; then
    usage
    err 2 "Please specify net_device (\$DEV)"
fi

View File

@@ -29,8 +29,6 @@ static const char *__doc__ =
#define NS_PER_SECOND 1000000000UL
#define NS_PER_MS 1000000UL
#define TCBPF_LOADER_SCRIPT "./bpf_egress_loader.sh"
#define TIMESTAMP_LIFETIME \
(10 * NS_PER_SECOND) // Clear out packet timestamps if they're over 10 seconds
#define FLOW_LIFETIME \
@@ -71,7 +69,6 @@ struct pping_config {
char *object_path;
char *ingress_sec;
char *egress_sec;
char *pin_dir;
char *packet_map;
char *flow_map;
char *event_map;
@@ -232,53 +229,6 @@ static int set_rlimit(long int lim)
return !setrlimit(RLIMIT_MEMLOCK, &rlim) ? 0 : -errno;
}
/*
 * Look up the program with section title prog_title in obj and call func on
 * it together with the path "pin_dir/prog_title".
 *
 * Returns whatever func returns. Before func is reached, it returns:
 *  - -EINVAL (or the value of libbpf_get_error()) if obj or the program
 *    lookup is invalid,
 *  - snprintf()'s negative result if the path cannot be formatted,
 *  - -ENAMETOOLONG if the path does not fit in MAX_PATH_LEN bytes.
 */
static int
bpf_obj_run_prog_pindir_func(struct bpf_object *obj, const char *prog_title,
			     const char *pin_dir,
			     int (*func)(struct bpf_program *, const char *))
{
	int len;
	struct bpf_program *prog;
	char path[MAX_PATH_LEN];

	if (!obj || libbpf_get_error(obj))
		return obj ? libbpf_get_error(obj) : -EINVAL;

	len = snprintf(path, MAX_PATH_LEN, "%s/%s", pin_dir, prog_title);
	if (len < 0)
		return len;
	/*
	 * snprintf() reports the length excluding the terminating NUL, so a
	 * result equal to the buffer size already means the path was truncated.
	 */
	if (len >= MAX_PATH_LEN)
		return -ENAMETOOLONG;

	prog = bpf_object__find_program_by_title(obj, prog_title);
	if (!prog || libbpf_get_error(prog))
		return prog ? libbpf_get_error(prog) : -EINVAL;

	return func(prog, path);
}
/*
 * Like bpf_object__pin_programs, but only attempts to pin the single
 * program prog_title, at the path pin_dir/prog_title.
 *
 * Returns the result of bpf_obj_run_prog_pindir_func().
 */
static int bpf_obj_pin_program(struct bpf_object *obj, const char *prog_title,
			       const char *pin_dir)
{
	int (*pin_fn)(struct bpf_program *, const char *) = bpf_program__pin;

	return bpf_obj_run_prog_pindir_func(obj, prog_title, pin_dir, pin_fn);
}
/*
 * Like bpf_object__unpin_programs, but only attempts to unpin the single
 * program prog_title, from the path pin_dir/prog_title.
 *
 * Returns the result of bpf_obj_run_prog_pindir_func().
 */
static int bpf_obj_unpin_program(struct bpf_object *obj, const char *prog_title,
				 const char *pin_dir)
{
	int (*unpin_fn)(struct bpf_program *, const char *) = bpf_program__unpin;

	return bpf_obj_run_prog_pindir_func(obj, prog_title, pin_dir, unpin_fn);
}
static int xdp_detach(int ifindex, __u32 xdp_flags)
{
return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
@@ -317,73 +267,43 @@ static int init_rodata(struct bpf_object *obj, void *src, size_t size)
return -EINVAL;
}
static int run_external_program(const char *path, char *const argv[])
static int tc_attach(struct bpf_object *obj, int ifindex,
enum bpf_tc_attach_point attach_point,
const char *prog_title, struct bpf_tc_opts *opts)
{
int status;
int ret = -1;
int err;
int prog_fd;
DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
.attach_point = attach_point);
pid_t pid = fork();
if (pid < 0)
return -errno;
if (pid == 0) {
execv(path, argv);
return -errno;
} else { //pid > 0
waitpid(pid, &status, 0);
if (WIFEXITED(status))
ret = WEXITSTATUS(status);
return ret;
}
}
static int __tc_attach(char *interface, const char *sec, const char *pin_dir)
{
char prog_path[MAX_PATH_LEN];
char *const argv[] = { TCBPF_LOADER_SCRIPT, "--dev", interface,
"--pinned", prog_path, NULL };
if (snprintf(prog_path, sizeof(prog_path), "%s/%s", pin_dir, sec) < 0)
return -EINVAL;
return run_external_program(TCBPF_LOADER_SCRIPT, argv);
}
static int tc_attach(struct bpf_object *obj, char *interface,
const char *prog_title, const char *pin_dir)
{
int err, unpin_err;
// Temporarily pin program while attaching it with tc-command
err = bpf_obj_pin_program(obj, prog_title, pin_dir);
if (err)
err = bpf_tc_hook_create(&hook);
if (err && err != -EEXIST)
return err;
err = __tc_attach(interface, prog_title, pin_dir);
prog_fd = bpf_program__fd(
bpf_object__find_program_by_title(obj, prog_title));
if (prog_fd < 0)
return prog_fd;
/* we need to unpin regardless of whether attach succeeded, and
* we can't really do anything about it if unpinning fails, so just warn
* and continue if it does fail.
*/
unpin_err = bpf_obj_unpin_program(obj, prog_title, pin_dir);
if (unpin_err)
fprintf(stderr,
"Warning: Failed unpinning tc program from %s/%s: %s\n",
pin_dir, prog_title, strerror(-unpin_err));
return err;
opts->prog_fd = prog_fd;
opts->prog_id = 0;
return bpf_tc_attach(&hook, opts);
}
static int tc_detach(char *interface)
static int tc_detach(int ifindex, enum bpf_tc_attach_point attach_point,
struct bpf_tc_opts *opts)
{
char *const argv[] = { TCBPF_LOADER_SCRIPT, "--dev", interface,
"--remove", NULL };
return run_external_program(TCBPF_LOADER_SCRIPT, argv);
DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
.attach_point = attach_point);
opts->prog_fd = 0;
opts->prog_id = 0;
opts->flags = 0;
return bpf_tc_detach(&hook, opts);
}
/*
* Returns time of CLOCK_MONOTONIC as nanoseconds in a single __u64.
* Returns time as nanoseconds in a single __u64.
* On failure, the value 0 is returned (and errno will be set).
*/
static __u64 get_time_ns(clockid_t clockid)
@@ -735,7 +655,8 @@ static void handle_missed_rtt_event(void *ctx, int cpu, __u64 lost_cnt)
}
static int load_attach_bpfprogs(struct bpf_object **obj,
struct pping_config *config)
struct pping_config *config,
struct bpf_tc_opts *tc_opts)
{
int err, detach_err;
@@ -764,8 +685,8 @@ static int load_attach_bpfprogs(struct bpf_object **obj,
}
// Attach tc prog
err = tc_attach(*obj, config->ifname, config->egress_sec,
config->pin_dir);
err = tc_attach(*obj, config->ifindex, BPF_TC_EGRESS,
config->egress_sec, tc_opts);
if (err) {
fprintf(stderr,
"Failed attaching tc program on interface %s: %s\n",
@@ -787,7 +708,7 @@ static int load_attach_bpfprogs(struct bpf_object **obj,
return 0;
err_xdp:
detach_err = tc_detach(config->ifname);
detach_err = tc_detach(config->ifindex, BPF_TC_EGRESS, tc_opts);
if (detach_err)
fprintf(stderr, "Failed detaching tc program from %s: %s\n",
config->ifname, strerror(-detach_err));
@@ -847,13 +768,13 @@ int main(int argc, char *argv[])
.object_path = "pping_kern.o",
.ingress_sec = INGRESS_PROG_SEC,
.egress_sec = EGRESS_PROG_SEC,
.pin_dir = "/sys/fs/bpf/pping",
.packet_map = "packet_ts",
.flow_map = "flow_state",
.event_map = "events",
.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST,
};
DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_opts); // Need to keep track of where tc prog was attached
print_event_func = print_event_standard;
// Detect if running as root
@@ -886,7 +807,7 @@ int main(int argc, char *argv[])
print_event_func = print_event_ppviz;
}
err = load_attach_bpfprogs(&obj, &config);
err = load_attach_bpfprogs(&obj, &config, &tc_opts);
if (err) {
fprintf(stderr,
"Failed loading and attaching BPF programs in %s\n",
@@ -935,7 +856,7 @@ int main(int argc, char *argv[])
perf_buffer__free(pb);
cleanup_attached_progs:
detach_err = tc_detach(config.ifname);
detach_err = tc_detach(config.ifindex, BPF_TC_EGRESS, &tc_opts);
if (detach_err)
fprintf(stderr,
"Failed removing tc program from interface %s: %s\n",