pping: Add tc ingress hook as alternative to XDP

For some machines, XDP may not be suitable due to, e.g., a lack of XDP
support in NIC drivers or another program already being attached to
the XDP hook on the desired interface. Therefore, add an option to
attach the pping ingress BPF program to the tc-ingress hook instead of
the XDP hook.

In practice, this adds an additional BPF program to the object file (a
TC ingress program). To avoid loading an unnecessary BPF program, also
explicitly disable autoloading for the ingress program not selected.

Also, change the tc programs to return TC_ACT_OK instead of
BPF_OK. While both should be compatible, the TC_ACT_* return codes
seem to be more commonly used for TC-BPF programs.

Concerns with this commit:
- The error messages for XDP attach failures have become slightly less
  descriptive. I plan to improve the code for attaching and detaching
  XDP programs in a separate commit, and will then address that.

Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
Simon Sundberg
2021-12-08 10:06:30 +01:00
parent bd29a246b9
commit 2f5c3fc5b0
3 changed files with 159 additions and 70 deletions

View File

@@ -65,6 +65,8 @@ struct map_cleanup_args {
// Store configuration values in struct to easily pass around
struct pping_config {
struct bpf_config bpf_config;
struct bpf_tc_opts tc_ingress_opts;
struct bpf_tc_opts tc_egress_opts;
__u64 cleanup_interval;
char *object_path;
char *ingress_sec;
@@ -91,6 +93,7 @@ static const struct option long_options[] = {
{ "force", no_argument, NULL, 'f' }, // Detach any existing XDP program on interface
{ "cleanup-interval", required_argument, NULL, 'c' }, // Map cleaning interval in s
{ "format", required_argument, NULL, 'F' }, // Which format to output in (standard/json/ppviz)
{ "ingress-hook", required_argument, NULL, 'I' }, // Use tc or XDP as ingress hook
{ 0, 0, NULL, 0 }
};
@@ -145,7 +148,7 @@ static int parse_arguments(int argc, char *argv[], struct pping_config *config)
config->json_format = false;
config->ppviz_format = false;
while ((opt = getopt_long(argc, argv, "hfi:r:c:F:", long_options,
while ((opt = getopt_long(argc, argv, "hfi:r:c:F:I:", long_options,
NULL)) != -1) {
switch (opt) {
case 'i':
@@ -192,6 +195,16 @@ static int parse_arguments(int argc, char *argv[], struct pping_config *config)
return -EINVAL;
}
break;
case 'I':
if (strcmp(optarg, "xdp") == 0) {
config->ingress_sec = SEC_INGRESS_XDP;
} else if (strcmp(optarg, "tc") == 0) {
config->ingress_sec = SEC_INGRESS_TC;
} else {
fprintf(stderr, "ingress-hook must be \"xdp\" or \"tc\"\n");
return -EINVAL;
}
break;
case 'f':
config->force = true;
break;
@@ -654,9 +667,29 @@ static void handle_missed_rtt_event(void *ctx, int cpu, __u64 lost_cnt)
fprintf(stderr, "Lost %llu RTT events on CPU %d\n", lost_cnt, cpu);
}
/*
 * Restricts autoloading to the BPF programs that will actually be attached.
 *
 * Assumes all programs in the object file are set to autoload by default, so
 * in practice this only disables autoloading for the ingress program (XDP or
 * tc) whose section was NOT selected in config->ingress_sec.
 *
 * Returns a negative error code if the unused program cannot be found in obj,
 * otherwise the result of bpf_program__set_autoload().
 */
static int set_programs_to_load(struct bpf_object *obj,
				struct pping_config *config)
{
	struct bpf_program *prog;
	const char *unload_sec;
	long err;

	// Pick the section of the ingress program that will not be used
	unload_sec = strcmp(SEC_INGRESS_XDP, config->ingress_sec) == 0 ?
			     SEC_INGRESS_TC :
			     SEC_INGRESS_XDP;

	prog = bpf_object__find_program_by_title(obj, unload_sec);
	err = libbpf_get_error(prog);
	if (err)
		return err;

	/*
	 * A missing program may be reported as a plain NULL pointer, which
	 * (depending on the libbpf version) libbpf_get_error() may not flag
	 * as an error. Never hand NULL to bpf_program__set_autoload().
	 */
	if (!prog)
		return -ENOENT;

	return bpf_program__set_autoload(prog, false);
}
static int load_attach_bpfprogs(struct bpf_object **obj,
struct pping_config *config,
struct bpf_tc_opts *tc_opts)
struct pping_config *config)
{
int err, detach_err;
@@ -677,38 +710,43 @@ static int load_attach_bpfprogs(struct bpf_object **obj,
return err;
}
set_programs_to_load(*obj, config);
err = bpf_object__load(*obj);
if (err) {
fprintf(stderr, "Failed loading bpf program in %s: %s\n",
fprintf(stderr, "Failed loading bpf programs in %s: %s\n",
config->object_path, strerror(-err));
return err;
}
// Attach tc prog
// Attach egress prog
err = tc_attach(*obj, config->ifindex, BPF_TC_EGRESS,
config->egress_sec, tc_opts);
config->egress_sec, &config->tc_egress_opts);
if (err) {
fprintf(stderr,
"Failed attaching tc program on interface %s: %s\n",
"Failed attaching egress BPF program on interface %s: %s\n",
config->ifname, strerror(-err));
return err;
}
// Attach xdp prog
err = xdp_attach(*obj, config->ingress_sec, config->ifindex,
config->xdp_flags, config->force);
// Attach ingress prog
if (strcmp(config->ingress_sec, SEC_INGRESS_XDP) == 0)
err = xdp_attach(*obj, config->ingress_sec, config->ifindex,
config->xdp_flags, config->force);
else
err = tc_attach(*obj, config->ifindex, BPF_TC_INGRESS,
config->ingress_sec, &config->tc_ingress_opts);
if (err) {
fprintf(stderr, "Failed attaching XDP program to %s%s: %s\n",
config->ifname,
config->force ? "" : ", ensure no XDP program is already running on interface",
strerror(-err));
goto err_xdp;
fprintf(stderr,
"Failed attaching ingress BPF program on interface %s: %s\n",
config->ifname, strerror(-err));
goto ingress_err;
}
return 0;
err_xdp:
detach_err = tc_detach(config->ifindex, BPF_TC_EGRESS, tc_opts);
ingress_err:
detach_err = tc_detach(config->ifindex, BPF_TC_EGRESS,
&config->tc_egress_opts);
if (detach_err)
fprintf(stderr, "Failed detaching tc program from %s: %s\n",
config->ifname, strerror(-detach_err));
@@ -762,19 +800,23 @@ int main(int argc, char *argv[])
.lost_cb = handle_missed_rtt_event,
};
DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_ingress_opts);
DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_egress_opts);
struct pping_config config = {
.bpf_config = { .rate_limit = 100 * NS_PER_MS },
.cleanup_interval = 1 * NS_PER_SECOND,
.object_path = "pping_kern.o",
.ingress_sec = INGRESS_PROG_SEC,
.egress_sec = EGRESS_PROG_SEC,
.ingress_sec = SEC_INGRESS_XDP,
.egress_sec = SEC_EGRESS_TC,
.packet_map = "packet_ts",
.flow_map = "flow_state",
.event_map = "events",
.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST,
.tc_ingress_opts = tc_ingress_opts,
.tc_egress_opts = tc_egress_opts,
};
DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_opts); // Need to keep track of where tc prog was attached
print_event_func = print_event_standard;
// Detect if running as root
@@ -807,7 +849,7 @@ int main(int argc, char *argv[])
print_event_func = print_event_ppviz;
}
err = load_attach_bpfprogs(&obj, &config, &tc_opts);
err = load_attach_bpfprogs(&obj, &config);
if (err) {
fprintf(stderr,
"Failed loading and attaching BPF programs in %s\n",
@@ -856,16 +898,21 @@ int main(int argc, char *argv[])
perf_buffer__free(pb);
cleanup_attached_progs:
detach_err = tc_detach(config.ifindex, BPF_TC_EGRESS, &tc_opts);
detach_err = tc_detach(config.ifindex, BPF_TC_EGRESS,
&config.tc_egress_opts);
if (detach_err)
fprintf(stderr,
"Failed removing tc program from interface %s: %s\n",
"Failed removing egress program from interface %s: %s\n",
config.ifname, strerror(-detach_err));
detach_err = xdp_detach(config.ifindex, config.xdp_flags);
if (strcmp(config.ingress_sec, SEC_INGRESS_XDP) == 0)
detach_err = xdp_detach(config.ifindex, config.xdp_flags);
else
detach_err = tc_detach(config.ifindex, BPF_TC_INGRESS,
&config.tc_ingress_opts);
if (detach_err)
fprintf(stderr,
"Failed removing xdp program from interface %s: %s\n",
"Failed removing ingress program from interface %s: %s\n",
config.ifname, strerror(-detach_err));
return (err != 0 && keep_running) || detach_err != 0;

View File

@@ -6,8 +6,9 @@
#include <linux/in6.h>
#include <stdbool.h>
#define INGRESS_PROG_SEC "xdp"
#define EGRESS_PROG_SEC "classifier"
#define SEC_INGRESS_XDP "xdp"
#define SEC_INGRESS_TC "classifier/ingress"
#define SEC_EGRESS_TC "classifier/egress"
/* For the event_type members of rtt_event and flow_event */
#define EVENT_TYPE_FLOW 1

View File

@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/pkt_cls.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/if_ether.h>
@@ -267,27 +268,21 @@ static void fill_flow_event(struct flow_event *fe, __u64 timestamp,
fe->reserved = 0; // Make sure it's initilized
}
// Programs
// TC-BFP for parsing packet identifier from egress traffic and add to map
SEC(EGRESS_PROG_SEC)
int pping_egress(struct __sk_buff *skb)
/*
* Main function for handling the pping egress path.
* Parses the packet for an identifier and attempts to store a timestamp for it
* in the packet_ts map.
*/
static void pping_egress(void *ctx, struct parsing_context *pctx)
{
struct packet_id p_id = { 0 };
struct flow_event fe;
__u64 now;
struct parsing_context pctx = {
.data = (void *)(long)skb->data,
.data_end = (void *)(long)skb->data_end,
.pkt_len = skb->len,
.nh = { .pos = pctx.data },
.is_egress = true,
};
struct flow_state *f_state;
struct flow_state new_state = { 0 };
__u64 now;
if (parse_packet_identifier(&pctx, &p_id, &fe.event_info) < 0)
goto out;
if (parse_packet_identifier(pctx, &p_id, &fe.event_info) < 0)
return;
now = bpf_ktime_get_ns(); // or bpf_ktime_get_boot_ns
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
@@ -298,10 +293,10 @@ int pping_egress(struct __sk_buff *skb)
bpf_map_delete_elem(&flow_state, &p_id.flow);
fill_flow_event(&fe, now, &p_id.flow,
EVENT_SOURCE_EGRESS);
bpf_perf_event_output(skb, &events, BPF_F_CURRENT_CPU,
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
&fe, sizeof(fe));
}
goto out;
return;
}
// No previous state - attempt to create it and push flow-opening event
@@ -311,29 +306,29 @@ int pping_egress(struct __sk_buff *skb)
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
if (!f_state) // Creation failed
goto out;
return;
if (fe.event_info.event != FLOW_EVENT_OPENING) {
fe.event_info.event = FLOW_EVENT_OPENING;
fe.event_info.reason = EVENT_REASON_FIRST_OBS_PCKT;
}
fill_flow_event(&fe, now, &p_id.flow, EVENT_SOURCE_EGRESS);
bpf_perf_event_output(skb, &events, BPF_F_CURRENT_CPU, &fe,
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &fe,
sizeof(fe));
}
f_state->sent_pkts++;
f_state->sent_bytes += remaining_pkt_payload(&pctx);
f_state->sent_bytes += remaining_pkt_payload(pctx);
// Check if identifier is new
if (f_state->last_id == p_id.identifier)
goto out;
return;
f_state->last_id = p_id.identifier;
// Check rate-limit
if (now < f_state->last_timestamp ||
now - f_state->last_timestamp < config.rate_limit)
goto out;
return;
/*
* Updates attempt at creating timestamp, even if creation of timestamp
@@ -344,37 +339,32 @@ int pping_egress(struct __sk_buff *skb)
f_state->last_timestamp = now;
bpf_map_update_elem(&packet_ts, &p_id, &now, BPF_NOEXIST);
out:
return BPF_OK;
return;
}
// XDP program for parsing identifier in ingress traffic and check for match in map
SEC(INGRESS_PROG_SEC)
int pping_ingress(struct xdp_md *ctx)
/*
* Main function for handling the pping ingress path.
* Parses the packet for an identifier and tries to look up a stored timestamp.
* If it finds a match, it pushes an rtt_event to the events buffer.
*/
static void pping_ingress(void *ctx, struct parsing_context *pctx)
{
struct packet_id p_id = { 0 };
__u64 *p_ts;
struct flow_event fe;
struct rtt_event re = { 0 };
struct flow_state *f_state;
struct parsing_context pctx = {
.data = (void *)(long)ctx->data,
.data_end = (void *)(long)ctx->data_end,
.pkt_len = pctx.data_end - pctx.data,
.nh = { .pos = pctx.data },
.is_egress = false,
};
__u64 *p_ts;
__u64 now;
if (parse_packet_identifier(&pctx, &p_id, &fe.event_info) < 0)
goto out;
if (parse_packet_identifier(pctx, &p_id, &fe.event_info) < 0)
return;
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
if (!f_state)
goto out;
return;
f_state->rec_pkts++;
f_state->rec_bytes += remaining_pkt_payload(&pctx);
f_state->rec_bytes += remaining_pkt_payload(pctx);
now = bpf_ktime_get_ns();
p_ts = bpf_map_lookup_elem(&packet_ts, &p_id);
@@ -389,6 +379,7 @@ int pping_ingress(struct xdp_md *ctx)
if (f_state->min_rtt == 0 || re.rtt < f_state->min_rtt)
f_state->min_rtt = re.rtt;
// Fill event and push to perf-buffer
re.event_type = EVENT_TYPE_RTT;
re.timestamp = now;
re.min_rtt = f_state->min_rtt;
@@ -396,9 +387,7 @@ int pping_ingress(struct xdp_md *ctx)
re.sent_bytes = f_state->sent_bytes;
re.rec_pkts = f_state->rec_pkts;
re.rec_bytes = f_state->rec_bytes;
// Push event to perf-buffer
__builtin_memcpy(&re.flow, &p_id.flow, sizeof(struct network_tuple));
re.flow = p_id.flow;
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &re, sizeof(re));
validflow_out:
@@ -410,6 +399,58 @@ validflow_out:
sizeof(fe));
}
out:
return;
}
// Programs
/*
 * Egress path using TC-BPF.
 * Builds a parsing context covering the packet data of skb and passes it to
 * pping_egress(). Always returns TC_ACT_OK, regardless of how the parsing
 * went.
 */
SEC(SEC_EGRESS_TC)
int pping_tc_egress(struct __sk_buff *skb)
{
	/*
	 * Resolve the packet pointers before building pctx, so that no member
	 * initializer has to read another member of the object while it is
	 * still being initialized (the evaluation order of initializer
	 * expressions is not specified).
	 */
	void *data = (void *)(long)skb->data;
	void *data_end = (void *)(long)skb->data_end;
	struct parsing_context pctx = {
		.data = data,
		.data_end = data_end,
		.pkt_len = skb->len,
		.nh = { .pos = data },
		.is_egress = true,
	};

	pping_egress(skb, &pctx);

	return TC_ACT_OK;
}
/*
 * Ingress path using TC-BPF.
 * Builds a parsing context covering the packet data of skb and passes it to
 * pping_ingress(). Always returns TC_ACT_OK, regardless of how the parsing
 * went.
 */
SEC(SEC_INGRESS_TC)
int pping_tc_ingress(struct __sk_buff *skb)
{
	/*
	 * Resolve the packet pointers before building pctx, so that no member
	 * initializer has to read another member of the object while it is
	 * still being initialized (the evaluation order of initializer
	 * expressions is not specified).
	 */
	void *data = (void *)(long)skb->data;
	void *data_end = (void *)(long)skb->data_end;
	struct parsing_context pctx = {
		.data = data,
		.data_end = data_end,
		.pkt_len = skb->len,
		.nh = { .pos = data },
		.is_egress = false,
	};

	pping_ingress(skb, &pctx);

	return TC_ACT_OK;
}
/*
 * Ingress path using XDP.
 * Builds a parsing context covering the packet data of ctx and passes it to
 * pping_ingress(). The packet length is derived from the distance between
 * the data and data_end pointers. Always returns XDP_PASS, regardless of how
 * the parsing went.
 */
SEC(SEC_INGRESS_XDP)
int pping_xdp_ingress(struct xdp_md *ctx)
{
	/*
	 * Resolve the packet pointers before building pctx, so that no member
	 * initializer has to read another member of the object while it is
	 * still being initialized (the evaluation order of initializer
	 * expressions is not specified).
	 */
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct parsing_context pctx = {
		.data = data,
		.data_end = data_end,
		.pkt_len = data_end - data,
		.nh = { .pos = data },
		.is_egress = false,
	};

	pping_ingress(ctx, &pctx);

	return XDP_PASS;
}