mirror of
https://github.com/xdp-project/bpf-examples.git
synced 2024-05-06 15:54:53 +00:00
pping: Add tc ingress hook as alternative to XDP
For some machines, XDP may not be suitable, e.g. due to a lack of XDP support in the NIC driver or because another program is already attached to the XDP hook on the desired interface. Therefore, add an option to use the tc-ingress hook instead of XDP for attaching the pping ingress BPF program. In practice, this adds an additional BPF program to the object file (a TC ingress program). To avoid loading an unnecessary BPF program, also explicitly disable autoloading for the ingress program that is not selected. Also, change the tc programs to return TC_ACT_OK instead of BPF_OK. While both should be compatible, the TC_ACT_* return codes seem to be more commonly used for TC-BPF programs. Concerns with this commit: - The error message for an XDP attach failure has become slightly less descriptive. I plan to improve the code for attaching and detaching XDP programs in a separate commit, and will address that then. Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
@@ -65,6 +65,8 @@ struct map_cleanup_args {
|
||||
// Store configuration values in struct to easily pass around
|
||||
struct pping_config {
|
||||
struct bpf_config bpf_config;
|
||||
struct bpf_tc_opts tc_ingress_opts;
|
||||
struct bpf_tc_opts tc_egress_opts;
|
||||
__u64 cleanup_interval;
|
||||
char *object_path;
|
||||
char *ingress_sec;
|
||||
@@ -91,6 +93,7 @@ static const struct option long_options[] = {
|
||||
{ "force", no_argument, NULL, 'f' }, // Detach any existing XDP program on interface
|
||||
{ "cleanup-interval", required_argument, NULL, 'c' }, // Map cleaning interval in s
|
||||
{ "format", required_argument, NULL, 'F' }, // Which format to output in (standard/json/ppviz)
|
||||
{ "ingress-hook", required_argument, NULL, 'I' }, // Use tc or XDP as ingress hook
|
||||
{ 0, 0, NULL, 0 }
|
||||
};
|
||||
|
||||
@@ -145,7 +148,7 @@ static int parse_arguments(int argc, char *argv[], struct pping_config *config)
|
||||
config->json_format = false;
|
||||
config->ppviz_format = false;
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "hfi:r:c:F:", long_options,
|
||||
while ((opt = getopt_long(argc, argv, "hfi:r:c:F:I:", long_options,
|
||||
NULL)) != -1) {
|
||||
switch (opt) {
|
||||
case 'i':
|
||||
@@ -192,6 +195,16 @@ static int parse_arguments(int argc, char *argv[], struct pping_config *config)
|
||||
return -EINVAL;
|
||||
}
|
||||
break;
|
||||
case 'I':
|
||||
if (strcmp(optarg, "xdp") == 0) {
|
||||
config->ingress_sec = SEC_INGRESS_XDP;
|
||||
} else if (strcmp(optarg, "tc") == 0) {
|
||||
config->ingress_sec = SEC_INGRESS_TC;
|
||||
} else {
|
||||
fprintf(stderr, "ingress-hook must be \"xdp\" or \"tc\"\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
break;
|
||||
case 'f':
|
||||
config->force = true;
|
||||
break;
|
||||
@@ -654,9 +667,29 @@ static void handle_missed_rtt_event(void *ctx, int cpu, __u64 lost_cnt)
|
||||
fprintf(stderr, "Lost %llu RTT events on CPU %d\n", lost_cnt, cpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* Sets only the necessary programs in the object file to autoload.
|
||||
*
|
||||
* Assumes all programs are set to autoload by default, so in practice
|
||||
* deactivates autoloading for the program that does not need to be loaded.
|
||||
*/
|
||||
static int set_programs_to_load(struct bpf_object *obj,
|
||||
struct pping_config *config)
|
||||
{
|
||||
struct bpf_program *prog;
|
||||
char *unload_sec = strcmp(SEC_INGRESS_XDP, config->ingress_sec) == 0 ?
|
||||
SEC_INGRESS_TC :
|
||||
SEC_INGRESS_XDP;
|
||||
|
||||
prog = bpf_object__find_program_by_title(obj, unload_sec);
|
||||
if (libbpf_get_error(prog))
|
||||
return libbpf_get_error(prog);
|
||||
|
||||
return bpf_program__set_autoload(prog, false);
|
||||
}
|
||||
|
||||
static int load_attach_bpfprogs(struct bpf_object **obj,
|
||||
struct pping_config *config,
|
||||
struct bpf_tc_opts *tc_opts)
|
||||
struct pping_config *config)
|
||||
{
|
||||
int err, detach_err;
|
||||
|
||||
@@ -677,38 +710,43 @@ static int load_attach_bpfprogs(struct bpf_object **obj,
|
||||
return err;
|
||||
}
|
||||
|
||||
set_programs_to_load(*obj, config);
|
||||
err = bpf_object__load(*obj);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed loading bpf program in %s: %s\n",
|
||||
fprintf(stderr, "Failed loading bpf programs in %s: %s\n",
|
||||
config->object_path, strerror(-err));
|
||||
return err;
|
||||
}
|
||||
|
||||
// Attach tc prog
|
||||
// Attach egress prog
|
||||
err = tc_attach(*obj, config->ifindex, BPF_TC_EGRESS,
|
||||
config->egress_sec, tc_opts);
|
||||
config->egress_sec, &config->tc_egress_opts);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
"Failed attaching tc program on interface %s: %s\n",
|
||||
"Failed attaching egress BPF program on interface %s: %s\n",
|
||||
config->ifname, strerror(-err));
|
||||
return err;
|
||||
}
|
||||
|
||||
// Attach xdp prog
|
||||
err = xdp_attach(*obj, config->ingress_sec, config->ifindex,
|
||||
config->xdp_flags, config->force);
|
||||
// Attach ingress prog
|
||||
if (strcmp(config->ingress_sec, SEC_INGRESS_XDP) == 0)
|
||||
err = xdp_attach(*obj, config->ingress_sec, config->ifindex,
|
||||
config->xdp_flags, config->force);
|
||||
else
|
||||
err = tc_attach(*obj, config->ifindex, BPF_TC_INGRESS,
|
||||
config->ingress_sec, &config->tc_ingress_opts);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed attaching XDP program to %s%s: %s\n",
|
||||
config->ifname,
|
||||
config->force ? "" : ", ensure no XDP program is already running on interface",
|
||||
strerror(-err));
|
||||
goto err_xdp;
|
||||
fprintf(stderr,
|
||||
"Failed attaching ingress BPF program on interface %s: %s\n",
|
||||
config->ifname, strerror(-err));
|
||||
goto ingress_err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_xdp:
|
||||
detach_err = tc_detach(config->ifindex, BPF_TC_EGRESS, tc_opts);
|
||||
ingress_err:
|
||||
detach_err = tc_detach(config->ifindex, BPF_TC_EGRESS,
|
||||
&config->tc_egress_opts);
|
||||
if (detach_err)
|
||||
fprintf(stderr, "Failed detaching tc program from %s: %s\n",
|
||||
config->ifname, strerror(-detach_err));
|
||||
@@ -762,19 +800,23 @@ int main(int argc, char *argv[])
|
||||
.lost_cb = handle_missed_rtt_event,
|
||||
};
|
||||
|
||||
DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_ingress_opts);
|
||||
DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_egress_opts);
|
||||
|
||||
struct pping_config config = {
|
||||
.bpf_config = { .rate_limit = 100 * NS_PER_MS },
|
||||
.cleanup_interval = 1 * NS_PER_SECOND,
|
||||
.object_path = "pping_kern.o",
|
||||
.ingress_sec = INGRESS_PROG_SEC,
|
||||
.egress_sec = EGRESS_PROG_SEC,
|
||||
.ingress_sec = SEC_INGRESS_XDP,
|
||||
.egress_sec = SEC_EGRESS_TC,
|
||||
.packet_map = "packet_ts",
|
||||
.flow_map = "flow_state",
|
||||
.event_map = "events",
|
||||
.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST,
|
||||
.tc_ingress_opts = tc_ingress_opts,
|
||||
.tc_egress_opts = tc_egress_opts,
|
||||
};
|
||||
|
||||
DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_opts); // Need to keep track of where tc prog was attached
|
||||
print_event_func = print_event_standard;
|
||||
|
||||
// Detect if running as root
|
||||
@@ -807,7 +849,7 @@ int main(int argc, char *argv[])
|
||||
print_event_func = print_event_ppviz;
|
||||
}
|
||||
|
||||
err = load_attach_bpfprogs(&obj, &config, &tc_opts);
|
||||
err = load_attach_bpfprogs(&obj, &config);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
"Failed loading and attaching BPF programs in %s\n",
|
||||
@@ -856,16 +898,21 @@ int main(int argc, char *argv[])
|
||||
perf_buffer__free(pb);
|
||||
|
||||
cleanup_attached_progs:
|
||||
detach_err = tc_detach(config.ifindex, BPF_TC_EGRESS, &tc_opts);
|
||||
detach_err = tc_detach(config.ifindex, BPF_TC_EGRESS,
|
||||
&config.tc_egress_opts);
|
||||
if (detach_err)
|
||||
fprintf(stderr,
|
||||
"Failed removing tc program from interface %s: %s\n",
|
||||
"Failed removing egress program from interface %s: %s\n",
|
||||
config.ifname, strerror(-detach_err));
|
||||
|
||||
detach_err = xdp_detach(config.ifindex, config.xdp_flags);
|
||||
if (strcmp(config.ingress_sec, SEC_INGRESS_XDP) == 0)
|
||||
detach_err = xdp_detach(config.ifindex, config.xdp_flags);
|
||||
else
|
||||
detach_err = tc_detach(config.ifindex, BPF_TC_INGRESS,
|
||||
&config.tc_ingress_opts);
|
||||
if (detach_err)
|
||||
fprintf(stderr,
|
||||
"Failed removing xdp program from interface %s: %s\n",
|
||||
"Failed removing ingress program from interface %s: %s\n",
|
||||
config.ifname, strerror(-detach_err));
|
||||
|
||||
return (err != 0 && keep_running) || detach_err != 0;
|
||||
|
||||
@@ -6,8 +6,9 @@
|
||||
#include <linux/in6.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#define INGRESS_PROG_SEC "xdp"
|
||||
#define EGRESS_PROG_SEC "classifier"
|
||||
#define SEC_INGRESS_XDP "xdp"
|
||||
#define SEC_INGRESS_TC "classifier/ingress"
|
||||
#define SEC_EGRESS_TC "classifier/egress"
|
||||
|
||||
/* For the event_type members of rtt_event and flow_event */
|
||||
#define EVENT_TYPE_FLOW 1
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <linux/pkt_cls.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/in6.h>
|
||||
#include <linux/if_ether.h>
|
||||
@@ -267,27 +268,21 @@ static void fill_flow_event(struct flow_event *fe, __u64 timestamp,
|
||||
fe->reserved = 0; // Make sure it's initilized
|
||||
}
|
||||
|
||||
// Programs
|
||||
|
||||
// TC-BFP for parsing packet identifier from egress traffic and add to map
|
||||
SEC(EGRESS_PROG_SEC)
|
||||
int pping_egress(struct __sk_buff *skb)
|
||||
/*
|
||||
* Main function for handling the pping egress path.
|
||||
* Parses the packet for an identifier and attempts to store a timestamp for it
|
||||
* in the packet_ts map.
|
||||
*/
|
||||
static void pping_egress(void *ctx, struct parsing_context *pctx)
|
||||
{
|
||||
struct packet_id p_id = { 0 };
|
||||
struct flow_event fe;
|
||||
__u64 now;
|
||||
struct parsing_context pctx = {
|
||||
.data = (void *)(long)skb->data,
|
||||
.data_end = (void *)(long)skb->data_end,
|
||||
.pkt_len = skb->len,
|
||||
.nh = { .pos = pctx.data },
|
||||
.is_egress = true,
|
||||
};
|
||||
struct flow_state *f_state;
|
||||
struct flow_state new_state = { 0 };
|
||||
__u64 now;
|
||||
|
||||
if (parse_packet_identifier(&pctx, &p_id, &fe.event_info) < 0)
|
||||
goto out;
|
||||
if (parse_packet_identifier(pctx, &p_id, &fe.event_info) < 0)
|
||||
return;
|
||||
|
||||
now = bpf_ktime_get_ns(); // or bpf_ktime_get_boot_ns
|
||||
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
|
||||
@@ -298,10 +293,10 @@ int pping_egress(struct __sk_buff *skb)
|
||||
bpf_map_delete_elem(&flow_state, &p_id.flow);
|
||||
fill_flow_event(&fe, now, &p_id.flow,
|
||||
EVENT_SOURCE_EGRESS);
|
||||
bpf_perf_event_output(skb, &events, BPF_F_CURRENT_CPU,
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
|
||||
&fe, sizeof(fe));
|
||||
}
|
||||
goto out;
|
||||
return;
|
||||
}
|
||||
|
||||
// No previous state - attempt to create it and push flow-opening event
|
||||
@@ -311,29 +306,29 @@ int pping_egress(struct __sk_buff *skb)
|
||||
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
|
||||
|
||||
if (!f_state) // Creation failed
|
||||
goto out;
|
||||
return;
|
||||
|
||||
if (fe.event_info.event != FLOW_EVENT_OPENING) {
|
||||
fe.event_info.event = FLOW_EVENT_OPENING;
|
||||
fe.event_info.reason = EVENT_REASON_FIRST_OBS_PCKT;
|
||||
}
|
||||
fill_flow_event(&fe, now, &p_id.flow, EVENT_SOURCE_EGRESS);
|
||||
bpf_perf_event_output(skb, &events, BPF_F_CURRENT_CPU, &fe,
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &fe,
|
||||
sizeof(fe));
|
||||
}
|
||||
|
||||
f_state->sent_pkts++;
|
||||
f_state->sent_bytes += remaining_pkt_payload(&pctx);
|
||||
f_state->sent_bytes += remaining_pkt_payload(pctx);
|
||||
|
||||
// Check if identfier is new
|
||||
if (f_state->last_id == p_id.identifier)
|
||||
goto out;
|
||||
return;
|
||||
f_state->last_id = p_id.identifier;
|
||||
|
||||
// Check rate-limit
|
||||
if (now < f_state->last_timestamp ||
|
||||
now - f_state->last_timestamp < config.rate_limit)
|
||||
goto out;
|
||||
return;
|
||||
|
||||
/*
|
||||
* Updates attempt at creating timestamp, even if creation of timestamp
|
||||
@@ -344,37 +339,32 @@ int pping_egress(struct __sk_buff *skb)
|
||||
f_state->last_timestamp = now;
|
||||
bpf_map_update_elem(&packet_ts, &p_id, &now, BPF_NOEXIST);
|
||||
|
||||
out:
|
||||
return BPF_OK;
|
||||
return;
|
||||
}
|
||||
|
||||
// XDP program for parsing identifier in ingress traffic and check for match in map
|
||||
SEC(INGRESS_PROG_SEC)
|
||||
int pping_ingress(struct xdp_md *ctx)
|
||||
/*
|
||||
* Main function for handling the pping ingress path.
|
||||
* Parses the packet for an identifier and tries to look up a stored timestamp.
|
||||
* If it finds a match, it pushes an rtt_event to the events buffer.
|
||||
*/
|
||||
static void pping_ingress(void *ctx, struct parsing_context *pctx)
|
||||
{
|
||||
struct packet_id p_id = { 0 };
|
||||
__u64 *p_ts;
|
||||
struct flow_event fe;
|
||||
struct rtt_event re = { 0 };
|
||||
struct flow_state *f_state;
|
||||
struct parsing_context pctx = {
|
||||
.data = (void *)(long)ctx->data,
|
||||
.data_end = (void *)(long)ctx->data_end,
|
||||
.pkt_len = pctx.data_end - pctx.data,
|
||||
.nh = { .pos = pctx.data },
|
||||
.is_egress = false,
|
||||
};
|
||||
__u64 *p_ts;
|
||||
__u64 now;
|
||||
|
||||
if (parse_packet_identifier(&pctx, &p_id, &fe.event_info) < 0)
|
||||
goto out;
|
||||
if (parse_packet_identifier(pctx, &p_id, &fe.event_info) < 0)
|
||||
return;
|
||||
|
||||
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
|
||||
if (!f_state)
|
||||
goto out;
|
||||
return;
|
||||
|
||||
f_state->rec_pkts++;
|
||||
f_state->rec_bytes += remaining_pkt_payload(&pctx);
|
||||
f_state->rec_bytes += remaining_pkt_payload(pctx);
|
||||
|
||||
now = bpf_ktime_get_ns();
|
||||
p_ts = bpf_map_lookup_elem(&packet_ts, &p_id);
|
||||
@@ -389,6 +379,7 @@ int pping_ingress(struct xdp_md *ctx)
|
||||
if (f_state->min_rtt == 0 || re.rtt < f_state->min_rtt)
|
||||
f_state->min_rtt = re.rtt;
|
||||
|
||||
// Fill event and push to perf-buffer
|
||||
re.event_type = EVENT_TYPE_RTT;
|
||||
re.timestamp = now;
|
||||
re.min_rtt = f_state->min_rtt;
|
||||
@@ -396,9 +387,7 @@ int pping_ingress(struct xdp_md *ctx)
|
||||
re.sent_bytes = f_state->sent_bytes;
|
||||
re.rec_pkts = f_state->rec_pkts;
|
||||
re.rec_bytes = f_state->rec_bytes;
|
||||
|
||||
// Push event to perf-buffer
|
||||
__builtin_memcpy(&re.flow, &p_id.flow, sizeof(struct network_tuple));
|
||||
re.flow = p_id.flow;
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &re, sizeof(re));
|
||||
|
||||
validflow_out:
|
||||
@@ -410,6 +399,58 @@ validflow_out:
|
||||
sizeof(fe));
|
||||
}
|
||||
|
||||
out:
|
||||
return;
|
||||
}
|
||||
|
||||
// Programs
|
||||
|
||||
// Egress path using TC-BPF
|
||||
SEC(SEC_EGRESS_TC)
|
||||
int pping_tc_egress(struct __sk_buff *skb)
|
||||
{
|
||||
struct parsing_context pctx = {
|
||||
.data = (void *)(long)skb->data,
|
||||
.data_end = (void *)(long)skb->data_end,
|
||||
.pkt_len = skb->len,
|
||||
.nh = { .pos = pctx.data },
|
||||
.is_egress = true,
|
||||
};
|
||||
|
||||
pping_egress(skb, &pctx);
|
||||
|
||||
return TC_ACT_OK;
|
||||
}
|
||||
|
||||
// Ingress path using TC-BPF
|
||||
SEC(SEC_INGRESS_TC)
|
||||
int pping_tc_ingress(struct __sk_buff *skb)
|
||||
{
|
||||
struct parsing_context pctx = {
|
||||
.data = (void *)(long)skb->data,
|
||||
.data_end = (void *)(long)skb->data_end,
|
||||
.pkt_len = skb->len,
|
||||
.nh = { .pos = pctx.data },
|
||||
.is_egress = false,
|
||||
};
|
||||
|
||||
pping_ingress(skb, &pctx);
|
||||
|
||||
return TC_ACT_OK;
|
||||
}
|
||||
|
||||
// Ingress path using XDP
|
||||
SEC(SEC_INGRESS_XDP)
|
||||
int pping_xdp_ingress(struct xdp_md *ctx)
|
||||
{
|
||||
struct parsing_context pctx = {
|
||||
.data = (void *)(long)ctx->data,
|
||||
.data_end = (void *)(long)ctx->data_end,
|
||||
.pkt_len = pctx.data_end - pctx.data,
|
||||
.nh = { .pos = pctx.data },
|
||||
.is_egress = false,
|
||||
};
|
||||
|
||||
pping_ingress(ctx, &pctx);
|
||||
|
||||
return XDP_PASS;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user