Add pkt-loop-filter example

Add an example to filter looping packets on (for instance) a bond interface, by recording the egress MAC+VLAN and dropping any packets that come in on other (related) interfaces with the same MAC+VLAN. Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
2024-05-06 15:54:53 +00:00 · 2022-05-20 14:53:14 +02:00
parent daefd11178
commit 070715cf1c
6 changed files with 276 additions and 0 deletions
--- a/pkt-loop-filter/.gitignore
+++ b/pkt-loop-filter/.gitignore
@@ -0,0 +1 @@
+pkt-loop-filter
--- a/pkt-loop-filter/Makefile
+++ b/pkt-loop-filter/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+
+# Build description for the pkt-loop-filter example: one userspace loader, one
+# BPF object, and the header listed as an extra dependency.
+# NOTE(review): USER_TARGETS, BPF_TARGETS and EXTRA_DEPS are presumably consumed
+# by $(LIB_DIR)/common.mk, included below -- confirm there.
+USER_TARGETS    += pkt-loop-filter
+BPF_TARGETS    += pkt-loop-filter.kern
+EXTRA_DEPS := pkt-loop-filter.h
+
+LIB_DIR = ../lib
+
+include $(LIB_DIR)/common.mk
--- a/pkt-loop-filter/README.org
+++ b/pkt-loop-filter/README.org
@@ -0,0 +1,23 @@
+* Packet loop filter
+
+This example shows how to filter looping packets, for example when two bond
+interfaces are attached to a switch that loops packets back through the other
+bond interface. It works by attaching ingress and egress TC filters to one or
+more interfaces, and keeping track of the source MAC+VLAN on every packet going
+out of any of the included interfaces. If a packet comes back in with a source
+MAC+VLAN that was already seen on egress, that packet is simply dropped (subject
+to a 10-second expiry time).
+
+To load, simply execute the userspace binary with the names of all interfaces
+the filter should be attached to; all interfaces loaded this way will share the
+same map, so the filter will work across all of them. For instance, if a bond
+interface is using underlying veth0 and veth1, execute
+=./pkt-loop-filter veth0 veth1= to enable the filter on packets looping through
+the two.
+
+To unload, add the =--unload= parameter to the userspace utility. Note that the
+same set of interfaces should be supplied on load and unload; the tool doesn't
+check for this so if it's not, the unload will only be partial.
+
+The BPF programs will record which interface a given source MAC+VLAN was last
+seen on, as well as the number of packets dropped for that MAC+VLAN. These
+statistics are kept in the BPF map and can be dumped using =bpftool=.
--- a/pkt-loop-filter/pkt-loop-filter.c
+++ b/pkt-loop-filter/pkt-loop-filter.c
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <net/if.h>
+#include <linux/if_arp.h>
+
+#include <bpf/libbpf.h>
+
+#define MAX_IFINDEXES 10
+
+/* Resolve @ifindex to an interface name for use in messages. @buf must hold at
+ * least IF_NAMESIZE bytes. Falls back to a placeholder when the lookup fails,
+ * so the result is always safe to pass to a "%s" format.
+ */
+static const char *ifindex_to_name(int ifindex, char *buf)
+{
+	const char *name = if_indextoname(ifindex, buf);
+
+	return name ? name : "(unknown)";
+}
+
+/* Load or unload the packet loop filter.
+ *
+ * Usage: pkt-loop-filter <ifname> [..ifname] [--unload]
+ *
+ * Without --unload, the BPF object is opened and loaded, a TC hook is created
+ * on every named interface, and the "record_egress_pkt" and
+ * "filter_ingress_pkt" programs are attached to egress and ingress
+ * respectively. With --unload (accepted at any argument position), the TC hook
+ * is destroyed on every named interface instead.
+ *
+ * Returns 0 on success, 1 on invalid arguments, and a negative error code when
+ * a libbpf or interface operation fails.
+ */
+int main(int argc, char *argv[])
+{
+	int err = 0, i, num_ifindexes = 0, _err, ingress_fd, egress_fd;
+	const char *filename = "pkt-loop-filter.kern.o";
+	int ifindex[MAX_IFINDEXES];
+	struct bpf_object *obj = NULL;
+	bool unload = false;
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s <ifname> [..ifname] [--unload]\n", argv[0]);
+		return 1;
+	}
+
+	/* Walk every argument, so that --unload is honoured wherever it appears
+	 * and surplus interfaces are rejected instead of silently ignored.
+	 */
+	for (i = 1; i < argc; i++) {
+		const char *ifname = argv[i];
+
+		if (!strcmp(ifname, "--unload")) {
+			unload = true;
+			continue;
+		}
+
+		if (num_ifindexes >= MAX_IFINDEXES) {
+			fprintf(stderr, "Too many interfaces (maximum is %d)\n", MAX_IFINDEXES);
+			return 1;
+		}
+
+		ifindex[num_ifindexes] = if_nametoindex(ifname);
+		if (!ifindex[num_ifindexes]) {
+			fprintf(stderr, "Couldn't find interface '%s'\n", ifname);
+			return 1;
+		}
+		num_ifindexes++;
+	}
+
+	if (!num_ifindexes) {
+		fprintf(stderr, "Need at least one interface name\n");
+		return 1;
+	}
+
+	if (unload)
+		goto unload;
+
+	obj = bpf_object__open(filename);
+	err = libbpf_get_error(obj);
+	if (err) {
+		fprintf(stderr, "Couldn't open file: %s\n", filename);
+		return err;
+	}
+
+	err = bpf_object__load(obj);
+	if (err) {
+		fprintf(stderr, "Failed to load object\n");
+		goto out;
+	}
+
+	egress_fd = bpf_program__fd(bpf_object__find_program_by_name(obj, "record_egress_pkt"));
+	if (egress_fd < 0) {
+		fprintf(stderr, "Couldn't find program 'record_egress_pkt'\n");
+		err = -ENOENT;
+		goto out;
+	}
+
+	ingress_fd = bpf_program__fd(bpf_object__find_program_by_name(obj, "filter_ingress_pkt"));
+	if (ingress_fd < 0) {
+		fprintf(stderr, "Couldn't find program 'filter_ingress_pkt'\n");
+		err = -ENOENT;
+		goto out;
+	}
+
+	/* On any failure below, num_ifindexes is trimmed to the interfaces that
+	 * have a hook set up by this run, so the rollback at the unload label
+	 * never touches interfaces that were not reached yet.
+	 */
+	for (i = 0; i < num_ifindexes; i++) {
+		DECLARE_LIBBPF_OPTS(bpf_tc_opts, attach_egress,
+				    .prog_fd = egress_fd);
+		DECLARE_LIBBPF_OPTS(bpf_tc_opts, attach_ingress,
+				    .prog_fd = ingress_fd);
+		char ifname[IF_NAMESIZE];
+
+		if (!if_indextoname(ifindex[i], ifname)) {
+			err = -errno;
+			perror("if_indextoname");
+			num_ifindexes = i;
+			goto unload;
+		}
+
+		hook.ifindex = ifindex[i];
+		hook.attach_point = BPF_TC_EGRESS | BPF_TC_INGRESS;
+		err = bpf_tc_hook_create(&hook);
+		if (err && err != -EEXIST) {
+			fprintf(stderr, "Couldn't create TC hook for interface %s: %s\n",
+				ifname, strerror(-err));
+			num_ifindexes = i;
+			goto unload;
+		}
+
+		hook.attach_point = BPF_TC_EGRESS;
+		err = bpf_tc_attach(&hook, &attach_egress);
+		if (err) {
+			fprintf(stderr, "Couldn't attach egress program to interface %s: %s\n",
+				ifname, strerror(-err));
+			num_ifindexes = i + 1;
+			goto unload;
+		}
+
+		hook.attach_point = BPF_TC_INGRESS;
+		err = bpf_tc_attach(&hook, &attach_ingress);
+		if (err) {
+			fprintf(stderr, "Couldn't attach ingress program to interface %s: %s\n",
+				ifname, strerror(-err));
+			num_ifindexes = i + 1;
+			goto unload;
+		}
+	}
+
+	goto out;
+
+unload:
+	/* Reached for an explicit --unload and as rollback after a failed load.
+	 * The first error is the one reported to the caller.
+	 */
+	for (i = 0; i < num_ifindexes; i++) {
+		char ifname[IF_NAMESIZE];
+
+		hook.ifindex = ifindex[i];
+		hook.attach_point = BPF_TC_EGRESS | BPF_TC_INGRESS;
+		_err = bpf_tc_hook_destroy(&hook);
+		if (_err) {
+			fprintf(stderr, "Couldn't remove clsact qdisc on %s\n",
+				ifindex_to_name(ifindex[i], ifname));
+			if (!err)
+				err = _err;
+		}
+	}
+
+out:
+	/* obj is still NULL on the --unload path; bpf_object__close() accepts that. */
+	bpf_object__close(obj);
+	return err;
+}
--- a/pkt-loop-filter/pkt-loop-filter.h
+++ b/pkt-loop-filter/pkt-loop-filter.h
@@ -0,0 +1,19 @@
+#ifndef __PKT_LOOP_FILTER_H__
+#define __PKT_LOOP_FILTER_H__
+
+/* Nanoseconds per second; timestamps in this example are in nanoseconds. */
+#define NS_PER_SEC 1000000000ULL
+/* How long (10 seconds) a recorded egress entry stays valid after it is written. */
+#define STATE_LIFETIME (10 * NS_PER_SEC)
+
+/* Map key identifying a traffic source. parse_pkt() in pkt-loop-filter.kern.c
+ * fills src_mac from the Ethernet header's source address and src_vlan from
+ * skb->vlan_tci.
+ */
+struct pkt_loop_key {
+        __u8 src_mac[6];
+        __u16 src_vlan;
+};
+
+/* Map value: state kept for each pkt_loop_key by the BPF programs. */
+struct pkt_loop_data {
+        __u64 expiry_time; /* bpf_ktime_get_coarse_ns() value after which the entry no longer causes drops */
+        __u32 ifindex; /* skb->ifindex of the most recent egress packet recorded for this key */
+        __u32 drops; /* number of ingress packets dropped for this key */
+
+};
+
+#endif
--- a/pkt-loop-filter/pkt-loop-filter.kern.c
+++ b/pkt-loop-filter/pkt-loop-filter.kern.c
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <xdp/parsing_helpers.h>
+#include <linux/pkt_cls.h>
+
+#include "pkt-loop-filter.h"
+
+/* State shared by the egress and ingress programs, keyed by source MAC+VLAN.
+ *
+ * We use an LRU map to avoid having to do cleanup: we just rely on the LRU
+ * mechanism to evict old entries as the map fills up. Entries that are still
+ * present but past their expiry_time are ignored by the ingress filter.
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+	__type(key, struct pkt_loop_key);
+	__type(value, struct pkt_loop_data);
+	__uint(max_entries, 16384);
+} iface_state SEC(".maps");
+
+/* Build the map key for a packet.
+ *
+ * @skb: packet being processed
+ * @key: filled with the Ethernet source address and skb->vlan_tci on success
+ *
+ * Returns 0 on success, or the negative value returned by parse_ethhdr() when
+ * the Ethernet header cannot be parsed (in which case @key is not written).
+ */
+static int parse_pkt(struct __sk_buff *skb, struct pkt_loop_key *key)
+{
+	void *data_end = (void *)(unsigned long long)skb->data_end;
+	void *data = (void *)(unsigned long long)skb->data;
+	struct hdr_cursor nh = { .pos = data };
+	struct ethhdr *eth;
+	int eth_type;
+
+	/* Parse the Ethernet header only; nothing beyond it is needed for the key */
+	eth_type = parse_ethhdr(&nh, data_end, &eth);
+	if (eth_type < 0)
+		return eth_type;
+
+	__builtin_memcpy(key->src_mac, eth->h_source, ETH_ALEN);
+	/* The whole TCI is stored as-is, not just a VLAN ID extracted from it */
+	key->src_vlan = skb->vlan_tci;
+
+	return 0;
+}
+
+/* TC egress program: remember that this packet's source MAC+VLAN just left
+ * through skb->ifindex, and (re)arm the entry's expiry time. The packet itself
+ * is always passed on with TC_ACT_OK.
+ */
+SEC("tc")
+int record_egress_pkt(struct __sk_buff *skb)
+{
+	struct pkt_loop_data new_entry = { .ifindex = skb->ifindex };
+	struct pkt_loop_data *entry;
+	struct pkt_loop_key key;
+
+	if (parse_pkt(skb, &key))
+		return TC_ACT_OK;
+
+	entry = bpf_map_lookup_elem(&iface_state, &key);
+	if (!entry) {
+		/* First sighting: insert, then look up again to get a pointer
+		 * to the stored value (also covers a concurrent insert).
+		 */
+		bpf_map_update_elem(&iface_state, &key, &new_entry, BPF_NOEXIST);
+		entry = bpf_map_lookup_elem(&iface_state, &key);
+	}
+
+	if (entry) {
+		entry->expiry_time = bpf_ktime_get_coarse_ns() + STATE_LIFETIME;
+		entry->ifindex = skb->ifindex;
+	}
+
+	return TC_ACT_OK;
+}
+
+/* TC ingress program: drop packets whose source MAC+VLAN has an unexpired
+ * entry in iface_state, i.e. was recorded by record_egress_pkt() within the
+ * last STATE_LIFETIME nanoseconds.
+ *
+ * Returns TC_ACT_SHOT for such packets (and counts the drop in the entry), and
+ * TC_ACT_OK for everything else, including packets that cannot be parsed.
+ */
+SEC("tc")
+int filter_ingress_pkt(struct __sk_buff *skb)
+{
+	struct pkt_loop_data *value;
+	struct pkt_loop_key key;
+
+	if (parse_pkt(skb, &key))
+		return TC_ACT_OK;
+
+	value = bpf_map_lookup_elem(&iface_state, &key);
+	if (value && value->expiry_time > bpf_ktime_get_coarse_ns()) {
+		/* The map value is shared between CPUs; use an atomic add so
+		 * concurrent drops for the same key are not lost.
+		 */
+		__sync_fetch_and_add(&value->drops, 1);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+/* License string for the BPF programs, placed in the "license" section. */
+char _license[] SEC("license") = "GPL";