Add pkt-loop-filter example

Add an example to filter looping packets on (for instance) a bond
interface, by recording the egress MAC+VLAN and dropping any packets that
come in on other (related) interfaces with the same MAC+VLAN.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
This commit is contained in:
Toke Høiland-Jørgensen
2022-05-20 14:53:14 +02:00
parent daefd11178
commit 070715cf1c
6 changed files with 276 additions and 0 deletions

1
pkt-loop-filter/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
pkt-loop-filter

9
pkt-loop-filter/Makefile Normal file
View File

@@ -0,0 +1,9 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
# Build description for the pkt-loop-filter example. The variables below are
# presumably consumed by the shared common.mk included at the bottom (not
# visible from here - confirm against ../lib/common.mk).

# Userspace loader binary.
USER_TARGETS += pkt-loop-filter
# BPF object; the loader opens it at runtime as pkt-loop-filter.kern.o.
BPF_TARGETS += pkt-loop-filter.kern
# Header with the map key/value layout, listed as an extra dependency.
EXTRA_DEPS := pkt-loop-filter.h
LIB_DIR = ../lib
include $(LIB_DIR)/common.mk

View File

@@ -0,0 +1,23 @@
* Packet loop filter
This example shows how to filter looping packets, for example when two bond
interfaces are attached to a switch that loops packets back through the other
bond interface. It works by attaching ingress and egress TC filters to one or
more interfaces, and keeping track of the source VLAN+MAC on every packet going
out of any of the included interfaces. If a packet comes back in with a source
MAC+VLAN that was already seen on egress, that packet is simply dropped (subject
to a 10-second expiry time).
To load, simply execute the userspace binary with the names of all interfaces
to attach the filter to; all interfaces loaded this way will share the same map, so the
filter will work across all of them. For instance, if a bond interface is using
underlying veth0 and veth1, execute =./pkt-loop-filter veth0 veth1= to enable
the filter on packets looping through the two.
To unload, pass the =--unload= parameter to the userspace utility along with the
interface names. Note that the same set of interfaces should be supplied on load
and unload; the tool doesn't check for this, so if the two sets differ, the
unload will only be partial.
The BPF programs will record which interface a given source MAC+VLAN was last
seen on, as well as the number of packets dropped for that MAC+VLAN. These
statistics are kept in the BPF map and can be dumped using =bpftool=.

View File

@@ -0,0 +1,140 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <net/if.h>
#include <linux/if_arp.h>
#include <bpf/libbpf.h>
#define MAX_IFINDEXES 10
/*
 * Attach the loop-filter TC programs to every interface named on the command
 * line, or, when --unload is given, remove the clsact qdisc from each of them.
 *
 * Every argument other than --unload is treated as an interface name; at most
 * MAX_IFINDEXES interfaces are accepted and exceeding that is reported as an
 * error instead of silently ignoring the surplus names.
 *
 * Returns 0 on success, 1 on a usage error, and the (negative) error code of
 * the failing libbpf/libc call otherwise.
 */
int main(int argc, char *argv[])
{
	int err = 0, i, num_ifindexes = 0, _err, ingress_fd, egress_fd;
	const char *filename = "pkt-loop-filter.kern.o";
	int ifindex[MAX_IFINDEXES];
	/* NULL so the shared teardown path can close it unconditionally */
	struct bpf_object *obj = NULL;
	bool unload = false;
	DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);

	if (argc < 2) {
		fprintf(stderr, "Usage: %s <ifname> [..ifname] [--unload]\n", argv[0]);
		return 1;
	}

	/* Walk all arguments so that --unload is honoured wherever it appears
	 * and does not count against the interface limit.
	 */
	for (i = 1; i < argc; i++) {
		const char *ifname = argv[i];

		if (!strcmp(ifname, "--unload")) {
			unload = true;
			continue;
		}

		if (num_ifindexes >= MAX_IFINDEXES) {
			fprintf(stderr, "Too many interfaces (at most %d supported)\n",
				MAX_IFINDEXES);
			return 1;
		}

		ifindex[num_ifindexes] = if_nametoindex(ifname);
		if (!ifindex[num_ifindexes]) {
			fprintf(stderr, "Couldn't find interface '%s'\n", ifname);
			return 1;
		}
		num_ifindexes++;
	}

	if (!num_ifindexes) {
		fprintf(stderr, "Need at least one interface name\n");
		return 1;
	}

	if (unload)
		goto unload;

	obj = bpf_object__open(filename);
	err = libbpf_get_error(obj);
	if (err) {
		fprintf(stderr, "Couldn't open file: %s\n", filename);
		return err;
	}

	err = bpf_object__load(obj);
	if (err) {
		fprintf(stderr, "Failed to load object\n");
		goto out;
	}

	egress_fd = bpf_program__fd(bpf_object__find_program_by_name(obj, "record_egress_pkt"));
	if (egress_fd < 0) {
		fprintf(stderr, "Couldn't find program 'record_egress_pkt'\n");
		err = -ENOENT;
		goto out;
	}

	ingress_fd = bpf_program__fd(bpf_object__find_program_by_name(obj, "filter_ingress_pkt"));
	if (ingress_fd < 0) {
		fprintf(stderr, "Couldn't find program 'filter_ingress_pkt'\n");
		err = -ENOENT;
		goto out;
	}

	for (i = 0; i < num_ifindexes; i++) {
		DECLARE_LIBBPF_OPTS(bpf_tc_opts, attach_egress,
				    .prog_fd = egress_fd);
		DECLARE_LIBBPF_OPTS(bpf_tc_opts, attach_ingress,
				    .prog_fd = ingress_fd);
		char ifname[IF_NAMESIZE];

		if (!if_indextoname(ifindex[i], ifname)) {
			err = -errno;
			perror("if_indextoname");
			goto out;
		}

		/* Create the hook for both directions at once; an already
		 * existing hook is not an error.
		 */
		hook.ifindex = ifindex[i];
		hook.attach_point = BPF_TC_EGRESS | BPF_TC_INGRESS;
		err = bpf_tc_hook_create(&hook);
		if (err && err != -EEXIST) {
			fprintf(stderr, "Couldn't create egress hook for interface %s\n", ifname);
			goto unload;
		}

		hook.attach_point = BPF_TC_EGRESS;
		err = bpf_tc_attach(&hook, &attach_egress);
		if (err) {
			fprintf(stderr, "Couldn't attach egress program to interface %s: %s\n", ifname, strerror(errno));
			goto unload;
		}

		hook.attach_point = BPF_TC_INGRESS;
		err = bpf_tc_attach(&hook, &attach_ingress);
		if (err) {
			fprintf(stderr, "Couldn't attach ingress program to interface %s: %s\n", ifname, strerror(errno));
			goto unload;
		}
	}

out:
	bpf_object__close(obj);
	return err;

unload:
	/* Reached both for an explicit --unload and to roll back after a
	 * failed attach; destroying the combined hook removes the clsact qdisc.
	 */
	for (i = 0; i < num_ifindexes; i++) {
		char ifname[IF_NAMESIZE];
		const char *name;

		hook.ifindex = ifindex[i];
		hook.attach_point = BPF_TC_EGRESS | BPF_TC_INGRESS;
		_err = bpf_tc_hook_destroy(&hook);
		if (_err) {
			/* if_indextoname() returns NULL on failure, which must
			 * not be handed to %s.
			 */
			name = if_indextoname(ifindex[i], ifname);
			fprintf(stderr, "Couldn't remove clsact qdisc on %s\n",
				name ? name : "(unknown)");
			err = _err;
		}
	}

	/* obj is still NULL for a plain --unload; closing NULL is a no-op */
	bpf_object__close(obj);
	return err;
}

View File

@@ -0,0 +1,19 @@
#ifndef __PKT_LOOP_FILTER_H__
#define __PKT_LOOP_FILTER_H__
/* Nanoseconds per second; timestamps in this example are in nanoseconds. */
#define NS_PER_SEC 1000000000ULL
/* How long (10 seconds, in ns) an entry recorded on egress stays valid. */
#define STATE_LIFETIME (10 * NS_PER_SEC)
/* Map key identifying a traffic source by Ethernet source address and VLAN. */
struct pkt_loop_key {
	__u8 src_mac[6];	/* Ethernet source address of the packet */
	__u16 src_vlan;		/* filled from skb->vlan_tci by the BPF program */
};
/* Map value holding the state kept for one source MAC+VLAN. */
struct pkt_loop_data {
	__u64 expiry_time;	/* coarse ktime (ns) after which the entry no longer causes drops */
	__u32 ifindex;		/* interface the key was last seen leaving on */
	__u32 drops;		/* number of ingress packets dropped for this key */
};
#endif

View File

@@ -0,0 +1,84 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <xdp/parsing_helpers.h>
#include <linux/pkt_cls.h>
#include "pkt-loop-filter.h"
/* Shared state map, keyed by source MAC+VLAN (struct pkt_loop_key) and holding
 * a struct pkt_loop_data per key. It is written by the egress program and
 * consulted by the ingress program.
 *
 * We use an LRU map to avoid having to do cleanup: We just rely on the LRU
 * mechanism to evict old entries as the map fills up.
 */
struct {
	__uint(type, BPF_MAP_TYPE_LRU_HASH);
	__type(key, struct pkt_loop_key);
	__type(value, struct pkt_loop_data);
	__uint(max_entries, 16384);
} iface_state SEC(".maps");
static int parse_pkt(struct __sk_buff *skb, struct pkt_loop_key *key)
{
void *data_end = (void *)(unsigned long long)skb->data_end;
void *data = (void *)(unsigned long long)skb->data;
struct hdr_cursor nh = { .pos = data };
struct ethhdr *eth;
int eth_type;
/* Parse Ethernet and IP/IPv6 headers */
eth_type = parse_ethhdr(&nh, data_end, &eth);
if (eth_type < 0)
return eth_type;
__builtin_memcpy(key->src_mac, eth->h_source, ETH_ALEN);
key->src_vlan = skb->vlan_tci;
return 0;
}
/* TC egress program: remember that this source MAC+VLAN just left through the
 * current interface, and (re)arm its expiry time. Never drops a packet; it
 * always returns TC_ACT_OK.
 */
SEC("tc")
int record_egress_pkt(struct __sk_buff *skb)
{
	struct pkt_loop_data fresh = { .ifindex = skb->ifindex };
	struct pkt_loop_data *entry;
	struct pkt_loop_key key;

	if (parse_pkt(skb, &key))
		return TC_ACT_OK;

	entry = bpf_map_lookup_elem(&iface_state, &key);
	if (!entry) {
		/* First sighting: insert a fresh entry, then look it up again
		 * so the in-map copy is the one updated below.
		 */
		bpf_map_update_elem(&iface_state, &key, &fresh, BPF_NOEXIST);
		entry = bpf_map_lookup_elem(&iface_state, &key);
	}

	if (entry) {
		entry->expiry_time = bpf_ktime_get_coarse_ns() + STATE_LIFETIME;
		entry->ifindex = skb->ifindex;
	}

	return TC_ACT_OK;
}
/* TC ingress program: drop packets whose source MAC+VLAN has an unexpired
 * entry in iface_state, i.e. was recorded on egress within STATE_LIFETIME.
 *
 * Returns TC_ACT_SHOT for such packets (after counting the drop) and
 * TC_ACT_OK for everything else, including packets that fail to parse.
 */
SEC("tc")
int filter_ingress_pkt(struct __sk_buff *skb)
{
	struct pkt_loop_data *value;
	struct pkt_loop_key key;
	int err;

	err = parse_pkt(skb, &key);
	if (err)
		goto out;

	value = bpf_map_lookup_elem(&iface_state, &key);
	if (value && value->expiry_time > bpf_ktime_get_coarse_ns()) {
		/* The map value is shared between CPUs, so count the drop with
		 * an atomic add; a plain increment can lose updates.
		 */
		__sync_fetch_and_add(&value->drops, 1);
		return TC_ACT_SHOT;
	}

out:
	return TC_ACT_OK;
}
/* License string emitted into the "license" section of the BPF object. */
char _license[] SEC("license") = "GPL";