mirror of
https://github.com/xdp-project/bpf-examples.git
synced 2024-05-06 15:54:53 +00:00
Merge pull request #1 from netoptimizer/MTU-tests01.public
MTU testing: Simple TC-BPF program for testing different packet sizes that violate the MTU of the interface. This is part of testing upstream kernel work for removing the MTU limit in the BPF-helpers that change packet size, and instead adding a BPF-helper that can check or look up the interface MTU.
This commit is contained in:
21
MTU-tests/Makefile
Normal file
21
MTU-tests/Makefile
Normal file
@@ -0,0 +1,21 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)

# Build inputs consumed by the shared rules in $(LIB_DIR)/common.mk
USER_TARGETS :=
BPF_TARGETS := tc_bpf_inc_pkt_size
EXTRA_DEPS := encap.h

LIB_DIR = ../lib

include $(LIB_DIR)/common.mk

# iproute2's 'tc' tool does not understand BTF debug info, so the .BTF
# section is removed from the object file with llvm-strip.
#
# Do *NOT* strip everything: that would also remove the ELF elements
# needed for creating maps, hence --no-strip-all.
#
.PHONY: strip_tc_obj
strip_tc_obj: tc_bpf_inc_pkt_size.o
	$(Q) llvm-strip --no-strip-all --remove-section .BTF $<

all: strip_tc_obj
|
25
MTU-tests/README.org
Normal file
25
MTU-tests/README.org
Normal file
@@ -0,0 +1,25 @@
|
||||
#+OPTIONS: ^:nil
|
||||
|
||||
* BPF testing packet size changes
|
||||
|
||||
Playing with increasing packet size with TC-BPF. Testing what happens when
sending packets larger than the MTU out an interface.
|
||||
|
||||
* Notes loading BPF object
|
||||
|
||||
How to manually load TC program:
|
||||
|
||||
#+begin_src sh
|
||||
export DEV=mlx5p1
|
||||
tc qdisc del dev "$DEV" clsact # Also deletes all filters
|
||||
tc qdisc add dev "$DEV" clsact
|
||||
tc filter add dev "$DEV" pref 1 handle 1 egress bpf da obj tc_bpf_inc_pkt_size.o
|
||||
tc filter show dev "$DEV" egress
|
||||
#+end_src
|
||||
|
||||
Be *VERY* careful with the =replace= command: it MUST use the same =pref= + =handle=:
|
||||
|
||||
#+begin_src sh
|
||||
tc filter replace dev "$DEV" pref 1 handle 1 egress bpf da obj tc_bpf_inc_pkt_size.o
|
||||
#+end_src
|
||||
|
33
MTU-tests/encap.h
Normal file
33
MTU-tests/encap.h
Normal file
@@ -0,0 +1,33 @@
|
||||
|
||||
/*
 * Fold a 32-bit checksum accumulator down to 16 bits and return the
 * one's complement of the result.
 *
 * @csum: 32-bit running sum (e.g. the return value of bpf_csum_diff()).
 *
 * Returns the complemented, folded 16-bit value.
 */
static __always_inline __u16 csum_fold_helper(__u32 csum)
{
	/* Add the upper half-word onto the lower half-word */
	__u32 folded = (csum & 0xffff) + (csum >> 16);

	/* The addition above may itself carry into bit 16; fold it back in */
	folded = folded + (folded >> 16);

	/* Truncation to __u16 keeps only the low 16 bits of the complement */
	return (__u16)~folded;
}
|
||||
|
||||
static void encap_ipv4_ipip(volatile void *data, volatile void *data_end)
|
||||
{
|
||||
volatile struct iphdr *iph;
|
||||
size_t len;
|
||||
|
||||
struct iphdr encap_hdr = {
|
||||
.version = 4,
|
||||
.ihl = 5,
|
||||
.protocol = IPPROTO_IPIP,
|
||||
.ttl = 16,
|
||||
.saddr = bpf_htonl(0x0a0b0202),
|
||||
.daddr = bpf_htonl(0x0a0b0201),
|
||||
};
|
||||
|
||||
iph = data + sizeof(struct ethhdr);
|
||||
if (iph + 1 > data_end)
|
||||
return;
|
||||
|
||||
*iph = encap_hdr;
|
||||
|
||||
len = (data_end - data);
|
||||
iph->tot_len = bpf_htons(len - sizeof(struct ethhdr));
|
||||
iph->check = csum_fold_helper(bpf_csum_diff((__be32 *)iph, 0, (__be32 *)iph, sizeof(*iph), 0));
|
||||
}
|
131
MTU-tests/tc_bpf_inc_pkt_size.c
Normal file
131
MTU-tests/tc_bpf_inc_pkt_size.c
Normal file
@@ -0,0 +1,131 @@
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/in6.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <xdp/parsing_helpers.h>
|
||||
#include "encap.h"
|
||||
|
||||
/* Select the packet size to test for a given counter value.
 *
 * The different MTU packet sizes are encoded directly in the BPF-code via a
 * switch statement. MTU is defined as the L3 size (usually 1500 for
 * Ethernet), but remember that TC (and XDP) operate at L2, so the caller
 * has to add the L2 header size afterwards.
 *
 * @cnt: counter value selecting the size; values without a dedicated case
 *       yield 1500.
 *
 * Returns the desired packet size at L3 in bytes.
 */
static __always_inline __u32 get_pkt_size_l3(__u64 cnt)
{
	__u32 l3_size;

	switch (cnt) {
	case 0:  l3_size = 1024;       break;
	case 1:  l3_size = 1500;       break;
	case 2:  l3_size = 1504;       break;
	case 3:  l3_size = 1508;       break;
	case 4:  l3_size = 1600;       break;
	case 5:  l3_size = 4096 + 128; break;
	case 6:  l3_size = 3520;       break;
	case 7:  l3_size = 3528;       break;
	case 8:  l3_size = 4096 - 14;  break;
	case 9:  l3_size = 4096;       break;
	case 10: l3_size = 8192;       break;
	case 11: l3_size = 16000;      break;
	default: l3_size = 1500;       break;
	}

	return l3_size;
}
|
||||
#define CNT_MAX 12
|
||||
|
||||
/* The tc tool (iproute2) use another ELF map layout than libbpf, see
|
||||
* struct bpf_elf_map from iproute2, but bpf_map_def from libbpf have
|
||||
* same binary layout until "flags" so use that.
|
||||
*/
|
||||
struct bpf_map_def SEC("maps") cnt_map = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.key_size = sizeof(__u32),
|
||||
.value_size = sizeof(__u64),
|
||||
.max_entries = 1,
|
||||
.map_flags = 0,
|
||||
};
|
||||
|
||||
/* Atomic add without using the fetched value.
 *
 * LLVM maps the __sync_fetch_and_add() built-in function to the BPF atomic
 * add instruction (that is BPF_STX | BPF_XADD | BPF_W for word sizes). The
 * (void) cast discards the value returned by the built-in.
 */
#if !defined(lock_xadd)
# define lock_xadd(ptr, val) ((void) __sync_fetch_and_add((ptr), (val)))
#endif
|
||||
|
||||
#define ENCAP_TYPE BPF_F_ADJ_ROOM_ENCAP_L3_IPV4
|
||||
|
||||
/*
 * TC classifier that resizes packets in order to test packet sizes that
 * violate the interface MTU.
 *
 * For every non-ARP packet the program:
 *  1. looks up the shared counter in cnt_map and translates it into a
 *     desired L3 size via get_pkt_size_l3(), then adds the Ethernet header
 *     size to get the desired L2 size,
 *  2. advances the counter (wrapping at CNT_MAX) so that the next packet
 *     exercises the next size,
 *  3. calls bpf_skb_adjust_room() with the difference between the desired
 *     and the current packet length, and
 *  4. writes an outer IPIP header via encap_ipv4_ipip() and sets the
 *     Ethernet protocol to ETH_P_IP.
 *
 * Returns:
 *  BPF_OK   - ARP frames (left untouched), packets where the room
 *             adjustment failed (left untouched), and packets that were
 *             successfully resized.
 *  BPF_DROP - counter lookup failed, the Ethernet header is not within the
 *             packet bounds, or the outer IP header does not fit after the
 *             adjustment.
 */
SEC("classifier") int tc_inc_pkt_sz(struct __sk_buff *skb)
{
	volatile void *data, *data_end;
	int ret = BPF_DROP;
	struct ethhdr *eth;
	struct iphdr *iph;
	int extra_len;
	int len;

	int key = 0;
	__u64 *cnt;

	cnt = bpf_map_lookup_elem(&cnt_map, &key);
	if (!cnt)
		goto out;

	/* Desired packet size at L2 */
	int pkt_size_l2 = get_pkt_size_l3(*cnt) + sizeof(*eth);

	data = (void *)(long)skb->data;
	data_end = (void *)(long)skb->data_end;
	eth = (struct ethhdr *)data;

	if (data + sizeof(*eth) > data_end)
		return BPF_DROP;

	/* Keep ARP resolution working */
	if (eth->h_proto == bpf_htons(ETH_P_ARP)) {
		ret = BPF_OK;
		goto out;
	}

	/* Number of bytes to add (or remove, if negative) to hit the target.
	 * Note: using sizeof(*iph) as extra_len instead would give an
	 * adjustment that does a correct IPIP encap.
	 */
	len = (data_end - data);
	extra_len = pkt_size_l2 - len;

	/* Wrapping global counter.
	 *
	 * The map is shared, so another CPU may increment the counter between
	 * our atomic add and the check below. Use >= rather than == so the
	 * counter still wraps if it has been pushed past CNT_MAX; with == it
	 * could overshoot once and then never wrap again.
	 */
	lock_xadd(cnt, 1);
	if (*cnt >= CNT_MAX)
		*cnt = 0;

	if (bpf_skb_adjust_room(skb, extra_len, BPF_ADJ_ROOM_MAC, ENCAP_TYPE)) {
		/* If adjust fails, then skip this packet length adjustment */
		ret = BPF_OK;
		goto out;
	}

	/* Must re-load after bpf_skb_adjust_room() */
	data = (void *)(long)skb->data;
	data_end = (void *)(long)skb->data_end;

	/* Add IP-header with IPIP */
	encap_ipv4_ipip(data, data_end);

	/* Re-validate bounds before writing the Ethernet protocol field */
	eth = (void *)data;
	iph = (void *)(eth + 1);
	if (iph + 1 > data_end)
		goto out;

	eth->h_proto = bpf_htons(ETH_P_IP);

	ret = BPF_OK;
out:
	return ret;
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
Reference in New Issue
Block a user