mirror of
https://github.com/xdp-project/bpf-examples.git
synced 2024-05-06 15:54:53 +00:00
Adding a basic TC eBPF Qdisc classifier example
This example demonstrates how to write a simple eBPF Qdisc classifier that classifies flows depending on their destination TCP port. The example script, runner.sh shows how you can use the eBPF Qdisc classifier and implement the same functionality using u32. The script creates two network namespaces called Left and Right, representing two different hosts. The script then illustrates the classifiers in action using iperf3 by starting clients on the Left namespace that connect to iperf3 servers on the Right namespace. The Qdisc classifiers give TCP ports 8080 and 8081 a high rate limit, while TCP port 8082 represents all other traffic capped at 20 Mbps. Signed-off-by: Frey Alfredsson <freysteinn@freysteinn.com>
This commit is contained in:
31
tc-basic-classifier/Makefile
Normal file
31
tc-basic-classifier/Makefile
Normal file
@ -0,0 +1,31 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)

# BPF object(s) that are loaded through the iproute2 'tc' tool.
TC_BPF_TARGETS := filter
BPF_TARGETS += $(TC_BPF_TARGETS)

# NOTE(review): EXTRA_DEPS is presumably consumed by common.mk as an extra
# prerequisite of the BPF objects - confirm against ../lib/common.mk.
EXTRA_DEPS += config.mk

LIB_DIR = ../lib

include $(LIB_DIR)/common.mk
# config.mk is generated by the 'configure' script (rule below). GNU make
# remakes an included makefile that has a rule and then re-reads it.
include config.mk

all: config.mk

config.mk: configure
	@sh configure

ifndef HAVE_TC_LIBBPF
# If the iproute2 'tc' tool doesn't understand BTF debug info
# use llvm-strip to remove this debug info from object file
#
# *BUT* cannot strip everything as it removes ELF elems needed for
# creating maps
#
.PHONY: strip_tc_obj
strip_tc_obj: $(TC_BPF_TARGETS:=.o)
	$(Q) echo "TC don't support libbpf - strip BTF info"
	$(Q) llvm-strip --no-strip-all --remove-section .BTF $^

all: strip_tc_obj
endif
|
66
tc-basic-classifier/README.org
Normal file
66
tc-basic-classifier/README.org
Normal file
@ -0,0 +1,66 @@
|
||||
#+TITLE: eBPF Qdisc classifier example
|
||||
|
||||
This example demonstrates how to write a simple eBPF Qdisc classifier that
|
||||
classifies flows depending on their destination TCP port. The example script,
|
||||
runner.sh, shows how you can use the eBPF Qdisc classifier and implement the same
|
||||
functionality using u32. The script creates two network namespaces called
|
||||
Left and Right, representing two different hosts, as seen in Figure 1. The
|
||||
script then illustrates the classifiers in action using iperf3.
|
||||
|
||||
#+CAPTION: The figure depicts the network and classifier setup of the two network namespaces created by the runner.sh script.
|
||||
#+NAME: fig:Figure 1
|
||||
[[./overview.png]]
|
||||
|
||||
The Left namespace loads a Qdisc classifier that rate-limits TCP ports 8080 and
|
||||
8081 to get a higher rate than default traffic. The runner.sh script shows the
|
||||
higher rate limits by connecting to both target ports and TCP port 8082 to
|
||||
establish the default rate limit of 20 Mbps.
|
||||
|
||||
To run the application, choose either the "bpf" or "u32" parameters:
|
||||
|
||||
#+BEGIN_SRC bash
|
||||
[root@bpfexamples]# ./runner.sh bpf
|
||||
bash-5.0# ./runner.sh bpf
|
||||
Starting setup
|
||||
Starting iperf3
|
||||
Connecting to host 172.16.16.20, port 8080
|
||||
[ 5] local 172.16.16.10 port 56332 connected to 172.16.16.20 port 8080
|
||||
[ ID] Interval Transfer Bitrate Retr Cwnd
|
||||
[ 5] 0.00-1.00 sec 9.51 MBytes 79.8 Mbits/sec 0 86.3 KBytes
|
||||
[ 5] 1.00-2.00 sec 9.38 MBytes 78.7 Mbits/sec 0 112 KBytes
|
||||
[ 5] 2.00-3.00 sec 8.95 MBytes 75.1 Mbits/sec 0 112 KBytes
|
||||
[ 5] 3.00-4.00 sec 9.20 MBytes 77.2 Mbits/sec 0 112 KBytes
|
||||
- - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
[ ID] Interval Transfer Bitrate Retr
|
||||
[ 5] 0.00-4.00 sec 37.0 MBytes 77.7 Mbits/sec 0 sender
|
||||
[ 5] 0.00-4.01 sec 36.5 MBytes 76.4 Mbits/sec receiver
|
||||
|
||||
iperf Done.
|
||||
Connecting to host 172.16.16.20, port 8081
|
||||
[ 5] local 172.16.16.10 port 45084 connected to 172.16.16.20 port 8081
|
||||
[ ID] Interval Transfer Bitrate Retr Cwnd
|
||||
[ 5] 0.00-1.00 sec 4.92 MBytes 41.3 Mbits/sec 0 62.2 KBytes
|
||||
[ 5] 1.00-2.00 sec 4.47 MBytes 37.5 Mbits/sec 0 62.2 KBytes
|
||||
[ 5] 2.00-3.00 sec 4.66 MBytes 39.1 Mbits/sec 0 62.2 KBytes
|
||||
[ 5] 3.00-4.00 sec 4.47 MBytes 37.5 Mbits/sec 0 62.2 KBytes
|
||||
- - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
[ ID] Interval Transfer Bitrate Retr
|
||||
[ 5] 0.00-4.00 sec 18.5 MBytes 38.9 Mbits/sec 0 sender
|
||||
[ 5] 0.00-4.01 sec 18.3 MBytes 38.2 Mbits/sec receiver
|
||||
|
||||
iperf Done.
|
||||
Connecting to host 172.16.16.20, port 8082
|
||||
[ 5] local 172.16.16.10 port 33196 connected to 172.16.16.20 port 8082
|
||||
[ ID] Interval Transfer Bitrate Retr Cwnd
|
||||
[ 5] 0.00-1.00 sec 2.94 MBytes 24.6 Mbits/sec 0 153 KBytes
|
||||
[ 5] 1.00-2.00 sec 2.49 MBytes 20.9 Mbits/sec 0 153 KBytes
|
||||
[ 5] 2.00-3.00 sec 2.17 MBytes 18.2 Mbits/sec 0 153 KBytes
|
||||
[ 5] 3.00-4.00 sec 2.17 MBytes 18.2 Mbits/sec 0 153 KBytes
|
||||
- - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
[ ID] Interval Transfer Bitrate Retr
|
||||
[ 5] 0.00-4.00 sec 9.77 MBytes 20.5 Mbits/sec 0 sender
|
||||
[ 5] 0.00-4.01 sec 9.14 MBytes 19.1 Mbits/sec receiver
|
||||
|
||||
iperf Done.
|
||||
[root@bpfexamples]# ./runner.sh bpf
|
||||
#+END_SRC
|
29
tc-basic-classifier/configure
vendored
Executable file
29
tc-basic-classifier/configure
vendored
Executable file
@ -0,0 +1,29 @@
|
||||
#!/bin/bash
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
# This is not an autoconf generated configure
#
# Detects features of the local tool chain and records them as make
# variables in $CONFIG.

# Output file which is input to Makefile
CONFIG=config.mk

# Assume tc is in $PATH
TC=tc

# Check whether the tc binary reports libbpf in its version string.
# On success, appends HAVE_TC_LIBBPF and a matching BPF_CFLAGS define to
# $CONFIG and prints "yes (<version>)"; otherwise prints "no".
check_tc_libbpf()
{
    tc_version=$($TC -V)
    if echo "$tc_version" | grep -q libbpf; then
        # Keep only the text that follows the last "libbpf " in the output
        libbpf_version=${tc_version##*libbpf }
        echo "HAVE_TC_LIBBPF:=y" >> "$CONFIG"
        echo "BPF_CFLAGS += -DHAVE_TC_LIBBPF" >> "$CONFIG"
        echo "yes ($libbpf_version)"
    else
        echo "no"
    fi
}

# Start from a fresh file; the checks below only append to it
echo "# Generated config" > "$CONFIG"
echo "Detecting available features on system"

# printf instead of 'echo -n': the Makefile runs this script with 'sh', and
# 'echo -n' is not portable across sh implementations.
printf ' - libbpf support in tc tool: '
check_tc_libbpf
|
70
tc-basic-classifier/filter.c
Normal file
70
tc-basic-classifier/filter.c
Normal file
@ -0,0 +1,70 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/* Copyright 2021 Frey Alfredsson <freysteinn@freysteinn.com> */
|
||||
/* Based on code by Jesper Dangaard Brouer <brouer@redhat.com> */
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <linux/pkt_sched.h>
|
||||
#include <linux/pkt_cls.h>
|
||||
#include "../include/xdp/parsing_helpers.h"
|
||||
|
||||
/*
|
||||
* This example eBPF code mirrors the TC u32 rules set in the runner.sh
|
||||
* script, where the script gives different rate limits depending on if the TCP
|
||||
* traffic is for ports 8080 or 8081. It must be loaded with the direct-action
|
||||
* flag on TC to function, as this is a Qdisc classifier, not a Qdisc action. The
|
||||
* runner.sh script shows an example of how it is loaded and used.
|
||||
*/
|
||||
|
||||
SEC("classifier")
|
||||
int cls_filter(struct __sk_buff *skb)
|
||||
{
|
||||
void *data_end = (void *)(unsigned long long)skb->data_end;
|
||||
void *data = (void *)(unsigned long long)skb->data;
|
||||
|
||||
struct hdr_cursor nh;
|
||||
struct ethhdr *eth;
|
||||
int eth_type;
|
||||
int ip_type;
|
||||
int tcp_type;
|
||||
struct iphdr *iphdr;
|
||||
struct ipv6hdr *ipv6hdr;
|
||||
struct tcphdr *tcphdr;
|
||||
skb->tc_classid = 0x30; /* Default class */
|
||||
|
||||
nh.pos = data;
|
||||
|
||||
/* Parse Ethernet and IP/IPv6 headers */
|
||||
eth_type = parse_ethhdr(&nh, data_end, ð);
|
||||
if (eth_type == bpf_htons(ETH_P_IP)) {
|
||||
ip_type = parse_iphdr(&nh, data_end, &iphdr);
|
||||
if (ip_type != IPPROTO_TCP)
|
||||
goto out;
|
||||
}
|
||||
else if (eth_type == bpf_htons(ETH_P_IPV6)) {
|
||||
ip_type = parse_ip6hdr(&nh, data_end, &ipv6hdr);
|
||||
if (ip_type != IPPROTO_TCP)
|
||||
goto out;
|
||||
} else {
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Classify TCP ports 8080 and 8081 */
|
||||
tcp_type = parse_tcphdr(&nh, data_end, &tcphdr);
|
||||
if (tcphdr + 1 > data_end) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
switch (tcphdr->dest) {
|
||||
case bpf_htons(8080):
|
||||
skb->tc_classid = 0x10; /* Handles are always in hex */
|
||||
break;
|
||||
case bpf_htons(8081):
|
||||
skb->tc_classid = 0x20;
|
||||
}
|
||||
|
||||
out:
|
||||
return TC_ACT_OK;
|
||||
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
BIN
tc-basic-classifier/overview.png
Executable file
BIN
tc-basic-classifier/overview.png
Executable file
Binary file not shown.
After Width: | Height: | Size: 39 KiB |
103
tc-basic-classifier/runner.sh
Executable file
103
tc-basic-classifier/runner.sh
Executable file
@ -0,0 +1,103 @@
|
||||
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright 2021 Frey Alfredsson <freysteinn@freysteinn.com>

# Stop at the first failing command and on any use of an unset variable
set -o errexit -o nounset

### Configuration
# Commands used to manage links/namespaces and traffic control
IP="ip"
TC="tc"

# Left and right IPs
L_IP=172.16.16.10
R_IP=172.16.16.20
L_CIDR="$L_IP/24"
R_CIDR="$R_IP/24"

# HTB rates: parent limit, guaranteed start rate, and per-class ceilings
LIMIT=100mbit
START_RATE=5mbit
P8080_LIMIT=80mbit
P8081_LIMIT=40mbit
DEFAULT_LIMIT=20mbit

### Constants
# Namespace names and the veth device placed in each of them
L_NS="left"
R_NS="right"
L_DEV="${L_NS}-veth"
R_DEV="${R_NS}-veth"
|
||||
|
||||
|
||||
### Helper functions
|
||||
# Print the usage text to stdout.
# The here-document is unquoted so that $0 expands to the script name.
function filter-help() {
cat <<-EOF
Usage: $0 <bpf | u32>
Runs an example that either uses an eBPF based Qdisc filter or an u32
filter to rate limit traffic destined for TCP ports 8080 and 8081. It
relies on iperf3 to demonstrate the functionality.

The following are mandatory arguments. Without them prints this help.
    u32             Runs the example with the u32 Qdisc filter
    bpf             Runs the example with the bpf Qdisc filter

Please look at the script's source code to see how the examples differ.
EOF
}
|
||||
|
||||
### Script main
|
||||
# Exactly one argument is required and it must be "bpf" or "u32"
if [ $# -ne 1 ] || [[ "${1-}" != "bpf" && "${1-}" != "u32" ]]; then
    filter-help
    exit 1
fi
mode="$1"
echo "Starting setup"

# Remove network namespaces if this is the second run
$IP netns delete "$L_NS" &> /dev/null || true
$IP netns delete "$R_NS" &> /dev/null || true

# Create network namespaces
$IP netns add "$L_NS"
$IP netns add "$R_NS"

# Create connected virtual nics
# ("peer name" is the form documented in ip-link(8))
$IP link add "$L_DEV" type veth peer name "$R_DEV"

# Add the virtual nics to the network namespaces
$IP link set "$L_DEV" netns "$L_NS"
$IP link set "$R_DEV" netns "$R_NS"

# Add IP addresses to links
$IP -netns "$L_NS" addr add "$L_CIDR" dev "$L_DEV"
$IP -netns "$R_NS" addr add "$R_CIDR" dev "$R_DEV"

# Enable links
$IP -netns "$L_NS" link set "$L_DEV" up
$IP -netns "$R_NS" link set "$R_DEV" up

# Setting up the qdiscs on Left
# Root HTB qdisc 1:0; unclassified traffic goes to class 1:30
$TC -netns "$L_NS" qdisc add dev "$L_DEV" root handle 1:0 htb default 30
# Deliberately expanded unquoted below so it splits into command + arguments
TC_CLASS_ADD="$TC -netns $L_NS class add dev $L_DEV parent"
$TC_CLASS_ADD 1:0 classid 1:1 htb rate "$LIMIT"
$TC_CLASS_ADD 1:1 classid 1:10 htb rate "$START_RATE" ceil "$P8080_LIMIT"
$TC_CLASS_ADD 1:1 classid 1:20 htb rate "$START_RATE" ceil "$P8081_LIMIT"
$TC_CLASS_ADD 1:1 classid 1:30 htb rate "$START_RATE" ceil "$DEFAULT_LIMIT"

# Setup filters
if [ "$mode" == "bpf" ]; then
    # filter.o is looked up relative to the current working directory
    $TC -netns "$L_NS" filter add dev "$L_DEV" protocol ip parent 1:0 \
        bpf obj filter.o classid 1: direct-action
else
    U32="$TC -netns $L_NS filter add dev $L_DEV protocol ip parent 1:0 prio 1 u32"
    $U32 match ip dport 8080 FFFF flowid 1:10
    $U32 match ip dport 8081 FFFF flowid 1:20
fi

# Setup iperf3
echo "Starting iperf3"
# Stop the background iperf3 servers when the script exits, whether it
# finishes normally or aborts because of errexit; otherwise they keep running.
trap 'kill $(jobs -p) &> /dev/null || true' EXIT
for port in 8080 8081 8082; do
    $IP netns exec "$R_NS" iperf3 -s -p "$port" &> /dev/null &
done
# Give the servers a moment to start listening
sleep 1
for port in 8080 8081 8082; do
    $IP netns exec "$L_NS" iperf3 -t 4 -c "$R_IP" -p "$port"
done
|
Reference in New Issue
Block a user