mirror of
https://github.com/xdp-project/bpf-examples.git
synced 2024-05-06 15:54:53 +00:00
Merge pull request #38 from xdp-project/vestas06_tc_qdisc
TC policy example of overriding netstack TXQ
This commit is contained in:
13
tc-policy/Makefile
Normal file
13
tc-policy/Makefile
Normal file
@@ -0,0 +1,13 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)

# Directory holding the common makefile fragment included at the bottom
LIB_DIR = ../lib

# Kernel-side BPF object and the userspace loader built in this directory
BPF_TARGETS := tc_txq_policy_kern
USER_TARGETS := tc_txq_policy

# The skeleton header file tc_txq_policy_kern.skel.h is auto-generated,
# which makes this build depend on bpftool.
BPF_SKEL_OBJ := tc_txq_policy_kern.o

include $(LIB_DIR)/common.mk
|
118
tc-policy/README.org
Normal file
118
tc-policy/README.org
Normal file
@@ -0,0 +1,118 @@
|
||||
#+Title: Controlling TC qdisc TXQ selection via BPF
|
||||
|
||||
* Use-case
|
||||
|
||||
As a policy we don't want any traffic generated by the Linux networking stack,
|
||||
to use transmit queue *zero*.
|
||||
|
||||
This use-case is connected with =AF_XDP=. The example
|
||||
[[file:../AF_XDP-interaction/]] is sending important Real-Time traffic on XDP-socket
|
||||
queue zero. Some HW and NIC drivers (e.g. igb and igc) don't have enough
|
||||
hardware TX-queues to allocate separate queues for XDP. Thus, these queues are
|
||||
shared between XDP and network stack, and there is a potential lock-contention
|
||||
and also HW queue usage contention.
|
||||
|
||||
* Example
|
||||
|
||||
The BPF code in this example is rather simple:
|
||||
- See: [[file:tc_txq_policy_kern.c]]
|
||||
|
||||
This BPF program is meant to be loaded in the TC *egress* hook.
|
||||
|
||||
** TC-BPF loader
|
||||
|
||||
The =tc= cmdline tool is notoriously difficult to use, and has issues (mounting
|
||||
BPF file-system) on Yocto build.
|
||||
|
||||
Thus, [[file:tc_txq_policy.c]] contains a C-code loader, that attaches the BPF-prog to
|
||||
the TC-hook, without depending on =tc= command util. Furthermore, the loader
|
||||
uses =bpftool= skeleton feature (to generate a header file) allowing us to create a
|
||||
binary that contains the BPF-object itself, making it self-contained.
|
||||
|
||||
* Gotchas: XPS
|
||||
|
||||
For TXQ (=queue_mapping=) overwrite to work, you need to *disable* XPS (Transmit
|
||||
Packet Steering), as XPS will have higher precedence than our BPF change to
|
||||
=queue_mapping=. This is done by writing 0 into each =/sys/class/net/= tx-queue
|
||||
file =/sys/class/net/DEV/queues/tx-*/xps_cpus=.
|
||||
|
||||
A script for configuring and disabling XPS is provided here: [[file:xps_setup_ash.sh]].
|
||||
|
||||
Script command line to disable XPS:
|
||||
#+begin_src sh
|
||||
sudo ./xps_setup_ash.sh --dev DEVICE --default --disable
|
||||
#+end_src
|
||||
|
||||
* Different ways to view queue_mapping
|
||||
|
||||
Notice that =queue_mapping= set in BPF-prog is like RX-recorded number
|
||||
(=skb_rx_queue_recorded=). When reaching TX-layer it will have been decremented
|
||||
by one (by =skb_get_rx_queue()=) at the TX netstack processing stage (in
|
||||
=__dev_queue_xmit()=).
|
||||
|
||||
** perf probe
|
||||
|
||||
The perf tool can be used for recording and inspecting the =skb->queue_mapping=.
|
||||
|
||||
Remember: BPF-prog =queue_mapping= setting has been decremented by one at this
|
||||
TX netstack processing stage.
|
||||
|
||||
#+begin_src sh
|
||||
perf probe -a 'dev_hard_start_xmit skb->dev->name:string skb->queue_mapping skb->hash'
|
||||
Added new event:
|
||||
probe:dev_hard_start_xmit (on dev_hard_start_xmit with name=skb->dev->name:string queue_mapping=skb->queue_mapping hash=skb->hash)
|
||||
|
||||
You can now use it in all perf tools, such as:
|
||||
perf record -e probe:dev_hard_start_xmit -aR sleep 1
|
||||
#+end_src
|
||||
|
||||
Afterwards run =perf script= and see results.
|
||||
|
||||
** bpftrace
|
||||
|
||||
It is also possible to monitor TXQ usage via a =bpftrace= script.
|
||||
* see [[file:monitor_txq_usage.bt]].
|
||||
|
||||
The main part of the script is:
|
||||
#+begin_src sh
|
||||
tracepoint:net:net_dev_start_xmit {
|
||||
$qm = args->queue_mapping;
|
||||
$dev = str(args->name, 15);
|
||||
|
||||
@stat_txq_usage[$dev] = lhist($qm, 0,32,1);
|
||||
}
|
||||
#+end_src
|
||||
|
||||
Or as oneliner:
|
||||
#+begin_src sh
|
||||
bpftrace -e 't:net:net_dev_start_xmit {@txq[str(args->name, 15)]=lhist(args->queue_mapping, 0,32,1)}'
|
||||
#+end_src
|
||||
|
||||
* Inspecting loaded BPF
|
||||
|
||||
How do you see if these BPF TC-hook programs are loaded?
|
||||
|
||||
** bpftool
|
||||
|
||||
The cmdline =bpftool net= can list any network related BPF program:
|
||||
|
||||
#+begin_example
|
||||
root@main-ctrl2:~ # bpftool net
|
||||
xdp:
|
||||
eth1(5) driver id 59
|
||||
|
||||
tc:
|
||||
eth1(5) clsact/egress not_txq_zero:[17] id 17
|
||||
#+end_example
|
||||
|
||||
There we see both the *XDP* BPF-program used by AF_XDP to redirect frames, and
|
||||
the *TC* hook BPF-prog loaded and attached.
|
||||
|
||||
** tc egress
|
||||
|
||||
The tc command needs to be longer and more explicit:
|
||||
#+begin_example
|
||||
root@main-ctrl2:~ # tc filter show dev eth1 egress
|
||||
filter protocol all pref 49199 bpf chain 0
|
||||
filter protocol all pref 49199 bpf chain 0 handle 0x1 not_txq_zero:[17] direct-action not_in_hw id 17 tag a761e11074b78959 jited
|
||||
#+end_example
|
42
tc-policy/adv_monitor_txq_usage.bt
Executable file
42
tc-policy/adv_monitor_txq_usage.bt
Executable file
@@ -0,0 +1,42 @@
|
||||
#!/usr/bin/bpftrace

/*
 * Monitor which TX queue packets are sent on, per net_device:
 *  - @stat_txq_usage : histogram of queue_mapping seen at net_dev_start_xmit
 *  - @netdev_pick_tx : histogram of the value returned by netdev_pick_tx()
 */

//BEGIN {
//	printf("Monitor TXQ usage\n");
//	printf(" - Remember: BPF set queue_mapping is one-less here (zero-indexed)\n");
//}

tracepoint:net:net_dev_start_xmit {
	$qm = args->queue_mapping;
	$dev = str(args->name, 16);

	@stat_txq_usage[$dev] = lhist($qm, 0,32,1);
}

/*
 * More precisely we actually want to see what netdev_pick_tx() is
 * selecting, as sockets can possibly return another queue_id.
 */

/* Entry: remember the device name so the return probe can key on it */
kprobe:netdev_pick_tx {
	$dev = ((struct net_device *)arg0)->name;
	@record[cpu] = $dev;
}

/* Return: account the selected queue (retval) against the recorded device */
kretprobe:netdev_pick_tx {
	$dev = @record[cpu];
	@netdev_pick_tx[$dev] = lhist(retval, 0,32,1);

	/* Drop the scratch entry: it must not be reused by a later return
	 * probe, and it should not show up when remaining maps are printed
	 * at exit.
	 */
	delete(@record[cpu]);
}

/* Periodically print stats */
interval:s:3
{
	printf("\nPeriodic show stats - time: ");
	time();
	print(@stat_txq_usage);
	print(@netdev_pick_tx);
}

/* Default bpftrace will print all remaining maps at END */
//END {
//	printf("END stats:\n");
//}
|
26
tc-policy/monitor_txq_usage.bt
Executable file
26
tc-policy/monitor_txq_usage.bt
Executable file
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/bpftrace

/* Per-device histogram of the TX queue_mapping seen at net_dev_start_xmit */

//BEGIN {
//	printf("Monitor TXQ usage\n");
//	printf(" - Remember: BPF set queue_mapping is one-less here (zero-indexed)\n");
//}

tracepoint:net:net_dev_start_xmit {
	@stat_txq_usage[str(args->name, 16)] = lhist(args->queue_mapping, 0, 32, 1);
}

/* Dump the histogram every 3 seconds, prefixed with the current time */
interval:s:3 {
	printf("\nPeriodic show stats - time: ");
	time();
	print(@stat_txq_usage);
}

/* Default bpftrace will print all remaining maps at END */
//END {
//	printf("END stats:\n");
//}
|
288
tc-policy/tc_txq_policy.c
Normal file
288
tc-policy/tc_txq_policy.c
Normal file
@@ -0,0 +1,288 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
/* Copyright 2022 Jesper Dangaard Brouer */
|
||||
|
||||
static const char *__doc__ =
|
||||
"TC queue policy - Controlling TC qdisc TXQ selection via BPF";
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
#include <net/if.h>
|
||||
#include <linux/if_arp.h>
|
||||
#include <getopt.h>
|
||||
#include <linux/in6.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <linux/bpf.h>
|
||||
|
||||
#include <bpf/libbpf.h>
|
||||
#include <bpf/bpf.h>
|
||||
|
||||
static const struct option long_options[] = {
|
||||
{ "help", no_argument, NULL, 'h' },
|
||||
{ "interface", required_argument, NULL, 'i' },
|
||||
{ "unload", no_argument, NULL, 'u' },
|
||||
{ "destroy-hook", no_argument, NULL, 'f' },
|
||||
{ "quiet", no_argument, NULL, 'q' },
|
||||
{ 0, 0, NULL, 0 }
|
||||
};
|
||||
|
||||
/* Handle and priority identifying the TC filter instance this loader
 * attaches and later queries/detaches.
 * No trailing ';' so the macros can be used inside any expression
 * (initializers, comparisons, function arguments), not only as the
 * right-hand side of a complete statement.
 */
#define EGRESS_HANDLE		0x1
#define EGRESS_PRIORITY		0xC02F
|
||||
|
||||
struct user_config {
|
||||
int ifindex;
|
||||
char ifname[IF_NAMESIZE+1];
|
||||
bool unload;
|
||||
bool flush_hook;
|
||||
};
|
||||
|
||||
static int verbose = 1;
|
||||
|
||||
/* Auto-generated skeleton: Contains BPF-object inlined as code */
|
||||
#include "tc_txq_policy_kern.skel.h"
|
||||
|
||||
static void print_usage(char *argv[])
|
||||
{
|
||||
int i;
|
||||
|
||||
printf("\nDOCUMENTATION:\n%s\n", __doc__);
|
||||
printf("\n");
|
||||
printf(" Usage: %s (options-see-below)\n", argv[0]);
|
||||
printf(" Listing options:\n");
|
||||
for (i = 0; long_options[i].name != 0; i++) {
|
||||
printf(" --%-12s", long_options[i].name);
|
||||
if (long_options[i].flag != NULL)
|
||||
printf(" flag (internal value:%d)",
|
||||
*long_options[i].flag);
|
||||
else
|
||||
printf(" short-option: -%c", long_options[i].val);
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static int parse_arguments(int argc, char *argv[],
|
||||
struct user_config *cfg)
|
||||
{
|
||||
int err, opt;
|
||||
|
||||
cfg->ifindex = 0;
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "i:hufq", long_options,
|
||||
NULL)) != -1) {
|
||||
switch (opt) {
|
||||
case 'i':
|
||||
if (strlen(optarg) > IF_NAMESIZE) {
|
||||
fprintf(stderr, "interface name too long\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
strncpy(cfg->ifname, optarg, IF_NAMESIZE);
|
||||
|
||||
cfg->ifindex = if_nametoindex(cfg->ifname);
|
||||
if (cfg->ifindex == 0) {
|
||||
err = -errno;
|
||||
fprintf(stderr,
|
||||
"Could not get index of interface %s: [%d] %s\n",
|
||||
cfg->ifname, errno, strerror(errno));
|
||||
return err;
|
||||
}
|
||||
break;
|
||||
case 'u':
|
||||
cfg->unload = true;
|
||||
break;
|
||||
case 'f':
|
||||
cfg->flush_hook = true;
|
||||
break;
|
||||
case 'q':
|
||||
verbose = 0;
|
||||
break;
|
||||
default:
|
||||
print_usage(argv);
|
||||
fprintf(stderr, "Unknown option %s\n", argv[optind]);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
if (cfg->ifindex == 0) {
|
||||
fprintf(stderr,
|
||||
"An interface (-i or --interface) must be provided\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Open and load the BPF skeleton.
 *
 * Returns the loaded skeleton object, or NULL after printing an error to
 * stderr. A skeleton that was opened but failed to load is destroyed
 * before returning. @cfg is currently not used.
 */
struct tc_txq_policy_kern *
get_bpf_skel_object(struct user_config *cfg)
{
	struct tc_txq_policy_kern *skel; /* Type comes from the skeleton header */
	char errmsg[100];
	int rc;

	/* The skeleton header file carries the BPF-object as inline code */
	skel = tc_txq_policy_kern__open();
	rc = libbpf_get_error(skel);
	if (rc) {
		libbpf_strerror(rc, errmsg, sizeof(errmsg));
		fprintf(stderr, "Couldn't open BPF skeleton:(%d) %s\n", rc, errmsg);
		return NULL;
	}

	/* Add code here that change BPF-obj config before loading */

	/* Load BPF-code into the kernel (verifier runs); nothing is attached yet */
	rc = tc_txq_policy_kern__load(skel);
	if (!rc)
		return skel;

	libbpf_strerror(rc, errmsg, sizeof(errmsg));
	fprintf(stderr, "Couldn't load BPF skeleton:(%d) %s\n", rc, errmsg);
	tc_txq_policy_kern__destroy(skel);
	return NULL;
}
|
||||
|
||||
int teardown_hook(struct user_config *cfg)
|
||||
{
|
||||
DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook,
|
||||
.attach_point = BPF_TC_EGRESS,
|
||||
.ifindex = cfg->ifindex);
|
||||
int err;
|
||||
|
||||
/* When destroying the hook, any and ALL attached TC-BPF (filter)
|
||||
* programs are also detached.
|
||||
*/
|
||||
err = bpf_tc_hook_destroy(&hook);
|
||||
if (err)
|
||||
fprintf(stderr, "Couldn't remove clsact qdisc on %s\n", cfg->ifname);
|
||||
|
||||
if (verbose)
|
||||
printf("Flushed all TC-BPF egress programs (via destroy hook)\n");
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int tc_detach_egress(struct user_config *cfg)
|
||||
{
|
||||
int err;
|
||||
DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = cfg->ifindex,
|
||||
.attach_point = BPF_TC_EGRESS);
|
||||
DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_info);
|
||||
|
||||
opts_info.handle = EGRESS_HANDLE;
|
||||
opts_info.priority = EGRESS_PRIORITY;
|
||||
|
||||
/* Check what program we are removing */
|
||||
err = bpf_tc_query(&hook, &opts_info);
|
||||
if (err) {
|
||||
fprintf(stderr, "No egress program to detach "
|
||||
"for ifindex %d (err:%d)\n", cfg->ifindex, err);
|
||||
return err;
|
||||
}
|
||||
if (verbose)
|
||||
printf("Detaching TC-BPF prog id:%d\n", opts_info.prog_id);
|
||||
|
||||
/* Attempt to detach program */
|
||||
opts_info.prog_fd = 0;
|
||||
opts_info.prog_id = 0;
|
||||
opts_info.flags = 0;
|
||||
err = bpf_tc_detach(&hook, &opts_info);
|
||||
if (err) {
|
||||
fprintf(stderr, "Cannot detach TC-BPF program id:%d "
|
||||
"for ifindex %d (err:%d)\n", opts_info.prog_id,
|
||||
cfg->ifindex, err);
|
||||
}
|
||||
|
||||
if (cfg->flush_hook)
|
||||
return teardown_hook(cfg);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int tc_attach_egress(struct user_config *cfg, struct tc_txq_policy_kern *obj)
|
||||
{
|
||||
int err = 0;
|
||||
int fd;
|
||||
DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .attach_point = BPF_TC_EGRESS);
|
||||
DECLARE_LIBBPF_OPTS(bpf_tc_opts, attach_egress);
|
||||
|
||||
/* Selecting BPF-prog here: */
|
||||
//fd = bpf_program__fd(obj->progs.queue_map_4);
|
||||
fd = bpf_program__fd(obj->progs.not_txq_zero);
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, "Couldn't find egress program\n");
|
||||
err = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
attach_egress.prog_fd = fd;
|
||||
|
||||
hook.ifindex = cfg->ifindex;
|
||||
|
||||
err = bpf_tc_hook_create(&hook);
|
||||
if (err && err != -EEXIST) {
|
||||
fprintf(stderr, "Couldn't create TC-BPF hook for "
|
||||
"ifindex %d (err:%d)\n", cfg->ifindex, err);
|
||||
goto out;
|
||||
}
|
||||
if (verbose && err == -EEXIST) {
|
||||
printf("Success: TC-BPF hook already existed "
|
||||
"(Ignore: \"libbpf: Kernel error message\")\n");
|
||||
}
|
||||
|
||||
hook.attach_point = BPF_TC_EGRESS;
|
||||
attach_egress.flags = BPF_TC_F_REPLACE;
|
||||
attach_egress.handle = EGRESS_HANDLE;
|
||||
attach_egress.priority = EGRESS_PRIORITY;
|
||||
err = bpf_tc_attach(&hook, &attach_egress);
|
||||
if (err) {
|
||||
fprintf(stderr, "Couldn't attach egress program to "
|
||||
"ifindex %d (err:%d)\n", hook.ifindex, err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (verbose) {
|
||||
printf("Attached TC-BPF program id:%d\n",
|
||||
attach_egress.prog_id);
|
||||
}
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
struct user_config cfg = {
|
||||
.unload = false,
|
||||
.flush_hook = false,
|
||||
};
|
||||
struct tc_txq_policy_kern *obj; /* Skeleton gave us this */
|
||||
int err;
|
||||
|
||||
err = parse_arguments(argc, argv, &cfg);
|
||||
if (err)
|
||||
return EXIT_FAILURE;
|
||||
|
||||
if (cfg.unload)
|
||||
return tc_detach_egress(&cfg);
|
||||
|
||||
if (cfg.flush_hook)
|
||||
return teardown_hook(&cfg);
|
||||
|
||||
obj = get_bpf_skel_object(&cfg);
|
||||
if (obj == NULL)
|
||||
return EXIT_FAILURE;
|
||||
|
||||
err = tc_attach_egress(&cfg, obj);
|
||||
if (err) {
|
||||
err = EXIT_FAILURE;
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
tc_txq_policy_kern__destroy(obj);
|
||||
return err;
|
||||
}
|
80
tc-policy/tc_txq_policy_kern.c
Normal file
80
tc-policy/tc_txq_policy_kern.c
Normal file
@@ -0,0 +1,80 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
/* Copyright 2022 Jesper Dangaard Brouer */
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/compiler.h>
|
||||
#include <linux/pkt_sched.h>
|
||||
#include <linux/pkt_cls.h>
|
||||
//#include <stdbool.h>
|
||||
//#include "../include/xdp/parsing_helpers.h"
|
||||
|
||||
/* Manual setup:
|
||||
export DEV=eth1
|
||||
|
||||
tc qdisc add dev "$DEV" clsact
|
||||
tc filter add dev "$DEV" egress bpf da obj tc_txq_policy_kern.o
|
||||
tc filter list dev "$DEV" egress
|
||||
|
||||
* Quick test reloading with tc:
|
||||
tc filter replace dev "$DEV" egress prio 0xC000 handle 1 bpf da obj tc_txq_policy_kern.o
|
||||
|
||||
* Beware: Trying to replace an existing TC-BPF prog often result in appending a
|
||||
* new prog (as a new tc filter instance). Be careful to set both handle and
|
||||
* prio to the existing TC-BPF "filter" instance.
|
||||
|
||||
* Delete by teardown of clsact
|
||||
tc qdisc delete dev "$DEV" clsact
|
||||
|
||||
*/
|
||||
SEC("classifier")
int queue_map_4 (struct __sk_buff *skb)
{
	/* skb->queue_mapping counts from one (zero means not set), and the
	 * underlying MQ leaf's are numbered the same way, which makes it
	 * easier to reason about. When debugging, keep in mind that writing
	 * skb->queue_mapping here is like the RX-path having recorded the
	 * number (skb_rx_queue_recorded); by the time it reaches the
	 * TX-layer (skb_get_rx_queue) it will have been decremented by one.
	 */
	const __u16 txq_root_handle = 4;

	skb->queue_mapping = txq_root_handle;

	/* Details: Kernel double protect against setting a too high
	 * queue_mapping. In skb_tx_hash() it will reduce number to be
	 * less-than (or equal) dev->real_num_tx_queues. And netdev_pick_tx()
	 * cap via netdev_cap_txqueue().
	 */

	// FIXME: Do we need to set TC_H_MAJOR(skb->priority) for this to work?

	return TC_ACT_OK;
}
|
||||
|
||||
/*
|
||||
* Section name "tc" is preferred over "classifier", as the latter is being deprecated
|
||||
* https://github.com/libbpf/libbpf/wiki/Libbpf-1.0-migration-guide#bpf-program-sec-annotation-deprecations
|
||||
*/
|
||||
|
||||
SEC("tc")
int not_txq_zero (struct __sk_buff *skb)
{
	/* An existing skb->queue_mapping can come from skb_record_rx_queue(),
	 * which is usually called by drivers in early RX handling when
	 * creating the SKB. At this stage queue_mapping is 1-indexed.
	 */
	switch (skb->queue_mapping) {
	case 1:
		/* 1-indexed value 1 is TXQ zero: remap it to TXQ 3 */
		skb->queue_mapping = 4;
		break;
	case 0:
		/* queue_mapping was not set by skb_record_rx_queue(),
		 * e.g. locally generated traffic
		 */
		skb->queue_mapping = 3;
		break;
	default:
		/* Any other queue is left untouched */
		break;
	}

	return TC_ACT_OK;
}
|
200
tc-policy/xps_setup_ash.sh
Executable file
200
tc-policy/xps_setup_ash.sh
Executable file
@@ -0,0 +1,200 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# This script is a modified version of:
|
||||
# https://github.com/xdp-project/xdp-cpumap-tc/blob/master/bin/xps_setup.sh
|
||||
#
|
||||
# Script simplified to work with shell: ash
|
||||
|
||||
# Print the help text: one line per option, plus the name of the
# environment variable each option exports.
function usage() {
    printf '%s\n' \
        "Change setting of XPS txq to CPU mapping via files" \
        " /sys/class/net/DEV/queues/tx-*/xps_cpus " \
        "" \
        "Usage: $0 [-h] --dev ethX --txq N --cpu N" \
        " -d | --dev : (\$DEV) Interface/device (required)" \
        " --default : (\$DEFAULT) Setup 1:1 mapping TXQ-to-CPU" \
        " --disable : (\$DISABLE) Disable XPS via mask 0x00" \
        " --list : (\$LIST) List current setting" \
        " --txq N : (\$TXQ) Select TXQ" \
        " --cpu N : (\$CPU) Select CPU that use TXQ" \
        " -v | --verbose : (\$VERBOSE) verbose" \
        ""
}
|
||||
|
||||
## -- General shell logging cmds --
|
||||
# Report a fatal error on stderr and terminate the script.
#  $1   : exit code to terminate with
#  $2.. : message text (printed via "echo -e" after an "ERROR: " prefix)
function err() {
    local rc=$1
    shift
    echo -e "ERROR: $@" >&2
    exit $rc
}
|
||||
|
||||
# Print an informational line prefixed with "# ", but only when the
# VERBOSE variable is non-empty.
function info() {
    if [ -z "$VERBOSE" ]; then
        return 0
    fi
    echo "# $@"
}
|
||||
|
||||
# Convert a mask to a list of CPUs this cover
|
||||
# Convert a mask to a list of CPUs this cover.
#  $1 : CPU bitmask as a number
# Prints "CPUs in MASK=0x.. =>" followed by " cpu:N" for every set bit
# (no trailing newline), or " disabled" when the mask is 0.
function mask_to_cpus() {
    local bits=$1
    local idx=0

    printf "CPUs in MASK=0x%02X =>" $bits
    if [ "$bits" = 0 ]; then
        echo " disabled"
    fi

    # Walk the mask from the lowest bit upwards until no bits remain
    while [ $bits -gt 0 ]; do
        if [ $((bits & 1)) -eq 1 ]; then
            printf ' cpu:%s' "$idx"
        fi
        idx=$((idx + 1))
        bits=$((bits >> 1))
    done
}
|
||||
|
||||
# Print, on a single line, the xps_cpus sysfs files of device $DEV,
# sorted numerically on the part after the first '-' in each path.
function sorted_txq_xps_cpus() {
    echo $(ls /sys/class/net/$DEV/queues/tx-*/xps_cpus | sort -t '-' -k2n)
}
|
||||
|
||||
# List the current XPS setting of every TX queue of device $DEV.
# Prints one line per queue: the zero-based TXQ number, the one-based
# MQ-leaf number and the CPUs selected by the queue's xps_cpus mask.
function list_xps_setup() {
    local txq=0
    local mqleaf=0
    local xps_cpus mask value txt

    for xps_cpus in $(sorted_txq_xps_cpus); do
        mqleaf=$((mqleaf + 1))
        # On machines with many CPUs the kernel prints the bitmap as
        # comma-separated 32-bit groups (e.g. "00000000,00000001").
        # Strip the commas so the value is a single hex number.
        # NOTE(review): masks wider than the shell's integer size still
        # cannot be represented here.
        mask=$(tr -d ',' < $xps_cpus)
        value=$((0x$mask))
        #echo MASK:0x$mask
        txt=$(mask_to_cpus $value)
        echo "NIC=$DEV TXQ:$txq (MQ-leaf :$mqleaf) use $txt"
        txq=$((txq + 1))
    done
}
|
||||
|
||||
# Print the hexadecimal (upper-case, no "0x" prefix, no newline) bitmask
# that has only the bit for CPU number $1 set.
function cpu_to_mask() {
    local bit=$((1 << $1))
    printf "%X" $bit
}
|
||||
|
||||
# Setup TXQ to only use a single specific CPU
|
||||
# Setup TXQ to only use a single specific CPU.
#  $1 : TX queue number
#  $2 : CPU number
# With DISABLE=yes the mask 0 is written instead. Nothing is written when
# the queue's xps_cpus file does not exist.
function xps_txq_to_cpu() {
    local queue=$1
    local cpu_id=$2
    local sysfs_file=/sys/class/net/$DEV/queues/tx-$queue/xps_cpus
    local cpumask=0

    if [ "$DISABLE" != "yes" ]; then
        cpumask=$(cpu_to_mask $cpu_id)
    fi

    if [ -e "$sysfs_file" ]; then
        echo $cpumask > $sysfs_file
    fi
}
|
||||
|
||||
# Walk all TX queues of device $DEV in numeric order and either map
# queue N to CPU N (1-to-1), or, with DISABLE=yes, write mask 0 to
# disable XPS on every queue.
function xps_setup_1to1_mapping() {
    local cpu=0
    local txq=0
    local mqleaf=0
    local xps_cpus mask

    for xps_cpus in $(sorted_txq_xps_cpus); do
        # MQ-leaf numbering is one-based, TXQ numbering zero-based
        mqleaf=$((mqleaf + 1))

        if [[ "$DISABLE" != "yes" ]]; then
            # Map the TXQ to CPU number 1-to-1
            mask=$(cpu_to_mask $cpu)
            echo $mask > $xps_cpus
            info "NIC=$DEV TXQ:$txq use CPU $cpu (MQ-leaf :$mqleaf)"
        else
            # Disable XPS on TXQ
            mask=0
            echo $mask > $xps_cpus
            info "NIC=$DEV TXQ:$txq XPS disabled (MQ-leaf :$mqleaf)"
        fi

        cpu=$((cpu + 1))
        txq=$((txq + 1))
    done
}
|
||||
|
||||
# Using external program "getopt" to get --long-options
|
||||
## OPTIONS=$(getopt -o ld: \
|
||||
## --long list,default,disable,dev:,txq:,cpu: -- "$@")
|
||||
##if (( $? != 0 )); then
|
||||
## usage
|
||||
## err 2 "Error calling getopt"
|
||||
##fi
|
||||
##eval set -- "$OPTIONS"
|
||||
|
||||
## --- Parse command line arguments / parameters ---
|
||||
# Consume the command line one option at a time. Every recognised option
# exports the matching environment variable; "--" or the first
# unrecognised argument ends the parsing.
while :; do
    case "$1" in
        -h | --help )
            usage
            exit 0
            ;;
        -v | --verbose )
            export VERBOSE=yes
            # info "Verbose mode: VERBOSE=$VERBOSE" >&2
            shift
            ;;
        -d | --dev ) # device
            export DEV=$2
            info "Device set to: DEV=$DEV" >&2
            shift 2
            ;;
        --txq )
            export TXQ=$2
            info "Selected: TXQ=$TXQ" >&2
            shift 2
            ;;
        --cpu )
            export CPU=$2
            info "Selected: CPU=$CPU" >&2
            shift 2
            ;;
        --list )
            info "Listing --list" >&2
            export LIST=yes
            shift 1
            ;;
        --default )
            info "Setup default 1-to-1 mapping TXQ-to-CPUs" >&2
            export DEFAULT=yes
            shift 1
            ;;
        --disable )
            info "Disable XPS via mask 0x00" >&2
            export DISABLE=yes
            shift 1
            ;;
        -- )
            shift
            break
            ;;
        * )
            shift
            break
            ;;
    esac
done
|
||||
|
||||
# A device is mandatory for every operation
if [ -z "$DEV" ]; then
    usage
    err 2 "Please specify device"
fi

# --txq needs a CPU to map to, unless the queue is being disabled
if [ -n "$TXQ" ] && [ -z "$CPU" ] && [ -z "$DISABLE" ]; then
    err 4 "CPU also needed when giving TXQ:$TXQ (or --disable)"
fi

# --disable only modifies what --default or --txq selects. When neither
# was given, none of the actions below would have run either.
if [ "$DISABLE" = "yes" ] && [ -z "$DEFAULT" ] && [ -z "$TXQ" ]; then
    err 5 "Use --disable together with --default or --txq"
fi

# Apply the per-queue setting first, then the 1-to-1 default mapping
if [ -n "$TXQ" ]; then
    xps_txq_to_cpu $TXQ $CPU
fi

if [ -n "$DEFAULT" ]; then
    xps_setup_1to1_mapping
fi

# The resulting setup is always listed (the --list guard is disabled)
#if [[ -n "$LIST" ]]; then
list_xps_setup
#fi
|
Reference in New Issue
Block a user