mirror of
https://github.com/xdp-project/bpf-examples.git
synced 2024-05-06 15:54:53 +00:00
pping: Various minor fixes
Perform various fixes and tweaks: - Rename several defines to make them more informative - Remove unrolling of loop in BPF programs - Reuse defines for program sections between userspace and kernel space programs - Perform fork+exec to run bpf_egress_loader script instead of system() - Add comment to copied scripts indicating I've modified them - Add pping.h and pping_helpers.h as dependencies in Makefile Also, add a brief description of what PPing is and how it works to README Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
@@ -6,7 +6,7 @@ BPF_TARGETS := pping_kern_xdp
|
|||||||
BPF_TARGETS += $(TC_BPF_TARGETS)
|
BPF_TARGETS += $(TC_BPF_TARGETS)
|
||||||
|
|
||||||
LDFLAGS += -pthread
|
LDFLAGS += -pthread
|
||||||
EXTRA_DEPS += config.mk
|
EXTRA_DEPS += config.mk pping.h pping_helpers.h
|
||||||
|
|
||||||
LIB_DIR = ../lib
|
LIB_DIR = ../lib
|
||||||
|
|
||||||
|
@@ -1,5 +1,19 @@
|
|||||||
# PPing using XDP and TC-BPF
|
# PPing using XDP and TC-BPF
|
||||||
An implementation of the passive ping ([pping](https://github.com/pollere/pping)) utility based on XDP (for ingress) and TC-BPF (for outgress)
|
An implementation of the passive ping ([pping](https://github.com/pollere/pping)) utility based on XDP (for ingress) and TC-BPF (for egress)
|
||||||
|
|
||||||
|
## Simple description
|
||||||
|
Passive Ping (PPing) makes use of the TCP Timestamp option to calculate the RTT for TCP traffic passing through.
|
||||||
|
PPing can be used to measure RTTs on end hosts or on any device which sees both directions of the TCP flow.
|
||||||
|
|
||||||
|
For outgoing packets, it checks for TCP timestamp TSval in the TCP header. If it finds one it creates a timestamp
|
||||||
|
for when it saw that TSval in a particular flow. On incoming packets it parses the TCP timestamp TSecr (which
|
||||||
|
is the TSval echoed by the receiving host) and checks whether it has seen any previous outgoing packets with that TCP
|
||||||
|
timestamp. If it has, an RTT is calculated as the difference in time between when it saw an outgoing packet
|
||||||
|
with a TSval, and when it received an incoming packet from the reverse flow with a matching TSecr.
|
||||||
|
|
||||||
|
Note that TCP timestamps may not be unique for every packet in a flow, therefore it only matches the first
|
||||||
|
outgoing packet with a particular TSval with the first incoming packet with a matching TSecr. Duplicate
|
||||||
|
TSval/TSecr are ignored.
|
||||||
|
|
||||||
## Planned design
|
## Planned design
|
||||||

|

|
||||||
|
@@ -5,7 +5,7 @@
|
|||||||
- [x] Add SPDX-license-identifier tags
|
- [x] Add SPDX-license-identifier tags
|
||||||
- [x] Format C-code in kernel style
|
- [x] Format C-code in kernel style
|
||||||
- [x] Use existing funcionality to reuse maps by using BTF-defined maps
|
- [x] Use existing funcionality to reuse maps by using BTF-defined maps
|
||||||
- [ ] Use BTF-defined maps for TC-BPF as well if iproute has libbpf support
|
- [x] Use BTF-defined maps for TC-BPF as well if iproute has libbpf support
|
||||||
|
|
||||||
## Future
|
## Future
|
||||||
- [ ] Use libxdp to load XDP program
|
- [ ] Use libxdp to load XDP program
|
||||||
|
@@ -3,8 +3,8 @@
|
|||||||
# Author: Jesper Dangaaard Brouer <netoptimizer@brouer.com>
|
# Author: Jesper Dangaaard Brouer <netoptimizer@brouer.com>
|
||||||
# License: GPLv2
|
# License: GPLv2
|
||||||
#
|
#
|
||||||
# Extended by Simon Sundberg <simon.sundberg@kau.se> to add support
|
# Modified by Simon Sundberg <simon.sundberg@kau.se> to add support
|
||||||
# of optional section (--sec) option
|
# of optional section (--sec) option and changed default BPF_OBJ
|
||||||
#
|
#
|
||||||
basedir=`dirname $0`
|
basedir=`dirname $0`
|
||||||
source ${basedir}/functions.sh
|
source ${basedir}/functions.sh
|
||||||
|
@@ -4,6 +4,10 @@
|
|||||||
#
|
#
|
||||||
# Author: Jesper Dangaaard Brouer <netoptimizer@brouer.com>
|
# Author: Jesper Dangaaard Brouer <netoptimizer@brouer.com>
|
||||||
# License: GPLv2
|
# License: GPLv2
|
||||||
|
#
|
||||||
|
# Modified by Simon Sundberg <simon.sundberg@kau.se> to add support
|
||||||
|
# of optional section (--sec) option
|
||||||
|
#
|
||||||
|
|
||||||
function usage() {
|
function usage() {
|
||||||
echo ""
|
echo ""
|
||||||
|
152
pping/pping.c
152
pping/pping.c
@@ -3,7 +3,6 @@
|
|||||||
#include <bpf/libbpf.h>
|
#include <bpf/libbpf.h>
|
||||||
#include <linux/if_link.h>
|
#include <linux/if_link.h>
|
||||||
#include <net/if.h> // For if_nametoindex
|
#include <net/if.h> // For if_nametoindex
|
||||||
//#include <linux/err.h> // For IS_ERR_OR_NULL macro // use libbpf_get_error instead
|
|
||||||
#include <arpa/inet.h> // For inet_ntoa and ntohs
|
#include <arpa/inet.h> // For inet_ntoa and ntohs
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
@@ -14,28 +13,31 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <signal.h> // For detecting Ctrl-C
|
#include <signal.h> // For detecting Ctrl-C
|
||||||
#include <sys/resource.h> // For setting rlmit
|
#include <sys/resource.h> // For setting rlmit
|
||||||
|
#include <sys/wait.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
|
|
||||||
#include "pping.h" //key and value structs for the ts_start map
|
#include "pping.h" //key and value structs for the ts_start map
|
||||||
|
|
||||||
#define BILLION 1000000000UL
|
#define NS_PER_SECOND 1000000000UL
|
||||||
#define MILLION 1000000UL
|
#define NS_PER_MS 1000000UL
|
||||||
|
|
||||||
#define TCBPF_LOADER_SCRIPT "./bpf_egress_loader.sh"
|
#define TCBPF_LOADER_SCRIPT "./bpf_egress_loader.sh"
|
||||||
#define PINNED_DIR "/sys/fs/bpf/tc/globals"
|
#define PINNED_DIR "/sys/fs/bpf/tc/globals"
|
||||||
#define PPING_XDP_OBJ "pping_kern_xdp.o"
|
#define PPING_XDP_OBJ "pping_kern_xdp.o"
|
||||||
#define XDP_PROG_SEC "xdp"
|
|
||||||
#define PPING_TCBPF_OBJ "pping_kern_tc.o"
|
#define PPING_TCBPF_OBJ "pping_kern_tc.o"
|
||||||
#define TCBPF_PROG_SEC "pping_egress"
|
|
||||||
#define XDP_FLAGS XDP_FLAGS_UPDATE_IF_NOEXIST
|
#define XDP_FLAGS XDP_FLAGS_UPDATE_IF_NOEXIST
|
||||||
#define MAP_NAME "ts_start"
|
|
||||||
#define MAP_CLEANUP_INTERVAL 1 * BILLION // Clean timestamp map once per second
|
#define TS_MAP "ts_start"
|
||||||
#define PERF_BUFFER_NAME "rtt_events"
|
#define MAP_CLEANUP_INTERVAL 1 * NS_PER_SECOND // Clean timestamp map once per second
|
||||||
|
#define TIMESTAMP_LIFETIME 10 * NS_PER_SECOND // Clear out entries from ts_start if they're over 10 seconds
|
||||||
|
|
||||||
|
#define PERF_BUFFER "rtt_events"
|
||||||
#define PERF_BUFFER_PAGES 64 // Related to the perf-buffer size?
|
#define PERF_BUFFER_PAGES 64 // Related to the perf-buffer size?
|
||||||
#define PERF_POLL_TIMEOUT_MS 100
|
#define PERF_POLL_TIMEOUT_MS 100
|
||||||
#define RMEMLIM 512UL << 20 /* 512 MBs */
|
|
||||||
#define MAX_COMMAND_LEN 1024
|
|
||||||
#define MAX_PATH_LEN 1024
|
#define MAX_PATH_LEN 1024
|
||||||
#define TIMESTAMP_LIFETIME 10 * BILLION // Clear out entries from ts_start if they're over 10 seconds
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* BPF implementation of pping using libbpf
|
* BPF implementation of pping using libbpf
|
||||||
@@ -50,6 +52,7 @@
|
|||||||
* (together with the related flow) and printed out
|
* (together with the related flow) and printed out
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// Structure to contain arguments for clean_map (for passing to pthread_create)
|
||||||
struct map_cleanup_args {
|
struct map_cleanup_args {
|
||||||
int map_fd;
|
int map_fd;
|
||||||
__u64 max_age_ns;
|
__u64 max_age_ns;
|
||||||
@@ -97,6 +100,7 @@ static int xdp_attach(struct bpf_object *obj, const char *sec, int ifindex,
|
|||||||
prog = bpf_object__find_program_by_title(obj, sec);
|
prog = bpf_object__find_program_by_title(obj, sec);
|
||||||
else
|
else
|
||||||
prog = bpf_program__next(NULL, obj);
|
prog = bpf_program__next(NULL, obj);
|
||||||
|
|
||||||
prog_fd = bpf_program__fd(prog);
|
prog_fd = bpf_program__fd(prog);
|
||||||
if (prog_fd < 0) {
|
if (prog_fd < 0) {
|
||||||
fprintf(stderr, "Could not find program to attach\n");
|
fprintf(stderr, "Could not find program to attach\n");
|
||||||
@@ -105,6 +109,7 @@ static int xdp_attach(struct bpf_object *obj, const char *sec, int ifindex,
|
|||||||
|
|
||||||
if (force) // detach current (if any) xdp-program first
|
if (force) // detach current (if any) xdp-program first
|
||||||
xdp_detach(ifindex, xdp_flags);
|
xdp_detach(ifindex, xdp_flags);
|
||||||
|
|
||||||
err = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
|
err = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
fprintf(stderr, "Failed loading xdp-program on interface %d\n",
|
fprintf(stderr, "Failed loading xdp-program on interface %d\n",
|
||||||
@@ -114,21 +119,76 @@ static int xdp_attach(struct bpf_object *obj, const char *sec, int ifindex,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int tc_bpf_load(const char *bpf_object, const char *section,
|
||||||
|
const char *interface)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
int ret = -1;
|
||||||
|
|
||||||
|
pid_t pid = fork();
|
||||||
|
|
||||||
|
if (pid < 0)
|
||||||
|
return -errno;
|
||||||
|
if (pid == 0) {
|
||||||
|
execl(TCBPF_LOADER_SCRIPT, TCBPF_LOADER_SCRIPT,
|
||||||
|
"--dev", interface, "--obj", bpf_object,
|
||||||
|
"--sec", section, NULL);
|
||||||
|
return -errno;
|
||||||
|
}
|
||||||
|
else { //pid > 0
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
if (WIFEXITED(status))
|
||||||
|
ret = WEXITSTATUS(status);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int tc_bpf_clear(const char *interface)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
int ret = -1;
|
||||||
|
|
||||||
|
pid_t pid = fork();
|
||||||
|
|
||||||
|
if (pid < 0)
|
||||||
|
return -errno;
|
||||||
|
if (pid == 0) {
|
||||||
|
execl(TCBPF_LOADER_SCRIPT, TCBPF_LOADER_SCRIPT,
|
||||||
|
"--dev", interface, "--remove", NULL);
|
||||||
|
return -errno;
|
||||||
|
}
|
||||||
|
else { //pid > 0
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
if (WIFEXITED(status))
|
||||||
|
ret = WEXITSTATUS(status);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns time of CLOCK_MONOTONIC as nanoseconds in a single __u64.
|
||||||
|
* On failure, the value 0 is returned (and errno will be set).
|
||||||
|
*/
|
||||||
static __u64 get_time_ns(clockid_t clockid)
|
static __u64 get_time_ns(clockid_t clockid)
|
||||||
{
|
{
|
||||||
struct timespec t;
|
struct timespec t;
|
||||||
if (clock_gettime(clockid, &t) != 0) // CLOCK_BOOTTIME if using bpf_get_ktime_boot_ns
|
if (clock_gettime(clockid, &t) != 0) // CLOCK_BOOTTIME if using bpf_get_ktime_boot_ns
|
||||||
return 0;
|
return 0;
|
||||||
return (__u64)t.tv_sec * BILLION + (__u64)t.tv_nsec;
|
|
||||||
|
return (__u64)t.tv_sec * NS_PER_SECOND + (__u64)t.tv_nsec;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int remove_old_entries_from_map(int map_fd, __u64 max_age)
|
static int clean_map(int map_fd, __u64 max_age)
|
||||||
{
|
{
|
||||||
int removed = 0, entries = 0;
|
int removed = 0;
|
||||||
struct ts_key key, prev_key = { 0 };
|
struct ts_key key, prev_key = { 0 };
|
||||||
struct ts_timestamp value;
|
struct ts_timestamp value;
|
||||||
bool delete_prev = false;
|
bool delete_prev = false;
|
||||||
__u64 now_nsec = get_time_ns(CLOCK_MONOTONIC);
|
__u64 now_nsec = get_time_ns(CLOCK_MONOTONIC);
|
||||||
|
|
||||||
|
int entries = 0; // Just for debug
|
||||||
|
__u64 duration; // Just for debug
|
||||||
|
|
||||||
if (now_nsec == 0)
|
if (now_nsec == 0)
|
||||||
return -errno;
|
return -errno;
|
||||||
|
|
||||||
@@ -153,9 +213,9 @@ static int remove_old_entries_from_map(int map_fd, __u64 max_age)
|
|||||||
bpf_map_delete_elem(map_fd, &prev_key);
|
bpf_map_delete_elem(map_fd, &prev_key);
|
||||||
removed++;
|
removed++;
|
||||||
}
|
}
|
||||||
__u64 duration = get_time_ns(CLOCK_MONOTONIC) - now_nsec;
|
duration = get_time_ns(CLOCK_MONOTONIC) - now_nsec;
|
||||||
printf("Gone through %d entries and removed %d of them in %llu.%09llu s\n",
|
printf("Gone through %d entries and removed %d of them in %llu.%09llu s\n",
|
||||||
entries, removed, duration / BILLION, duration % BILLION);
|
entries, removed, duration / NS_PER_SECOND, duration % NS_PER_SECOND);
|
||||||
return removed;
|
return removed;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -163,10 +223,11 @@ static void *periodic_map_cleanup(void *args)
|
|||||||
{
|
{
|
||||||
struct map_cleanup_args *argp = args;
|
struct map_cleanup_args *argp = args;
|
||||||
struct timespec interval;
|
struct timespec interval;
|
||||||
interval.tv_sec = MAP_CLEANUP_INTERVAL / BILLION;
|
interval.tv_sec = MAP_CLEANUP_INTERVAL / NS_PER_SECOND;
|
||||||
interval.tv_nsec = MAP_CLEANUP_INTERVAL % BILLION;
|
interval.tv_nsec = MAP_CLEANUP_INTERVAL % NS_PER_SECOND;
|
||||||
|
|
||||||
while (keep_running) {
|
while (keep_running) {
|
||||||
remove_old_entries_from_map(argp->map_fd, argp->max_age_ns);
|
clean_map(argp->map_fd, argp->max_age_ns);
|
||||||
nanosleep(&interval, NULL);
|
nanosleep(&interval, NULL);
|
||||||
}
|
}
|
||||||
pthread_exit(NULL);
|
pthread_exit(NULL);
|
||||||
@@ -178,9 +239,10 @@ static void handle_rtt_event(void *ctx, int cpu, void *data, __u32 data_size)
|
|||||||
struct in_addr saddr, daddr;
|
struct in_addr saddr, daddr;
|
||||||
saddr.s_addr = e->flow.saddr;
|
saddr.s_addr = e->flow.saddr;
|
||||||
daddr.s_addr = e->flow.daddr;
|
daddr.s_addr = e->flow.daddr;
|
||||||
|
|
||||||
// inet_ntoa is deprecated, will switch to inet_ntop when adding IPv6 support
|
// inet_ntoa is deprecated, will switch to inet_ntop when adding IPv6 support
|
||||||
printf("%llu.%06llu ms %s:%d+", e->rtt / MILLION,
|
printf("%llu.%06llu ms %s:%d+", e->rtt / NS_PER_MS,
|
||||||
e->rtt % MILLION, inet_ntoa(daddr), ntohs(e->flow.dport));
|
e->rtt % NS_PER_MS, inet_ntoa(daddr), ntohs(e->flow.dport));
|
||||||
printf("%s:%d\n", inet_ntoa(saddr), ntohs(e->flow.sport));
|
printf("%s:%d\n", inet_ntoa(saddr), ntohs(e->flow.sport));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -191,17 +253,10 @@ static void handle_missed_rtt_event(void *ctx, int cpu, __u64 lost_cnt)
|
|||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
if (argc < 2) {
|
|
||||||
printf("Usage: ./pping_user <dev>\n");
|
|
||||||
return EXIT_FAILURE;
|
|
||||||
}
|
|
||||||
|
|
||||||
int err = 0;
|
int err = 0;
|
||||||
int ifindex = 0;
|
int ifindex = 0;
|
||||||
bool xdp_attached = false;
|
bool xdp_attached = false;
|
||||||
bool tc_attached = false;
|
bool tc_attached = false;
|
||||||
|
|
||||||
char tc_cmd[MAX_COMMAND_LEN];
|
|
||||||
char map_path[MAX_PATH_LEN];
|
char map_path[MAX_PATH_LEN];
|
||||||
|
|
||||||
struct bpf_object *obj = NULL;
|
struct bpf_object *obj = NULL;
|
||||||
@@ -213,12 +268,17 @@ int main(int argc, char *argv[])
|
|||||||
struct perf_buffer *pb = NULL;
|
struct perf_buffer *pb = NULL;
|
||||||
struct perf_buffer_opts pb_opts;
|
struct perf_buffer_opts pb_opts;
|
||||||
|
|
||||||
|
// TODO - better argument parsing (more relevant as features are added)
|
||||||
|
if (argc < 2) {
|
||||||
|
printf("Usage: ./pping_user <dev>\n");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
// Increase rlimit
|
// Increase rlimit
|
||||||
err = set_rlimit(RMEMLIM);
|
err = set_rlimit(RLIM_INFINITY);
|
||||||
if (err) {
|
if (err) {
|
||||||
fprintf(stderr, "Could not set rlimit to %ld bytes: %s\n",
|
fprintf(stderr, "Could not set rlimit to infinity: %s\n",
|
||||||
RMEMLIM, strerror(-err));
|
strerror(-err));
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -240,15 +300,14 @@ int main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Get map here to allow for unpinning at cleanup
|
// Get map here to allow for unpinning at cleanup
|
||||||
map = bpf_object__find_map_by_name(obj, MAP_NAME);
|
map = bpf_object__find_map_by_name(obj, TS_MAP);
|
||||||
err = libbpf_get_error(map);
|
err = libbpf_get_error(map);
|
||||||
if (err) {
|
if (err) {
|
||||||
fprintf(stderr, "Could not find map %s in %s: %s\n",
|
fprintf(stderr, "Could not find map %s in %s: %s\n",
|
||||||
MAP_NAME, PPING_XDP_OBJ, strerror(err));
|
TS_MAP, PPING_XDP_OBJ, strerror(err));
|
||||||
map = NULL;
|
map = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
err = bpf_object__load(obj);
|
err = bpf_object__load(obj);
|
||||||
if (err) {
|
if (err) {
|
||||||
fprintf(stderr, "Failed loading XDP program: %s\n",
|
fprintf(stderr, "Failed loading XDP program: %s\n",
|
||||||
@@ -264,27 +323,26 @@ int main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
xdp_attached = true;
|
xdp_attached = true;
|
||||||
|
|
||||||
//Load tc-bpf section on interface egress
|
// Load tc-bpf section on interface egress
|
||||||
snprintf(tc_cmd, MAX_COMMAND_LEN, "%s --dev %s --obj %s --sec %s",
|
err = tc_bpf_load(PPING_TCBPF_OBJ, TCBPF_PROG_SEC, argv[1]);
|
||||||
TCBPF_LOADER_SCRIPT, argv[1], PPING_TCBPF_OBJ, TCBPF_PROG_SEC);
|
|
||||||
err = system(tc_cmd);
|
|
||||||
if (err) {
|
if (err) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"Could not load section %s of %s on interface %s: %s\n",
|
"Could not load section %s of %s on interface %s: %s\n",
|
||||||
TCBPF_PROG_SEC, PPING_TCBPF_OBJ, argv[1],
|
TCBPF_PROG_SEC, PPING_TCBPF_OBJ, argv[1],
|
||||||
strerror(err));
|
strerror(-err));
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
tc_attached = true;
|
tc_attached = true;
|
||||||
|
|
||||||
// Set up the periodical map cleaning
|
// Set up the periodical map cleaning
|
||||||
|
clean_args.max_age_ns = TIMESTAMP_LIFETIME;
|
||||||
clean_args.map_fd = bpf_map__fd(map);
|
clean_args.map_fd = bpf_map__fd(map);
|
||||||
if (clean_args.map_fd < 0) {
|
if (clean_args.map_fd < 0) {
|
||||||
fprintf(stderr, "Could not get file descriptor of map %s in object %s: %s\n",
|
fprintf(stderr, "Could not get file descriptor of map %s in object %s: %s\n",
|
||||||
MAP_NAME, PPING_XDP_OBJ, strerror(-clean_args.map_fd));
|
TS_MAP, PPING_XDP_OBJ, strerror(-clean_args.map_fd));
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
clean_args.max_age_ns = TIMESTAMP_LIFETIME;
|
|
||||||
err = pthread_create(&tid, NULL, periodic_map_cleanup, &clean_args);
|
err = pthread_create(&tid, NULL, periodic_map_cleanup, &clean_args);
|
||||||
if (err) {
|
if (err) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
@@ -298,13 +356,13 @@ int main(int argc, char *argv[])
|
|||||||
pb_opts.lost_cb = handle_missed_rtt_event;
|
pb_opts.lost_cb = handle_missed_rtt_event;
|
||||||
|
|
||||||
pb = perf_buffer__new(bpf_object__find_map_fd_by_name(obj,
|
pb = perf_buffer__new(bpf_object__find_map_fd_by_name(obj,
|
||||||
PERF_BUFFER_NAME),
|
PERF_BUFFER),
|
||||||
PERF_BUFFER_PAGES, &pb_opts);
|
PERF_BUFFER_PAGES, &pb_opts);
|
||||||
err = libbpf_get_error(pb);
|
err = libbpf_get_error(pb);
|
||||||
if (err) {
|
if (err) {
|
||||||
pb = NULL;
|
pb = NULL;
|
||||||
fprintf(stderr, "Failed to open perf buffer %s: %s\n",
|
fprintf(stderr, "Failed to open perf buffer %s: %s\n",
|
||||||
PERF_BUFFER_NAME, strerror(err));
|
PERF_BUFFER, strerror(err));
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -326,7 +384,7 @@ cleanup:
|
|||||||
perf_buffer__free(pb);
|
perf_buffer__free(pb);
|
||||||
if (map && bpf_map__is_pinned(map)) {
|
if (map && bpf_map__is_pinned(map)) {
|
||||||
snprintf(map_path, sizeof(map_path), "%s/%s",
|
snprintf(map_path, sizeof(map_path), "%s/%s",
|
||||||
PINNED_DIR, MAP_NAME);
|
PINNED_DIR, TS_MAP);
|
||||||
err = bpf_map__unpin(map, map_path);
|
err = bpf_map__unpin(map, map_path);
|
||||||
if (err) {
|
if (err) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
@@ -343,13 +401,11 @@ cleanup:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (tc_attached) {
|
if (tc_attached) {
|
||||||
snprintf(tc_cmd, MAX_COMMAND_LEN, "%s --dev %s --remove",
|
err = tc_bpf_clear(argv[1]); //system(tc_cmd);
|
||||||
TCBPF_LOADER_SCRIPT, argv[1]);
|
|
||||||
err = system(tc_cmd);
|
|
||||||
if (err) {
|
if (err) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"Failed removing tc-bpf program from interface %s: %s\n",
|
"Failed removing tc-bpf program from interface %s: %s\n",
|
||||||
argv[1], strerror(err));
|
argv[1], strerror(-err));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -1,8 +1,13 @@
|
|||||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||||
#ifndef TIMESTAMP_MAP_H
|
#ifndef PPING_H
|
||||||
#define TIMESTAMP_MAP_H
|
#define PPING_H
|
||||||
|
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
#define XDP_PROG_SEC "xdp"
|
||||||
|
#define TCBPF_PROG_SEC "pping_egress"
|
||||||
|
|
||||||
|
// TODO - change to support both IPv4 and IPv6 (IPv4 addresses can be mapped to IPv6 addresses)
|
||||||
struct ipv4_flow {
|
struct ipv4_flow {
|
||||||
__u32 saddr;
|
__u32 saddr;
|
||||||
__u32 daddr;
|
__u32 daddr;
|
||||||
@@ -17,9 +22,7 @@ struct ts_key {
|
|||||||
|
|
||||||
struct ts_timestamp {
|
struct ts_timestamp {
|
||||||
__u64 timestamp;
|
__u64 timestamp;
|
||||||
//__u64 ttl; // Delete entry after ttl, allows more dynamic map cleaning where entries for flows with short RTTs can be removed earlier
|
|
||||||
__u8 used;
|
__u8 used;
|
||||||
// __u8 pad[7]; // Need to pad it due to compiler optimization, see "Remove struct padding with aligning members by using #pragma pack." at https://docs.cilium.io/en/v1.9/bpf/
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct rtt_event {
|
struct rtt_event {
|
||||||
|
@@ -3,6 +3,8 @@
|
|||||||
#define PPING_HELPERS_H
|
#define PPING_HELPERS_H
|
||||||
|
|
||||||
#include "pping.h"
|
#include "pping.h"
|
||||||
|
#include <linux/tcp.h>
|
||||||
|
|
||||||
#define MAX_TCP_OPTIONS 10
|
#define MAX_TCP_OPTIONS 10
|
||||||
|
|
||||||
static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr,
|
static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr,
|
||||||
@@ -14,53 +16,57 @@ static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr,
|
|||||||
flow->dport = dport;
|
flow->dport = dport;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Parses the TSval and TSecr values from the TCP options field. If sucessful
|
* Parses the TSval and TSecr values from the TCP options field. If sucessful
|
||||||
* the TSval and TSecr values will be stored at tsval and tsecr (in network
|
* the TSval and TSecr values will be stored at tsval and tsecr (in network
|
||||||
* byte order).
|
* byte order).
|
||||||
* Returns 0 if sucessful and -1 on failure
|
* Returns 0 if sucessful and -1 on failure
|
||||||
*/
|
*/
|
||||||
static __always_inline int parse_tcp_ts(struct tcphdr *tcph, void *data_end,
|
static __always_inline int parse_tcp_ts(struct tcphdr *tcph, void *data_end,
|
||||||
__u32 *tsval, __u32 *tsecr)
|
__u32 *tsval, __u32 *tsecr)
|
||||||
{
|
{
|
||||||
if (tcph + 1 > data_end)
|
int len = tcph->doff << 2;
|
||||||
return -1;
|
void *opt_end = (void *)tcph + len;
|
||||||
int len = tcph->doff << 2;
|
__u8 *pos = (__u8 *)(tcph + 1); //Current pos in TCP options
|
||||||
if (len <= sizeof(struct tcphdr)) // No TCP options
|
|
||||||
return -1;
|
|
||||||
void *pos = (void *)(tcph + 1);
|
|
||||||
void *opt_end = ((void *)tcph + len);
|
|
||||||
__u8 i, opt, opt_size;
|
__u8 i, opt, opt_size;
|
||||||
#pragma unroll
|
|
||||||
for (i = 0; i < MAX_TCP_OPTIONS; i++) {
|
|
||||||
if (pos + 1 > opt_end || pos + 1 > data_end)
|
|
||||||
return -1;
|
|
||||||
opt = *(__u8 *)pos; // Save value to avoid future data_end comparisons
|
|
||||||
if (opt == 0) // Reached end of TCP options
|
|
||||||
return -1;
|
|
||||||
if (opt == 1) { // TCP NOP option - advance one byte
|
|
||||||
pos++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Option > 1, should have option size
|
|
||||||
if (pos + 2 > opt_end || pos + 2 > data_end)
|
|
||||||
return -1;
|
|
||||||
opt_size = *(__u8 *)(pos + 1); // Save value to avoid future data_end comparisons
|
|
||||||
|
|
||||||
// Option-kind is TCP timestap (yey!)
|
if (tcph + 1 > data_end || len <= sizeof(struct tcphdr))
|
||||||
if (opt == 8 && opt_size == 10) {
|
return -1;
|
||||||
if (pos + opt_size > opt_end ||
|
|
||||||
pos + opt_size > data_end)
|
|
||||||
return -1;
|
|
||||||
*tsval = *(__u32 *)(pos + 2);
|
|
||||||
*tsecr = *(__u32 *)(pos + 6);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Some other TCP option - advance option-length bytes
|
for (i = 0; i < MAX_TCP_OPTIONS; i++) {
|
||||||
pos += opt_size;
|
if (pos + 1 > opt_end || pos + 1 > data_end)
|
||||||
}
|
return -1;
|
||||||
return -1;
|
|
||||||
|
opt = *pos;
|
||||||
|
if (opt == 0) // Reached end of TCP options
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (opt == 1) { // TCP NOP option - advance one byte
|
||||||
|
pos++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Option > 1, should have option size
|
||||||
|
if (pos + 2 > opt_end || pos + 2 > data_end)
|
||||||
|
return -1;
|
||||||
|
opt_size = *(pos + 1);
|
||||||
|
|
||||||
|
// Option-kind is TCP timestap (yey!)
|
||||||
|
if (opt == 8 && opt_size == 10) {
|
||||||
|
if (pos + opt_size > opt_end ||
|
||||||
|
pos + opt_size > data_end)
|
||||||
|
return -1;
|
||||||
|
*tsval = *(__u32 *)(pos + 2);
|
||||||
|
*tsecr = *(__u32 *)(pos + 6);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Some other TCP option - advance option-length bytes
|
||||||
|
pos += opt_size;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -36,14 +36,12 @@ struct bpf_elf_map SEC("maps") ts_start = {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// TC-BFP for parsing TSVAL from egress traffic and add to map
|
// TC-BFP for parsing TSVAL from egress traffic and add to map
|
||||||
SEC("pping_egress")
|
SEC(TCBPF_PROG_SEC)
|
||||||
int tc_bpf_prog_egress(struct __sk_buff *skb)
|
int tc_bpf_prog_egress(struct __sk_buff *skb)
|
||||||
{
|
{
|
||||||
void *data = (void *)(long)skb->data;
|
void *data = (void *)(long)skb->data;
|
||||||
void *data_end = (void *)(long)skb->data_end;
|
void *data_end = (void *)(long)skb->data_end;
|
||||||
|
|
||||||
//bpf_printk("Sent packet of size %d bytes\n", data_end - data);
|
|
||||||
|
|
||||||
int proto = -1;
|
int proto = -1;
|
||||||
struct hdr_cursor nh = { .pos = data };
|
struct hdr_cursor nh = { .pos = data };
|
||||||
struct ethhdr *eth;
|
struct ethhdr *eth;
|
||||||
@@ -60,13 +58,11 @@ int tc_bpf_prog_egress(struct __sk_buff *skb)
|
|||||||
if (proto < 0)
|
if (proto < 0)
|
||||||
goto end;
|
goto end;
|
||||||
|
|
||||||
//bpf_printk("TCP-packet with %d byte header and %lu bytes of data\n", proto, data_end - nh.pos);
|
|
||||||
|
|
||||||
__u32 tsval, tsecr;
|
__u32 tsval, tsecr;
|
||||||
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0)
|
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0)
|
||||||
goto end;
|
goto end;
|
||||||
|
|
||||||
// We have a TCP timestamp, try adding it to the map
|
// We have a TCP timestamp, try adding it to the map
|
||||||
//bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", bpf_ntohl(tsval), bpf_ntohl(tsecr));
|
|
||||||
struct ts_key key;
|
struct ts_key key;
|
||||||
fill_ipv4_flow(&(key.flow), iph->saddr, iph->daddr,
|
fill_ipv4_flow(&(key.flow), iph->saddr, iph->daddr,
|
||||||
tcph->source, tcph->dest);
|
tcph->source, tcph->dest);
|
||||||
|
@@ -30,18 +30,18 @@ struct {
|
|||||||
} rtt_events SEC(".maps");
|
} rtt_events SEC(".maps");
|
||||||
|
|
||||||
// XDP program for parsing TSECR-val from ingress traffic and check for match in map
|
// XDP program for parsing TSECR-val from ingress traffic and check for match in map
|
||||||
SEC("xdp")
|
SEC(XDP_PROG_SEC)
|
||||||
int xdp_prog_ingress(struct xdp_md *ctx)
|
int xdp_prog_ingress(struct xdp_md *ctx)
|
||||||
{
|
{
|
||||||
void *data = (void *)(long)ctx->data;
|
void *data = (void *)(long)ctx->data;
|
||||||
void *data_end = (void *)(long)ctx->data_end;
|
void *data_end = (void *)(long)ctx->data_end;
|
||||||
|
|
||||||
int proto = -1;
|
int proto = -1;
|
||||||
struct hdr_cursor nh = { .pos = data };
|
struct hdr_cursor nh = { .pos = data };
|
||||||
struct ethhdr *eth;
|
struct ethhdr *eth;
|
||||||
struct iphdr *iph;
|
struct iphdr *iph;
|
||||||
struct tcphdr *tcph;
|
struct tcphdr *tcph;
|
||||||
|
|
||||||
//bpf_printk("Received packet of length %d\n", (int)(data_end - data));
|
|
||||||
proto = parse_ethhdr(&nh, data_end, ð);
|
proto = parse_ethhdr(&nh, data_end, ð);
|
||||||
if (bpf_ntohs(proto) != ETH_P_IP)
|
if (bpf_ntohs(proto) != ETH_P_IP)
|
||||||
goto end;
|
goto end;
|
||||||
@@ -52,20 +52,18 @@ int xdp_prog_ingress(struct xdp_md *ctx)
|
|||||||
if (proto < 0)
|
if (proto < 0)
|
||||||
goto end;
|
goto end;
|
||||||
|
|
||||||
//bpf_printk("TCP-packet with %d byte header and %lu bytes of data\n", proto, data_end - nh.pos);
|
|
||||||
|
|
||||||
__u32 tsval, tsecr;
|
__u32 tsval, tsecr;
|
||||||
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0)
|
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0)
|
||||||
goto end;
|
goto end;
|
||||||
|
|
||||||
// We have a TCP-timestamp - now we can check if it's in the map
|
// We have a TCP-timestamp - now we can check if it's in the map
|
||||||
//bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", bpf_ntohl(tsval), bpf_ntohl(tsecr));
|
|
||||||
struct ts_key key;
|
struct ts_key key;
|
||||||
// Fill in reverse order of egress (dest <--> source)
|
// Fill in reverse order of egress (dest <--> source)
|
||||||
fill_ipv4_flow(&(key.flow), iph->daddr, iph->saddr,
|
fill_ipv4_flow(&(key.flow), iph->daddr, iph->saddr,
|
||||||
tcph->dest, tcph->source);
|
tcph->dest, tcph->source);
|
||||||
key.tsval = tsecr;
|
key.tsval = tsecr;
|
||||||
struct ts_timestamp *ts = bpf_map_lookup_elem(&ts_start, &key);
|
struct ts_timestamp *ts = bpf_map_lookup_elem(&ts_start, &key);
|
||||||
|
|
||||||
// Only calculate RTT for first packet with matching TSecr
|
// Only calculate RTT for first packet with matching TSecr
|
||||||
if (ts && ts->used == 0) {
|
if (ts && ts->used == 0) {
|
||||||
/*
|
/*
|
||||||
@@ -81,8 +79,8 @@ int xdp_prog_ingress(struct xdp_md *ctx)
|
|||||||
event.rtt = bpf_ktime_get_ns() - ts->timestamp;
|
event.rtt = bpf_ktime_get_ns() - ts->timestamp;
|
||||||
bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU,
|
bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU,
|
||||||
&event, sizeof(event));
|
&event, sizeof(event));
|
||||||
//bpf_printk("Pushed rtt event with RTT: %llu\n", event.rtt);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
end:
|
end:
|
||||||
return XDP_PASS;
|
return XDP_PASS;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user