mirror of
https://github.com/xdp-project/bpf-examples.git
synced 2024-05-06 15:54:53 +00:00
pping: Various minor fixes
Perform various fixes and tweaks: - Rename several defines to make them more informative - Remove unrolling of loop in BPF programs - Reuse defines for program sections between userspace and kernel space programs - Perform fork+exec to run bpf_egress_loader script instead of system() - Add comment to copied scripts indicating I've modified them - Add pping.h and pping_helpers.h as dependencies in Makefile Also, add a brief description of what PPing is and how it works to README Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
@@ -6,7 +6,7 @@ BPF_TARGETS := pping_kern_xdp
|
||||
BPF_TARGETS += $(TC_BPF_TARGETS)
|
||||
|
||||
LDFLAGS += -pthread
|
||||
EXTRA_DEPS += config.mk
|
||||
EXTRA_DEPS += config.mk pping.h pping_helpers.h
|
||||
|
||||
LIB_DIR = ../lib
|
||||
|
||||
|
@@ -1,5 +1,19 @@
|
||||
# PPing using XDP and TC-BPF
|
||||
An implementation of the passive ping ([pping](https://github.com/pollere/pping)) utility based on XDP (for ingress) and TC-BPF (for outgress)
|
||||
An implementation of the passive ping ([pping](https://github.com/pollere/pping)) utility based on XDP (for ingress) and TC-BPF (for egress)
|
||||
|
||||
## Simple description
|
||||
Passive Ping (PPing) makes use of the TCP Timestamp option to calculate the RTT for TCP traffic passing through.
|
||||
PPing can be used to measure RTTs on end hosts or on any device which sees both directions of the TCP flow.
|
||||
|
||||
For outgoing packets, it checks for TCP timestamp TSval in the TCP header. If it finds one it creates a timestamp
|
||||
for when it saw that TSval in a particular flow. On incoming packets it parses the TCP timestamp TSecr (which
|
||||
is the TSval echoed by the receiving host) and checks whether it has seen any previous outgoing packets with that TCP
|
||||
timestamp. If it has, an RTT is calculated as the difference in time between when it saw an outgoing packet
|
||||
with a TSval, and when it received an incoming packet from the reverse flow with a matching TSecr.
|
||||
|
||||
Note that TCP timestamps may not be unique for every packet in a flow, therefore it only matches the first
|
||||
outgoing packet with a particular TSval with the first incoming packet with a matching TSecr. Duplicate
|
||||
TSval/TSecr are ignored.
|
||||
|
||||
## Planned design
|
||||

|
||||
|
@@ -5,7 +5,7 @@
|
||||
- [x] Add SPDX-license-identifier tags
|
||||
- [x] Format C-code in kernel style
|
||||
- [x] Use existing functionality to reuse maps by using BTF-defined maps
|
||||
- [ ] Use BTF-defined maps for TC-BPF as well if iproute has libbpf support
|
||||
- [x] Use BTF-defined maps for TC-BPF as well if iproute has libbpf support
|
||||
|
||||
## Future
|
||||
- [ ] Use libxdp to load XDP program
|
||||
|
@@ -3,8 +3,8 @@
|
||||
# Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
|
||||
# License: GPLv2
|
||||
#
|
||||
# Extended by Simon Sundberg <simon.sundberg@kau.se> to add support
|
||||
# of optional section (--sec) option
|
||||
# Modified by Simon Sundberg <simon.sundberg@kau.se> to add support
|
||||
# of optional section (--sec) option and changed default BPF_OBJ
|
||||
#
|
||||
basedir=`dirname $0`
|
||||
source ${basedir}/functions.sh
|
||||
|
@@ -4,6 +4,10 @@
|
||||
#
|
||||
# Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
|
||||
# License: GPLv2
|
||||
#
|
||||
# Modified by Simon Sundberg <simon.sundberg@kau.se> to add support
|
||||
# of optional section (--sec) option
|
||||
#
|
||||
|
||||
function usage() {
|
||||
echo ""
|
||||
|
152
pping/pping.c
152
pping/pping.c
@@ -3,7 +3,6 @@
|
||||
#include <bpf/libbpf.h>
|
||||
#include <linux/if_link.h>
|
||||
#include <net/if.h> // For if_nametoindex
|
||||
//#include <linux/err.h> // For IS_ERR_OR_NULL macro // use libbpf_get_error instead
|
||||
#include <arpa/inet.h> // For inet_ntoa and ntohs
|
||||
|
||||
#include <stdio.h>
|
||||
@@ -14,28 +13,31 @@
|
||||
#include <stdbool.h>
|
||||
#include <signal.h> // For detecting Ctrl-C
|
||||
#include <sys/resource.h> // For setting rlimit
|
||||
#include <sys/wait.h>
|
||||
#include <time.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "pping.h" //key and value structs for the ts_start map
|
||||
|
||||
#define BILLION 1000000000UL
|
||||
#define MILLION 1000000UL
|
||||
#define NS_PER_SECOND 1000000000UL
|
||||
#define NS_PER_MS 1000000UL
|
||||
|
||||
#define TCBPF_LOADER_SCRIPT "./bpf_egress_loader.sh"
|
||||
#define PINNED_DIR "/sys/fs/bpf/tc/globals"
|
||||
#define PPING_XDP_OBJ "pping_kern_xdp.o"
|
||||
#define XDP_PROG_SEC "xdp"
|
||||
#define PPING_TCBPF_OBJ "pping_kern_tc.o"
|
||||
#define TCBPF_PROG_SEC "pping_egress"
|
||||
|
||||
#define XDP_FLAGS XDP_FLAGS_UPDATE_IF_NOEXIST
|
||||
#define MAP_NAME "ts_start"
|
||||
#define MAP_CLEANUP_INTERVAL 1 * BILLION // Clean timestamp map once per second
|
||||
#define PERF_BUFFER_NAME "rtt_events"
|
||||
|
||||
#define TS_MAP "ts_start"
|
||||
#define MAP_CLEANUP_INTERVAL 1 * NS_PER_SECOND // Clean timestamp map once per second
|
||||
#define TIMESTAMP_LIFETIME 10 * NS_PER_SECOND // Clear out entries from ts_start if they're over 10 seconds
|
||||
|
||||
#define PERF_BUFFER "rtt_events"
|
||||
#define PERF_BUFFER_PAGES 64 // Related to the perf-buffer size?
|
||||
#define PERF_POLL_TIMEOUT_MS 100
|
||||
#define RMEMLIM 512UL << 20 /* 512 MBs */
|
||||
#define MAX_COMMAND_LEN 1024
|
||||
|
||||
#define MAX_PATH_LEN 1024
|
||||
#define TIMESTAMP_LIFETIME 10 * BILLION // Clear out entries from ts_start if they're over 10 seconds
|
||||
|
||||
/*
|
||||
* BPF implementation of pping using libbpf
|
||||
@@ -50,6 +52,7 @@
|
||||
* (together with the related flow) and printed out
|
||||
*/
|
||||
|
||||
// Structure to contain arguments for clean_map (for passing to pthread_create)
|
||||
struct map_cleanup_args {
|
||||
int map_fd;
|
||||
__u64 max_age_ns;
|
||||
@@ -97,6 +100,7 @@ static int xdp_attach(struct bpf_object *obj, const char *sec, int ifindex,
|
||||
prog = bpf_object__find_program_by_title(obj, sec);
|
||||
else
|
||||
prog = bpf_program__next(NULL, obj);
|
||||
|
||||
prog_fd = bpf_program__fd(prog);
|
||||
if (prog_fd < 0) {
|
||||
fprintf(stderr, "Could not find program to attach\n");
|
||||
@@ -105,6 +109,7 @@ static int xdp_attach(struct bpf_object *obj, const char *sec, int ifindex,
|
||||
|
||||
if (force) // detach current (if any) xdp-program first
|
||||
xdp_detach(ifindex, xdp_flags);
|
||||
|
||||
err = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
|
||||
if (err < 0) {
|
||||
fprintf(stderr, "Failed loading xdp-program on interface %d\n",
|
||||
@@ -114,21 +119,76 @@ static int xdp_attach(struct bpf_object *obj, const char *sec, int ifindex,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tc_bpf_load(const char *bpf_object, const char *section,
|
||||
const char *interface)
|
||||
{
|
||||
int status;
|
||||
int ret = -1;
|
||||
|
||||
pid_t pid = fork();
|
||||
|
||||
if (pid < 0)
|
||||
return -errno;
|
||||
if (pid == 0) {
|
||||
execl(TCBPF_LOADER_SCRIPT, TCBPF_LOADER_SCRIPT,
|
||||
"--dev", interface, "--obj", bpf_object,
|
||||
"--sec", section, NULL);
|
||||
return -errno;
|
||||
}
|
||||
else { //pid > 0
|
||||
waitpid(pid, &status, 0);
|
||||
if (WIFEXITED(status))
|
||||
ret = WEXITSTATUS(status);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
static int tc_bpf_clear(const char *interface)
|
||||
{
|
||||
int status;
|
||||
int ret = -1;
|
||||
|
||||
pid_t pid = fork();
|
||||
|
||||
if (pid < 0)
|
||||
return -errno;
|
||||
if (pid == 0) {
|
||||
execl(TCBPF_LOADER_SCRIPT, TCBPF_LOADER_SCRIPT,
|
||||
"--dev", interface, "--remove", NULL);
|
||||
return -errno;
|
||||
}
|
||||
else { //pid > 0
|
||||
waitpid(pid, &status, 0);
|
||||
if (WIFEXITED(status))
|
||||
ret = WEXITSTATUS(status);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns time of CLOCK_MONOTONIC as nanoseconds in a single __u64.
|
||||
* On failure, the value 0 is returned (and errno will be set).
|
||||
*/
|
||||
static __u64 get_time_ns(clockid_t clockid)
|
||||
{
|
||||
struct timespec t;
|
||||
if (clock_gettime(clockid, &t) != 0) // CLOCK_BOOTTIME if using bpf_get_ktime_boot_ns
|
||||
return 0;
|
||||
return (__u64)t.tv_sec * BILLION + (__u64)t.tv_nsec;
|
||||
|
||||
return (__u64)t.tv_sec * NS_PER_SECOND + (__u64)t.tv_nsec;
|
||||
}
|
||||
|
||||
static int remove_old_entries_from_map(int map_fd, __u64 max_age)
|
||||
static int clean_map(int map_fd, __u64 max_age)
|
||||
{
|
||||
int removed = 0, entries = 0;
|
||||
int removed = 0;
|
||||
struct ts_key key, prev_key = { 0 };
|
||||
struct ts_timestamp value;
|
||||
bool delete_prev = false;
|
||||
__u64 now_nsec = get_time_ns(CLOCK_MONOTONIC);
|
||||
|
||||
int entries = 0; // Just for debug
|
||||
__u64 duration; // Just for debug
|
||||
|
||||
if (now_nsec == 0)
|
||||
return -errno;
|
||||
|
||||
@@ -153,9 +213,9 @@ static int remove_old_entries_from_map(int map_fd, __u64 max_age)
|
||||
bpf_map_delete_elem(map_fd, &prev_key);
|
||||
removed++;
|
||||
}
|
||||
__u64 duration = get_time_ns(CLOCK_MONOTONIC) - now_nsec;
|
||||
duration = get_time_ns(CLOCK_MONOTONIC) - now_nsec;
|
||||
printf("Gone through %d entries and removed %d of them in %llu.%09llu s\n",
|
||||
entries, removed, duration / BILLION, duration % BILLION);
|
||||
entries, removed, duration / NS_PER_SECOND, duration % NS_PER_SECOND);
|
||||
return removed;
|
||||
}
|
||||
|
||||
@@ -163,10 +223,11 @@ static void *periodic_map_cleanup(void *args)
|
||||
{
|
||||
struct map_cleanup_args *argp = args;
|
||||
struct timespec interval;
|
||||
interval.tv_sec = MAP_CLEANUP_INTERVAL / BILLION;
|
||||
interval.tv_nsec = MAP_CLEANUP_INTERVAL % BILLION;
|
||||
interval.tv_sec = MAP_CLEANUP_INTERVAL / NS_PER_SECOND;
|
||||
interval.tv_nsec = MAP_CLEANUP_INTERVAL % NS_PER_SECOND;
|
||||
|
||||
while (keep_running) {
|
||||
remove_old_entries_from_map(argp->map_fd, argp->max_age_ns);
|
||||
clean_map(argp->map_fd, argp->max_age_ns);
|
||||
nanosleep(&interval, NULL);
|
||||
}
|
||||
pthread_exit(NULL);
|
||||
@@ -178,9 +239,10 @@ static void handle_rtt_event(void *ctx, int cpu, void *data, __u32 data_size)
|
||||
struct in_addr saddr, daddr;
|
||||
saddr.s_addr = e->flow.saddr;
|
||||
daddr.s_addr = e->flow.daddr;
|
||||
|
||||
// inet_ntoa is deprecated, will switch to inet_ntop when adding IPv6 support
|
||||
printf("%llu.%06llu ms %s:%d+", e->rtt / MILLION,
|
||||
e->rtt % MILLION, inet_ntoa(daddr), ntohs(e->flow.dport));
|
||||
printf("%llu.%06llu ms %s:%d+", e->rtt / NS_PER_MS,
|
||||
e->rtt % NS_PER_MS, inet_ntoa(daddr), ntohs(e->flow.dport));
|
||||
printf("%s:%d\n", inet_ntoa(saddr), ntohs(e->flow.sport));
|
||||
}
|
||||
|
||||
@@ -191,17 +253,10 @@ static void handle_missed_rtt_event(void *ctx, int cpu, __u64 lost_cnt)
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 2) {
|
||||
printf("Usage: ./pping_user <dev>\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
int err = 0;
|
||||
int ifindex = 0;
|
||||
bool xdp_attached = false;
|
||||
bool tc_attached = false;
|
||||
|
||||
char tc_cmd[MAX_COMMAND_LEN];
|
||||
char map_path[MAX_PATH_LEN];
|
||||
|
||||
struct bpf_object *obj = NULL;
|
||||
@@ -213,12 +268,17 @@ int main(int argc, char *argv[])
|
||||
struct perf_buffer *pb = NULL;
|
||||
struct perf_buffer_opts pb_opts;
|
||||
|
||||
// TODO - better argument parsing (more relevant as featureas are added)
|
||||
if (argc < 2) {
|
||||
printf("Usage: ./pping_user <dev>\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Increase rlimit
|
||||
err = set_rlimit(RMEMLIM);
|
||||
err = set_rlimit(RLIM_INFINITY);
|
||||
if (err) {
|
||||
fprintf(stderr, "Could not set rlimit to %ld bytes: %s\n",
|
||||
RMEMLIM, strerror(-err));
|
||||
fprintf(stderr, "Could not set rlimit to infinity: %s\n",
|
||||
strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@@ -240,15 +300,14 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
// Get map here to allow for unpinning at cleanup
|
||||
map = bpf_object__find_map_by_name(obj, MAP_NAME);
|
||||
map = bpf_object__find_map_by_name(obj, TS_MAP);
|
||||
err = libbpf_get_error(map);
|
||||
if (err) {
|
||||
fprintf(stderr, "Could not find map %s in %s: %s\n",
|
||||
MAP_NAME, PPING_XDP_OBJ, strerror(err));
|
||||
TS_MAP, PPING_XDP_OBJ, strerror(err));
|
||||
map = NULL;
|
||||
}
|
||||
|
||||
|
||||
err = bpf_object__load(obj);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed loading XDP program: %s\n",
|
||||
@@ -264,27 +323,26 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
xdp_attached = true;
|
||||
|
||||
//Load tc-bpf section on interface egress
|
||||
snprintf(tc_cmd, MAX_COMMAND_LEN, "%s --dev %s --obj %s --sec %s",
|
||||
TCBPF_LOADER_SCRIPT, argv[1], PPING_TCBPF_OBJ, TCBPF_PROG_SEC);
|
||||
err = system(tc_cmd);
|
||||
// Load tc-bpf section on interface egress
|
||||
err = tc_bpf_load(PPING_TCBPF_OBJ, TCBPF_PROG_SEC, argv[1]);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
"Could not load section %s of %s on interface %s: %s\n",
|
||||
TCBPF_PROG_SEC, PPING_TCBPF_OBJ, argv[1],
|
||||
strerror(err));
|
||||
strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
tc_attached = true;
|
||||
|
||||
// Set up the periodical map cleaning
|
||||
clean_args.max_age_ns = TIMESTAMP_LIFETIME;
|
||||
clean_args.map_fd = bpf_map__fd(map);
|
||||
if (clean_args.map_fd < 0) {
|
||||
fprintf(stderr, "Could not get file descriptor of map %s in object %s: %s\n",
|
||||
MAP_NAME, PPING_XDP_OBJ, strerror(-clean_args.map_fd));
|
||||
TS_MAP, PPING_XDP_OBJ, strerror(-clean_args.map_fd));
|
||||
goto cleanup;
|
||||
}
|
||||
clean_args.max_age_ns = TIMESTAMP_LIFETIME;
|
||||
|
||||
err = pthread_create(&tid, NULL, periodic_map_cleanup, &clean_args);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
@@ -298,13 +356,13 @@ int main(int argc, char *argv[])
|
||||
pb_opts.lost_cb = handle_missed_rtt_event;
|
||||
|
||||
pb = perf_buffer__new(bpf_object__find_map_fd_by_name(obj,
|
||||
PERF_BUFFER_NAME),
|
||||
PERF_BUFFER),
|
||||
PERF_BUFFER_PAGES, &pb_opts);
|
||||
err = libbpf_get_error(pb);
|
||||
if (err) {
|
||||
pb = NULL;
|
||||
fprintf(stderr, "Failed to open perf buffer %s: %s\n",
|
||||
PERF_BUFFER_NAME, strerror(err));
|
||||
PERF_BUFFER, strerror(err));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@@ -326,7 +384,7 @@ cleanup:
|
||||
perf_buffer__free(pb);
|
||||
if (map && bpf_map__is_pinned(map)) {
|
||||
snprintf(map_path, sizeof(map_path), "%s/%s",
|
||||
PINNED_DIR, MAP_NAME);
|
||||
PINNED_DIR, TS_MAP);
|
||||
err = bpf_map__unpin(map, map_path);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
@@ -343,13 +401,11 @@ cleanup:
|
||||
}
|
||||
}
|
||||
if (tc_attached) {
|
||||
snprintf(tc_cmd, MAX_COMMAND_LEN, "%s --dev %s --remove",
|
||||
TCBPF_LOADER_SCRIPT, argv[1]);
|
||||
err = system(tc_cmd);
|
||||
err = tc_bpf_clear(argv[1]); //system(tc_cmd);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
"Failed removing tc-bpf program from interface %s: %s\n",
|
||||
argv[1], strerror(err));
|
||||
argv[1], strerror(-err));
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -1,8 +1,13 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
#ifndef TIMESTAMP_MAP_H
|
||||
#define TIMESTAMP_MAP_H
|
||||
#ifndef PPING_H
|
||||
#define PPING_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#define XDP_PROG_SEC "xdp"
|
||||
#define TCBPF_PROG_SEC "pping_egress"
|
||||
|
||||
// TODO - change to support both IPv4 and IPv6 (IPv4 addresses can be mapped to IPv6 addresses)
|
||||
struct ipv4_flow {
|
||||
__u32 saddr;
|
||||
__u32 daddr;
|
||||
@@ -17,9 +22,7 @@ struct ts_key {
|
||||
|
||||
struct ts_timestamp {
|
||||
__u64 timestamp;
|
||||
//__u64 ttl; // Delete entry after ttl, allows more dynamic map cleaning where entries for flows with short RTTs can be removed earlier
|
||||
__u8 used;
|
||||
// __u8 pad[7]; // Need to pad it due to compiler optimization, see "Remove struct padding with aligning members by using #pragma pack." at https://docs.cilium.io/en/v1.9/bpf/
|
||||
};
|
||||
|
||||
struct rtt_event {
|
||||
|
@@ -3,6 +3,8 @@
|
||||
#define PPING_HELPERS_H
|
||||
|
||||
#include "pping.h"
|
||||
#include <linux/tcp.h>
|
||||
|
||||
#define MAX_TCP_OPTIONS 10
|
||||
|
||||
static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr,
|
||||
@@ -14,53 +16,57 @@ static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr,
|
||||
flow->dport = dport;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Parses the TSval and TSecr values from the TCP options field. If successful
|
||||
* the TSval and TSecr values will be stored at tsval and tsecr (in network
|
||||
* Parses the TSval and TSecr values from the TCP options field. If successful
|
||||
* the TSval and TSecr values will be stored at tsval and tsecr (in network
|
||||
* byte order).
|
||||
* Returns 0 if successful and -1 on failure
|
||||
*/
|
||||
static __always_inline int parse_tcp_ts(struct tcphdr *tcph, void *data_end,
|
||||
__u32 *tsval, __u32 *tsecr)
|
||||
__u32 *tsval, __u32 *tsecr)
|
||||
{
|
||||
if (tcph + 1 > data_end)
|
||||
return -1;
|
||||
int len = tcph->doff << 2;
|
||||
if (len <= sizeof(struct tcphdr)) // No TCP options
|
||||
return -1;
|
||||
void *pos = (void *)(tcph + 1);
|
||||
void *opt_end = ((void *)tcph + len);
|
||||
int len = tcph->doff << 2;
|
||||
void *opt_end = (void *)tcph + len;
|
||||
__u8 *pos = (__u8 *)(tcph + 1); //Current pos in TCP options
|
||||
__u8 i, opt, opt_size;
|
||||
#pragma unroll
|
||||
for (i = 0; i < MAX_TCP_OPTIONS; i++) {
|
||||
if (pos + 1 > opt_end || pos + 1 > data_end)
|
||||
return -1;
|
||||
opt = *(__u8 *)pos; // Save value to avoid future data_end comparisons
|
||||
if (opt == 0) // Reached end of TCP options
|
||||
return -1;
|
||||
if (opt == 1) { // TCP NOP option - advance one byte
|
||||
pos++;
|
||||
continue;
|
||||
}
|
||||
// Option > 1, should have option size
|
||||
if (pos + 2 > opt_end || pos + 2 > data_end)
|
||||
return -1;
|
||||
opt_size = *(__u8 *)(pos + 1); // Save value to avoid future data_end comparisons
|
||||
|
||||
// Option-kind is TCP timestap (yey!)
|
||||
if (opt == 8 && opt_size == 10) {
|
||||
if (pos + opt_size > opt_end ||
|
||||
pos + opt_size > data_end)
|
||||
return -1;
|
||||
*tsval = *(__u32 *)(pos + 2);
|
||||
*tsecr = *(__u32 *)(pos + 6);
|
||||
return 0;
|
||||
}
|
||||
if (tcph + 1 > data_end || len <= sizeof(struct tcphdr))
|
||||
return -1;
|
||||
|
||||
// Some other TCP option - advance option-length bytes
|
||||
pos += opt_size;
|
||||
}
|
||||
return -1;
|
||||
for (i = 0; i < MAX_TCP_OPTIONS; i++) {
|
||||
if (pos + 1 > opt_end || pos + 1 > data_end)
|
||||
return -1;
|
||||
|
||||
opt = *pos;
|
||||
if (opt == 0) // Reached end of TCP options
|
||||
return -1;
|
||||
|
||||
if (opt == 1) { // TCP NOP option - advance one byte
|
||||
pos++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Option > 1, should have option size
|
||||
if (pos + 2 > opt_end || pos + 2 > data_end)
|
||||
return -1;
|
||||
opt_size = *(pos + 1);
|
||||
|
||||
// Option-kind is TCP timestap (yey!)
|
||||
if (opt == 8 && opt_size == 10) {
|
||||
if (pos + opt_size > opt_end ||
|
||||
pos + opt_size > data_end)
|
||||
return -1;
|
||||
*tsval = *(__u32 *)(pos + 2);
|
||||
*tsecr = *(__u32 *)(pos + 6);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Some other TCP option - advance option-length bytes
|
||||
pos += opt_size;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -36,14 +36,12 @@ struct bpf_elf_map SEC("maps") ts_start = {
|
||||
#endif
|
||||
|
||||
// TC-BPF program for parsing TSval from egress traffic and adding it to the map
|
||||
SEC("pping_egress")
|
||||
SEC(TCBPF_PROG_SEC)
|
||||
int tc_bpf_prog_egress(struct __sk_buff *skb)
|
||||
{
|
||||
void *data = (void *)(long)skb->data;
|
||||
void *data_end = (void *)(long)skb->data_end;
|
||||
|
||||
//bpf_printk("Sent packet of size %d bytes\n", data_end - data);
|
||||
|
||||
int proto = -1;
|
||||
struct hdr_cursor nh = { .pos = data };
|
||||
struct ethhdr *eth;
|
||||
@@ -60,13 +58,11 @@ int tc_bpf_prog_egress(struct __sk_buff *skb)
|
||||
if (proto < 0)
|
||||
goto end;
|
||||
|
||||
//bpf_printk("TCP-packet with %d byte header and %lu bytes of data\n", proto, data_end - nh.pos);
|
||||
|
||||
__u32 tsval, tsecr;
|
||||
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0)
|
||||
goto end;
|
||||
|
||||
// We have a TCP timestamp, try adding it to the map
|
||||
//bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", bpf_ntohl(tsval), bpf_ntohl(tsecr));
|
||||
struct ts_key key;
|
||||
fill_ipv4_flow(&(key.flow), iph->saddr, iph->daddr,
|
||||
tcph->source, tcph->dest);
|
||||
|
@@ -30,18 +30,18 @@ struct {
|
||||
} rtt_events SEC(".maps");
|
||||
|
||||
// XDP program for parsing TSECR-val from ingress traffic and check for match in map
|
||||
SEC("xdp")
|
||||
SEC(XDP_PROG_SEC)
|
||||
int xdp_prog_ingress(struct xdp_md *ctx)
|
||||
{
|
||||
void *data = (void *)(long)ctx->data;
|
||||
void *data_end = (void *)(long)ctx->data_end;
|
||||
|
||||
int proto = -1;
|
||||
struct hdr_cursor nh = { .pos = data };
|
||||
struct ethhdr *eth;
|
||||
struct iphdr *iph;
|
||||
struct tcphdr *tcph;
|
||||
|
||||
//bpf_printk("Received packet of length %d\n", (int)(data_end - data));
|
||||
proto = parse_ethhdr(&nh, data_end, ð);
|
||||
if (bpf_ntohs(proto) != ETH_P_IP)
|
||||
goto end;
|
||||
@@ -52,20 +52,18 @@ int xdp_prog_ingress(struct xdp_md *ctx)
|
||||
if (proto < 0)
|
||||
goto end;
|
||||
|
||||
//bpf_printk("TCP-packet with %d byte header and %lu bytes of data\n", proto, data_end - nh.pos);
|
||||
|
||||
__u32 tsval, tsecr;
|
||||
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0)
|
||||
goto end;
|
||||
|
||||
// We have a TCP-timestamp - now we can check if it's in the map
|
||||
//bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", bpf_ntohl(tsval), bpf_ntohl(tsecr));
|
||||
struct ts_key key;
|
||||
// Fill in reverse order of egress (dest <--> source)
|
||||
fill_ipv4_flow(&(key.flow), iph->daddr, iph->saddr,
|
||||
tcph->dest, tcph->source);
|
||||
key.tsval = tsecr;
|
||||
struct ts_timestamp *ts = bpf_map_lookup_elem(&ts_start, &key);
|
||||
|
||||
// Only calculate RTT for first packet with matching TSecr
|
||||
if (ts && ts->used == 0) {
|
||||
/*
|
||||
@@ -81,8 +79,8 @@ int xdp_prog_ingress(struct xdp_md *ctx)
|
||||
event.rtt = bpf_ktime_get_ns() - ts->timestamp;
|
||||
bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU,
|
||||
&event, sizeof(event));
|
||||
//bpf_printk("Pushed rtt event with RTT: %llu\n", event.rtt);
|
||||
}
|
||||
|
||||
end:
|
||||
return XDP_PASS;
|
||||
}
|
||||
|
Reference in New Issue
Block a user