From 954c66b0e81ab2b8255ad5b48b374a8141fe6158 Mon Sep 17 00:00:00 2001 From: Simon Sundberg Date: Thu, 7 Jan 2021 18:30:53 +0100 Subject: [PATCH] pping: Add TC-BPF program for egress Split and rename files so there is one userspace program (pping) and two kernel-space ones (one for XDP and one for TC-BPF). Copy the shell script for loading the TC-BPF program from the traffic-pacing-edt folder, but add support for loading a specific section. The XDP and TC-BPF programs do not share the ts_start map, so the program does not work yet. Signed-off-by: Simon Sundberg --- pping/Makefile | 4 +- pping/bpf_egress_loader.sh | 85 +++++++++++++++++ pping/functions.sh | 64 +++++++++++++ pping/parameters.sh | 90 ++++++++++++++++++ pping/{pping_user.c => pping.c} | 27 ++++-- pping/{timestamp_map.h => pping.h} | 0 pping/pping_helpers.h | 58 ++++++++++++ pping/pping_kern.c | 141 ----------------------------- pping/pping_kern_tc.c | 68 ++++++++++++++ pping/pping_kern_xdp.c | 83 +++++++++++++++++ 10 files changed, 471 insertions(+), 149 deletions(-) create mode 100755 pping/bpf_egress_loader.sh create mode 100644 pping/functions.sh create mode 100644 pping/parameters.sh rename pping/{pping_user.c => pping.c} (89%) rename pping/{timestamp_map.h => pping.h} (100%) create mode 100644 pping/pping_helpers.h delete mode 100644 pping/pping_kern.c create mode 100644 pping/pping_kern_tc.c create mode 100644 pping/pping_kern_xdp.c diff --git a/pping/Makefile b/pping/Makefile index 14ace53..6166548 100644 --- a/pping/Makefile +++ b/pping/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) -USER_TARGETS := pping_user -BPF_TARGETS := pping_kern +USER_TARGETS := pping +BPF_TARGETS := pping_kern_xdp pping_kern_tc LDFLAGS = -pthread diff --git a/pping/bpf_egress_loader.sh b/pping/bpf_egress_loader.sh new file mode 100755 index 0000000..e14084e --- /dev/null +++ b/pping/bpf_egress_loader.sh @@ -0,0 +1,85 @@ +#!/bin/bash +# +# Author: Jesper Dangaard Brouer +# License: GPLv2 +# 
+basedir=$(dirname "$0")
+source "${basedir}/functions.sh"
+
+root_check_run_with_sudo "$@"
+
+# Use common parameters (command-line parsing; exports DEV, BPF_OBJ, SEC, REMOVE, LIST, ...)
+source "${basedir}/parameters.sh"
+
+export TC=/sbin/tc
+
+# This can be changed via --file or --obj
+if [[ -z ${BPF_OBJ} ]]; then
+    # Fallback default: the TC-BPF object built from pping_kern_tc.c
+    BPF_OBJ=pping_kern_tc.o
+fi
+
+# This can be changed via --sec
+if [[ -z ${SEC} ]]; then
+    # Fallback default: the ELF section holding the egress program in pping_kern_tc.c
+    SEC=pping_egress
+fi
+
+info "Applying TC-BPF egress setup on device: $DEV with object file: $BPF_OBJ"
+
+function tc_remove_clsact()
+{
+    local device=${1:-$DEV}
+    shift
+
+    # Removing qdisc clsact also deletes all filters; failure (e.g. no clsact present) is tolerated
+    call_tc_allow_fail qdisc del dev "$device" clsact 2> /dev/null
+}
+
+function tc_init_clsact()
+{
+    local device=${1:-$DEV}
+    shift
+
+    # TODO: find method that avoids flushing (all users)
+
+    # Start from a clean slate: deleting clsact also deletes all filters
+    call_tc_allow_fail qdisc del dev "$device" clsact 2> /dev/null
+
+    # Load qdisc clsact which allows us to attach BPF-progs as TC filters
+    call_tc qdisc add dev "$device" clsact
+}
+
+function tc_egress_bpf_attach()
+{
+    local device=${1:-$DEV}
+    local objfile=${2:-$BPF_OBJ}
+    local section=${3:-$SEC}
+    shift 3
+
+    # Attach the program in ELF section $section of $objfile as a direct-action (da) egress filter
+    call_tc filter add dev "$device" pref 2 handle 2 \
+        egress bpf da obj "$objfile" sec "$section"
+}
+
+function tc_egress_list()
+{
+    local device=${1:-$DEV}
+
+    call_tc filter show dev "$device" egress
+}
+
+if [[ -n $REMOVE ]]; then
+    tc_remove_clsact "$DEV"
+    exit 0
+fi
+
+tc_init_clsact "$DEV"
+tc_egress_bpf_attach "$DEV" "$BPF_OBJ" "$SEC"
+
+# Practical to list egress filters after setup.
+# (It's a common mistake to have several progs loaded) +if [[ -n $LIST ]]; then + info "Listing egress filter on device" + tc_egress_list $DEV +fi diff --git a/pping/functions.sh b/pping/functions.sh new file mode 100644 index 0000000..a92f482 --- /dev/null +++ b/pping/functions.sh @@ -0,0 +1,64 @@ +# +# Common functions used by scripts in this directory +# - Depending on bash 3 (or higher) syntax +# +# Author: Jesper Dangaaard Brouer +# License: GPLv2 + +## -- sudo trick -- +function root_check_run_with_sudo() { + # Trick so, program can be run as normal user, will just use "sudo" + # call as root_check_run_as_sudo "$@" + if [ "$EUID" -ne 0 ]; then + if [ -x $0 ]; then # Directly executable use sudo + echo "# (Not root, running with sudo)" >&2 + sudo "$0" "$@" + exit $? + fi + echo "cannot perform sudo run of $0" + exit 1 + fi +} + +## -- General shell logging cmds -- +function err() { + local exitcode=$1 + shift + echo -e "ERROR: $@" >&2 + exit $exitcode +} + +function warn() { + echo -e "WARN : $@" >&2 +} + +function info() { + if [[ -n "$VERBOSE" ]]; then + echo "# $@" + fi +} + +## -- Wrapper calls for TC -- +function _call_tc() { + local allow_fail="$1" + shift + if [[ -n "$VERBOSE" ]]; then + echo "tc $@" + fi + if [[ -n "$DRYRUN" ]]; then + return + fi + $TC "$@" + local status=$? 
+ if (( $status != 0 )); then + if [[ "$allow_fail" == "" ]]; then + err 3 "Exec error($status) occurred cmd: \"$TC $@\"" + fi + fi +} +function call_tc() { + _call_tc "" "$@" +} +function call_tc_allow_fail() { + _call_tc "allow_fail" "$@" +} diff --git a/pping/parameters.sh b/pping/parameters.sh new file mode 100644 index 0000000..61c5630 --- /dev/null +++ b/pping/parameters.sh @@ -0,0 +1,90 @@ +# +# Common parameter parsing used by scripts in this directory +# - Depending on bash 3 (or higher) syntax +# +# Author: Jesper Dangaaard Brouer +# License: GPLv2 + +function usage() { + echo "" + echo "Usage: $0 [-vh] --dev ethX" + echo " -d | --dev : (\$DEV) Interface/device (required)" + echo " -v | --verbose : (\$VERBOSE) verbose" + echo " --remove : (\$REMOVE) Remove the rules" + echo " --dry-run : (\$DRYRUN) Dry-run only (echo tc commands)" + echo " -s | --stats : (\$STATS_ONLY) Call statistics command" + echo " -l | --list : (\$LIST) List setup after setup" + echo " --file | --obj : (\$BPF_OBJ) BPF-object file to load" + echo " --sec : (\$SEC) Section of BPF-object to load" + echo "" +} + +# Using external program "getopt" to get --long-options +OPTIONS=$(getopt -o vshd:l \ + --long verbose,dry-run,remove,stats,list,help,dev:,file:,obj:,sec: -- "$@") +if (( $? 
!= 0 )); then + usage + err 2 "Error calling getopt" +fi +eval set -- "$OPTIONS" + +## --- Parse command line arguments / parameters --- +while true; do + case "$1" in + -d | --dev ) # device + export DEV=$2 + info "Device set to: DEV=$DEV" >&2 + shift 2 + ;; + --file | --obj ) + export BPF_OBJ=$2 + info "BPF-object file: $BPF_OBJ" >&2 + shift 2 + ;; + --sec ) + export SEC=$2 + info "Section to load: $SEC" >&2 + shift 2 + ;; + -v | --verbose) + export VERBOSE=yes + # info "Verbose mode: VERBOSE=$VERBOSE" >&2 + shift + ;; + --dry-run ) + export DRYRUN=yes + export VERBOSE=yes + info "Dry-run mode: enable VERBOSE and don't call TC" >&2 + shift + ;; + --remove ) + export REMOVE=yes + shift + ;; + -s | --stats ) + export STATS_ONLY=yes + shift + ;; + -l | --list ) + export LIST=yes + shift + ;; + -- ) + shift + break + ;; + -h | --help ) + usage; + exit 0 + ;; + * ) + shift + break + ;; + esac +done + +if [ -z "$DEV" ]; then + usage + err 2 "Please specify net_device (\$DEV)" +fi diff --git a/pping/pping_user.c b/pping/pping.c similarity index 89% rename from pping/pping_user.c rename to pping/pping.c index adfa9a5..f79040a 100644 --- a/pping/pping_user.c +++ b/pping/pping.c @@ -16,11 +16,14 @@ #include // For setting rlmit #include #include -#include "timestamp_map.h" //key and value structs for the ts_start map +#include "pping.h" //key and value structs for the ts_start map #define BILLION 1000000000UL -#define PPING_ELF_OBJ "pping_kern.o" +#define TCBPF_LOADER_SCRIPT "./bpf_egress_loader.sh" +#define PPING_XDP_OBJ "pping_kern_xdp.o" #define XDP_PROG_SEC "pping_ingress" +#define PPING_TCBPF_OBJ "pping_kern_tc.o" +#define TCBPF_PROG_SEC "pping_egress" #define XDP_FLAGS XDP_FLAGS_UPDATE_IF_NOEXIST #define MAP_NAME "ts_start" #define MAP_CLEANUP_INTERVAL 1*BILLION // Clean timestamp map once per second @@ -28,6 +31,7 @@ #define PERF_BUFFER_PAGES 64 // Related to the perf-buffer size? 
#define PERF_POLL_TIMEOUT_MS 100 #define RMEMLIM 512UL << 20 /* 512 MBs */ +#define MAX_COMMAND_LEN 1024 #define ERROR_MSG_MAX 1024 #define TIMESTAMP_LIFETIME 10*BILLION // 10 seconds @@ -79,7 +83,7 @@ static int xdp_load_and_attach(int ifindex, char *obj_path, char *sec, __u32 xdp prog = bpf_object__find_program_by_title(*obj, sec); if (!prog) { - if (error_buf) { snprintf(error_buf, ERROR_MSG_MAX, "Could not find section %s in ELF object %s", sec, obj_path); } + if (error_buf) { snprintf(error_buf, ERROR_MSG_MAX, "Could not find section %s in object %s", sec, obj_path); } return -1; } @@ -197,12 +201,23 @@ int main(int argc, char *argv[]) fprintf(stderr, "Could not get index of interface %s: %s\n", argv[1], strerror(-err)); goto cleanup; } + + //Load tc-bpf section on egress + char tc_bpf_load[MAX_COMMAND_LEN]; + snprintf(tc_bpf_load, MAX_COMMAND_LEN, "%s --dev %s --obj %s --sec %s", + TCBPF_LOADER_SCRIPT, argv[1], PPING_TCBPF_OBJ, TCBPF_PROG_SEC); + err = system(tc_bpf_load); + if (err) { + fprintf(stderr, "Could not load section %s of %s on interface %s: %s\n", + TCBPF_PROG_SEC, PPING_TCBPF_OBJ, argv[1], strerror(err)); + goto cleanup; + } // Load and attach XDP program to interface struct bpf_object *obj = NULL; int prog_fd = -1; - err = xdp_load_and_attach(ifindex, PPING_ELF_OBJ, XDP_PROG_SEC, XDP_FLAGS, &obj, &prog_fd, error_msg); + err = xdp_load_and_attach(ifindex, PPING_XDP_OBJ, XDP_PROG_SEC, XDP_FLAGS, &obj, &prog_fd, error_msg); if (err) { fprintf(stderr, "%s: %s\n", error_msg, strerror(-err)); goto cleanup; @@ -212,7 +227,7 @@ int main(int argc, char *argv[]) // Find map fd (to perform periodic cleanup) int map_fd = bpf_object__find_map_fd_by_name(obj, MAP_NAME); if (map_fd < 0) { - fprintf(stderr, "Failed finding map %s in %s: %s\n", MAP_NAME, PPING_ELF_OBJ, strerror(-map_fd)); + fprintf(stderr, "Failed finding map %s in %s: %s\n", MAP_NAME, PPING_XDP_OBJ, strerror(-map_fd)); goto cleanup; } pthread_t tid; @@ -247,7 +262,6 @@ int main(int argc, char 
*argv[]) } cleanup: - printf("Cleanup!\n"); perf_buffer__free(pb); if (xdp_attached) { err = xdp_deatach(ifindex, XDP_FLAGS); @@ -255,6 +269,7 @@ int main(int argc, char *argv[]) fprintf(stderr, "Failed deatching program from ifindex %d: %s\n", ifindex, strerror(-err)); } } + // TODO: Unload TC-BPF program return err != 0; } diff --git a/pping/timestamp_map.h b/pping/pping.h similarity index 100% rename from pping/timestamp_map.h rename to pping/pping.h diff --git a/pping/pping_helpers.h b/pping/pping_helpers.h new file mode 100644 index 0000000..3d206d2 --- /dev/null +++ b/pping/pping_helpers.h @@ -0,0 +1,58 @@ +#ifndef PPING_HELPERS_H +#define PPING_HELPERS_H + +#include "pping.h" +#define MAX_TCP_OPTIONS 10 + +static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr, __u32 daddr, __u16 sport, __u16 dport) +{ + flow->saddr = saddr; + flow->daddr = daddr; + flow->sport = sport; + flow->dport = dport; + return 0; +} + +// Parses the TSval and TSecr values from the TCP options field - returns 0 if sucessful and -1 on failure +// If sucessful the TSval and TSecr values will be stored at tsval and tsecr (in network byte order!) 
+static __always_inline int parse_tcp_ts(struct tcphdr *tcph, void *data_end, __u32 *tsval, __u32 *tsecr) +{ + if (tcph + 1 > data_end) // To hopefully please verifier + return -1; + int len = tcph->doff << 2; + if (len <= sizeof(struct tcphdr)) // No TCP options + return -1; + void *pos = (void *)(tcph + 1); + void *opt_end = ((void *)tcph + len); + __u8 i, opt, opt_size; + #pragma unroll + for (i = 0; i < MAX_TCP_OPTIONS; i++) { + if (pos+1 > opt_end || pos+1 > data_end) + return -1; + opt = *(__u8 *)pos; // Save value to variable so I don't have to perform any more data_end checks on the option kind + if (opt == 0) // Reached end of TCP options + return -1; + if (opt == 1) {// TCP NOP option - advance one byte + pos++; + continue; + } + // Option > 1, should have option size + if (pos+2 > opt_end || pos+2 > data_end) + return -1; + opt_size = *(__u8 *)(pos+1); // Save value to variable so I don't have to perform any more data_end checks on option size + + if (opt == 8 && opt_size == 10) { // Option-kind is TCP timestap (yey!) 
+ if (pos + opt_size > opt_end || pos + opt_size > data_end) + return -1; + *tsval = *(__u32 *)(pos + 2); + *tsecr = *(__u32 *)(pos + 6); + return 0; + } + + // Some other TCP option - advance option-length bytes + pos += opt_size; + } + return -1; +} + +#endif diff --git a/pping/pping_kern.c b/pping/pping_kern.c deleted file mode 100644 index 345164b..0000000 --- a/pping/pping_kern.c +++ /dev/null @@ -1,141 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include - -#include - -#include "timestamp_map.h" - -#define MAX_TCP_OPTIONS 10 - -char _license[] SEC("license") = "GPL"; - -struct bpf_map_def SEC("maps") ts_start = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(struct ts_key), - .value_size = sizeof(struct ts_timestamp), - .max_entries = 16384, -}; - -struct bpf_map_def SEC("maps") rtt_events = { - .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, - .key_size = sizeof(__u32), // CPU ID - .value_size = sizeof(__u32), // perf file descriptor? -}; - -static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr, __u32 daddr, __u16 sport, __u16 dport) -{ - flow->saddr = saddr; - flow->daddr = daddr; - flow->sport = sport; - flow->dport = dport; - return 0; -} - -// Parses the TSval and TSecr values from the TCP options field - returns 0 if sucessful and -1 on failure -// If sucessful the TSval and TSecr values will be stored at tsval and tsecr (in network byte order!) 
-static __always_inline int parse_tcp_ts(struct tcphdr *tcph, void *data_end, __u32 *tsval, __u32 *tsecr) -{ - if (tcph + 1 > data_end) // To hopefully please verifier - return -1; - int len = tcph->doff << 2; - if (len <= sizeof(struct tcphdr)) // No TCP options - return -1; - void *pos = (void *)(tcph + 1); - void *opt_end = ((void *)tcph + len); - __u8 i, opt, opt_size; - #pragma unroll - for (i = 0; i < MAX_TCP_OPTIONS; i++) { - if (pos+1 > opt_end || pos+1 > data_end) - return -1; - opt = *(__u8 *)pos; // Save value to variable so I don't have to perform any more data_end checks on the option kind - if (opt == 0) // Reached end of TCP options - return -1; - if (opt == 1) {// TCP NOP option - advance one byte - pos++; - continue; - } - // Option > 1, should have option size - if (pos+2 > opt_end || pos+2 > data_end) - return -1; - opt_size = *(__u8 *)(pos+1); // Save value to variable so I don't have to perform any more data_end checks on option size - - if (opt == 8 && opt_size == 10) { // Option-kind is TCP timestap (yey!) 
- if (pos + opt_size > opt_end || pos + opt_size > data_end) - return -1; - *tsval = *(__u32 *)(pos + 2); - *tsecr = *(__u32 *)(pos + 6); - return 0; - } - - // Some other TCP option - advance option-length bytes - pos += opt_size; - } - return -1; -} - -// XDP for parsing TSECR-val from ingress traffic and check for match in map -SEC("pping_ingress") -int xdp_prog_ingress(struct xdp_md *ctx) -{ - void *data = (void *)(long)ctx->data, *data_end = (void *)(long)ctx->data_end; - int proto = -1; - struct hdr_cursor nh = {.pos = data }; - struct ethhdr *eth; - struct iphdr *iph; - struct tcphdr *tcph; - - bpf_printk("Received packet of length %d\n", (int)(data_end - data)); - proto = parse_ethhdr(&nh, data_end, ð); - if (bpf_ntohs(proto) != ETH_P_IP) - return XDP_PASS; // Not IPv4 packet (or failed to parse ethernet header) - proto = parse_iphdr(&nh, data_end, &iph); - if (proto != IPPROTO_TCP) - return XDP_PASS; // Not a TCP packet (or failed to parse ethernet header) - proto = parse_tcphdr(&nh, data_end, &tcph); - if (proto < 0) - return XDP_PASS; // Failed parsing TCP-header - - bpf_printk("TCP-packet with %d byte header and %lu bytes of data\n", proto, data_end - nh.pos); - - __u32 tsval, tsecr; - if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0) // No TCP timestamp - return XDP_PASS; - // We have a TCP-timestamp - now we can check if it's in the map - bpf_printk("TCP-packet with timestap. 
TSval: %u, TSecr: %u\n", bpf_ntohl(tsval), bpf_ntohl(tsecr)); - struct ts_key key; - fill_ipv4_flow(&(key.flow), iph->daddr, iph->saddr, tcph->dest, tcph->source); // Fill in reverse order of egress (dest <--> source) - key.tsval = tsecr; - - // Should look up map map (filling done on egress), but temporarily add to map before I get the TC-BPF part working - struct ts_timestamp wrong_value = {0}; - wrong_value.timestamp = bpf_ktime_get_ns(); //Verifier was unhappy when using bpf_ktime_get_boot_ns - bpf_map_update_elem(&ts_start, &key, &wrong_value, BPF_NOEXIST); - - - struct ts_timestamp *ts = bpf_map_lookup_elem(&ts_start, &key); - if (ts && ts->used == 0) { - ts->used = 1; - //__u64 rtt = bpf_ktime_get_ns() - ts->timestamp; - - struct rtt_event event = {0}; - memcpy(&(event.flow), &(key.flow), sizeof(struct ipv4_flow)); - event.rtt = bpf_ktime_get_ns() - ts->timestamp; - bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU, &event, sizeof(event)); - bpf_printk("Pushed rtt event with RTT: %llu", event.rtt); - } - - return XDP_PASS; -} - -// TC-BFP for parsing TSVAL from egress traffic and add to map -SEC("pping_egress") -int tc_bpf_prog_egress(struct __skbuff *skb) -{ - return BPF_OK; -} diff --git a/pping/pping_kern_tc.c b/pping/pping_kern_tc.c new file mode 100644 index 0000000..f403000 --- /dev/null +++ b/pping/pping_kern_tc.c @@ -0,0 +1,68 @@ +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "pping.h" +#include "pping_helpers.h" + + +char _license[] SEC("license") = "GPL"; + +struct bpf_map_def SEC("maps") ts_start = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct ts_key), + .value_size = sizeof(struct ts_timestamp), + .max_entries = 16384, +}; + +// TC-BFP for parsing TSVAL from egress traffic and add to map +SEC("pping_egress") +int tc_bpf_prog_egress(struct __sk_buff *skb) +{ + void *data = (void *)(long)skb->data; + void *data_end = (void *)(long)skb->data_end; + + bpf_printk("Sent packet of 
size %d bytes\n", (int)(data_end - data));
+
+	int proto = -1;
+	struct hdr_cursor nh = {.pos = data};
+	struct ethhdr *eth;
+	struct iphdr *iph;
+	struct tcphdr *tcph;
+
+	proto = parse_ethhdr(&nh, data_end, &eth);
+	if (bpf_ntohs(proto) != ETH_P_IP)
+		goto end; // Not IPv4 packet (or failed to parse ethernet header)
+	proto = parse_iphdr(&nh, data_end, &iph);
+	if (proto != IPPROTO_TCP)
+		goto end; // Not a TCP packet (or failed to parse IP header)
+	proto = parse_tcphdr(&nh, data_end, &tcph);
+	if (proto < 0)
+		goto end; // Failed parsing TCP-header
+
+	bpf_printk("TCP-packet with %d byte header and %lu bytes of data\n", proto, data_end - nh.pos);
+
+	__u32 tsval, tsecr;
+	if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0) // No TCP timestamp
+		goto end;
+	// We have a TCP-timestamp - record when this TSval was sent so a later echo (TSecr) can be matched against it
+	bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", bpf_ntohl(tsval), bpf_ntohl(tsecr));
+	struct ts_key key;
+	fill_ipv4_flow(&(key.flow), iph->saddr, iph->daddr, tcph->source, tcph->dest);
+	key.tsval = tsval;
+
+	// Store the send time keyed by flow + TSval; BPF_NOEXIST means an already stored entry for the same key is not overwritten
+	struct ts_timestamp ts = {0};
+	ts.timestamp = bpf_ktime_get_ns(); //Verifier was unhappy when using bpf_ktime_get_boot_ns
+	bpf_map_update_elem(&ts_start, &key, &ts, BPF_NOEXIST);
+
+ end:
+	return BPF_OK;
+}
diff --git a/pping/pping_kern_xdp.c b/pping/pping_kern_xdp.c
new file mode 100644
index 0000000..183b94d
--- /dev/null
+++ b/pping/pping_kern_xdp.c
@@ -0,0 +1,83 @@
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include "pping.h"
+#include "pping_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_map_def SEC("maps") ts_start = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct ts_key),
+	.value_size = sizeof(struct ts_timestamp),
+	.max_entries = 16384,
+};
+
+struct bpf_map_def SEC("maps") rtt_events = {
+	.type = 
BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.key_size = sizeof(__u32), // CPU ID
+	.value_size = sizeof(__u32), // perf file descriptor?
+};
+
+// XDP program for ingress traffic: parses TSecr from IPv4/TCP packets, looks it up in ts_start and pushes an rtt_event on the first match. Always returns XDP_PASS.
+SEC("pping_ingress")
+int xdp_prog_ingress(struct xdp_md *ctx)
+{
+	void *data = (void *)(long)ctx->data;
+	void *data_end = (void *)(long)ctx->data_end;
+	int proto = -1;
+	struct hdr_cursor nh = {.pos = data };
+	struct ethhdr *eth;
+	struct iphdr *iph;
+	struct tcphdr *tcph;
+
+	//bpf_printk("Received packet of length %d\n", (int)(data_end - data));
+	proto = parse_ethhdr(&nh, data_end, &eth);
+	if (bpf_ntohs(proto) != ETH_P_IP)
+		goto end; // Not IPv4 packet (or failed to parse ethernet header)
+	proto = parse_iphdr(&nh, data_end, &iph);
+	if (proto != IPPROTO_TCP)
+		goto end; // Not a TCP packet (or failed to parse IP header)
+	proto = parse_tcphdr(&nh, data_end, &tcph);
+	if (proto < 0)
+		goto end; // Failed parsing TCP-header
+
+	//bpf_printk("TCP-packet with %d byte header and %lu bytes of data\n", proto, data_end - nh.pos);
+
+	__u32 tsval, tsecr;
+	if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0) // No TCP timestamp
+		goto end;
+	// We have a TCP-timestamp - now we can check if its TSecr is in the map
+	//bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", bpf_ntohl(tsval), bpf_ntohl(tsecr));
+	struct ts_key key;
+	fill_ipv4_flow(&(key.flow), iph->daddr, iph->saddr, tcph->dest, tcph->source); // Fill in reverse order of egress (dest <--> source)
+	key.tsval = tsecr;
+
+	// TEMPORARY: should only look up the map (filling is meant to be done on egress); until the TC-BPF part works this inserts an ingress-side timestamp so the lookup below finds an entry
+	struct ts_timestamp wrong_value = {0};
+	wrong_value.timestamp = bpf_ktime_get_ns(); //Verifier was unhappy when using bpf_ktime_get_boot_ns
+	bpf_map_update_elem(&ts_start, &key, &wrong_value, BPF_NOEXIST);
+
+
+	struct ts_timestamp *ts = bpf_map_lookup_elem(&ts_start, &key);
+	if (ts && ts->used == 0) { // Only calculate RTT for first packet with matching TSecr
+		// As used is not set atomically with the lookup, could potentially have multiple "first" packets (on different CPUs), but all those should then also have very similar RTT, so don't consider it a significant issue
+		ts->used = 1;
+
+		struct rtt_event event = {0};
+		memcpy(&(event.flow), &(key.flow), sizeof(struct ipv4_flow));
+		event.rtt = bpf_ktime_get_ns() - ts->timestamp;
+		bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU, &event, sizeof(event));
+		bpf_printk("Pushed rtt event with RTT: %llu\n", event.rtt);
+	}
+ end:
+	return XDP_PASS;
+}