diff --git a/pping/Makefile b/pping/Makefile new file mode 100644 index 0000000..0aadb43 --- /dev/null +++ b/pping/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) + +USER_TARGETS := pping_user +BPF_TARGETS := pping_kern + +LIB_DIR = ../lib + +include $(LIB_DIR)/common.mk diff --git a/pping/README.md b/pping/README.md new file mode 100644 index 0000000..1817837 --- /dev/null +++ b/pping/README.md @@ -0,0 +1,2 @@ +# PPing using XDP and TC-BPF +An implementation of the passive ping ([pping](https://github.com/pollere/pping)) utility based on XDP (for ingress) and TC-BPF (for outgress) \ No newline at end of file diff --git a/pping/pping_kern.c b/pping/pping_kern.c new file mode 100644 index 0000000..9ccb19a --- /dev/null +++ b/pping/pping_kern.c @@ -0,0 +1,129 @@ +#include +#include +#include + +#include +#include +#include +#include + +#include "timestamp_map.h" + +#define MAX_TCP_OPTIONS 10 +#define BILLION 1000000000UL + +char _license[] SEC("license") = "GPL"; + +struct bpf_map_def SEC("maps") ts_start = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct ts_key), + .value_size = sizeof(struct ts_timestamp), + .max_entries = 4096, +}; + +static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr, __u32 daddr, __u16 sport, __u16 dport) +{ + flow->saddr = saddr; + flow->daddr = daddr; + flow->sport = sport; + flow->dport = dport; + return 0; +} + +// Parses the TSval and TSecr values from the TCP options field - returns 0 if sucessful and -1 on failure +static __always_inline int parse_tcp_ts(struct tcphdr *tcph, void *data_end, __u32 *tsval, __u32 *tsecr) +{ + if (tcph + 1 > data_end) // To hopefully please verifier + return -1; + int len = tcph->doff << 2; + if (len <= sizeof(struct tcphdr)) // No TCP options + return -1; + void *pos = (void *)(tcph + 1); + void *opt_end = ((void *)tcph + len); + __u8 i, opt, opt_size; + #pragma unroll + for (i = 0; i < MAX_TCP_OPTIONS; i++) { + if (pos+1 > opt_end || pos+1 > data_end) + return -1; + opt = *(__u8 *)pos; // Save value to variable so I don't have to perform any more data_end checks on the option kind + if (opt == 0) // Reached end of TCP options + return -1; + if (opt == 1) {// TCP NOP option - advance one byte + pos++; + continue; + } + // Option > 1, should have option size + if (pos+2 > opt_end || pos+2 > data_end) + return -1; + opt_size = *(__u8 *)(pos+1); // Save value to variable so I don't have to perform any more data_end checks on option size + + if (opt == 8 && opt_size == 10) { // Option-kind is TCP timestap (yey!) + if (pos + opt_size > opt_end ||pos + opt_size > data_end) + return -1; + *tsval = bpf_ntohl(*(__u32 *)(pos + 2)); + *tsecr = bpf_ntohl(*(__u32 *)(pos + 6)); + return 0; + } + + // Some other TCP option - advance option-length bytes + pos += opt_size; + } + return -1; +} + +// XDP for parsing TSECR-val from ingress traffic and check for match in map +SEC("pping_ingress") +int xdp_prog_ingress(struct xdp_md *ctx) +{ + void *data = (void *)(long)ctx->data, *data_end = (void *)(long)ctx->data_end; + int proto = -1; + struct hdr_cursor nh = {.pos = data }; + struct ethhdr *eth; + struct iphdr *iph; + struct tcphdr *tcph; + + bpf_printk("Received packet of length %d\n", (int)(data_end - data)); + proto = parse_ethhdr(&nh, data_end, ð); + if (bpf_ntohs(proto) != ETH_P_IP) + return XDP_PASS; // Not IPv4 packet (or failed to parse ethernet header) + proto = parse_iphdr(&nh, data_end, &iph); + if (proto != IPPROTO_TCP) + return XDP_PASS; // Not a TCP packet (or failed to parse ethernet header) + proto = parse_tcphdr(&nh, data_end, &tcph); + if (proto < 0) + return XDP_PASS; // Failed parsing TCP-header + + bpf_printk("TCP-packet with %d byte header and %lu bytes of data\n", proto, data_end - nh.pos); + + __u32 tsval, tsecr; + if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0) // No TCP timestamp + return XDP_PASS; + // We have a TCP-timestamp - now we can check if it's in the map + bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", tsval, tsecr); + struct ts_key key; + fill_ipv4_flow(&(key.flow), iph->daddr, iph->saddr, tcph->dest, tcph->source); // Fill in reverse order of egress (dest <--> source) + key.tsval = tsecr; + + // Should look up map map (filling done on egress), but temporarily add to map before I get the TC-BPF part working + struct ts_timestamp wrong_value = {0}; + wrong_value.timestamp = bpf_ktime_get_ns(); //Verifier was unhappy when using bpf_ktime_get_boot_ns + bpf_map_update_elem(&ts_start, &key, &wrong_value, BPF_NOEXIST); + + + struct ts_timestamp *ts = bpf_map_lookup_elem(&ts_start, &key); + if (ts && ts->used == 0) { + ts->used = 1; + __u64 rtt = bpf_ktime_get_ns() - ts->timestamp; + // TODO: Push RTT + flow to userspace through perf buffer + bpf_printk("RTT: %llu\n", rtt); + } + + return XDP_PASS; +} + +// TC-BFP for parsing TSVAL from egress traffic and add to map +SEC("pping_egress") +int tc_bpf_prog_egress(struct __skbuff *skb) +{ + return BPF_OK; +} diff --git a/pping/pping_user.c b/pping/pping_user.c new file mode 100644 index 0000000..1381076 --- /dev/null +++ b/pping/pping_user.c @@ -0,0 +1,193 @@ +//#include +#include +#include +#include +#include +#include // For IS_ERR_OR_NULL macro +#include + +#include +#include +#include +#include +#include +#include +#include // For detecting Ctrl-C +#include // For setting rlmit +#include +#include "timestamp_map.h" //key and value structs for the ts_start map + +#define PPING_ELF_OBJ "pping_kern.o" +#define XDP_PROG_SEC "pping_ingress" +#define XDP_FLAGS XDP_FLAGS_UPDATE_IF_NOEXIST +#define MAP_NAME "ts_start" +#define RMEMLIM 512UL << 20 /* 512 MBs */ +#define ERROR_MSG_MAX 1024 +#define BILLION 1000000000UL +#define TIMESTAMP_LIFETIME 10*BILLION // 10 seconds + +/* static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) */ +/* { */ +/* return vfprintf(stderr, format, args); */ +/* } */ + +static volatile int keep_running = 1; + +void abort_main_loop(int sig) +{ + keep_running = 0; +} + +static int set_rlimit(long int lim) +{ + struct rlimit rlim = { + .rlim_cur = lim, + .rlim_max = lim, + }; + + return !setrlimit(RLIMIT_MEMLOCK, &rlim) ? 0 : -errno; +} + +static int xdp_load_and_attach(int ifindex, char *obj_path, char *sec, __u32 xdp_flags, struct bpf_object **obj, int *prog_fd, char *error_buf) +{ + // Load and attach XDP program to interface + struct bpf_program *prog = NULL; + int err; + *prog_fd = -1; + + struct bpf_prog_load_attr attr = { + .prog_type = BPF_PROG_TYPE_XDP, + //.ifindex = ifindex, + .file = obj_path, + }; + //attr.file = obj_path; + + err = bpf_prog_load_xattr(&attr, obj, prog_fd); + if (err) { + if (error_buf) { snprintf(error_buf, ERROR_MSG_MAX, "Could not open %s", obj_path); } + return err; + } + + prog = bpf_object__find_program_by_title(*obj, sec); + if (!prog) { + if (error_buf) { snprintf(error_buf, ERROR_MSG_MAX, "Could not find section %s in ELF object %s", sec, obj_path); } + return -1; + } + + *prog_fd = bpf_program__fd(prog); + err = bpf_set_link_xdp_fd(ifindex, *prog_fd, xdp_flags); + if (err < 0) { + if (error_buf) { snprintf(error_buf, ERROR_MSG_MAX, "Failed attaching XDP program %s in %s to ifindex %d", sec, obj_path, ifindex); } + return err; + } + return 0; +} + +static int xdp_deatach(int ifindex, __u32 xdp_flags) { + return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); +} + +static __u64 get_time_ns(clockid_t clockid) +{ + struct timespec t; + if (clock_gettime(clockid, &t) != 0) // CLOCK_BOOTTIME if using bpf_get_ktime_boot_ns + return 0; + return (__u64)t.tv_sec * BILLION + (__u64)t.tv_nsec; +} + + +static int remove_old_entries_from_map(int map_fd, __u64 max_age) +{ + int removed = 0, entries = 0; + struct ts_key key, prev_key = {0}; + struct ts_timestamp value; + bool delete_prev = false; + __u64 now_nsec = get_time_ns(CLOCK_MONOTONIC); + if (now_nsec == 0) + return -errno; + + // Cannot delete current key because then loop will reset, see https://www.bouncybouncy.net/blog/bpf_map_get_next_key-pitfalls/ + while(bpf_map_get_next_key(map_fd, &prev_key, &key) == 0) { + if (delete_prev) { + bpf_map_delete_elem(map_fd, &prev_key); + removed++; + delete_prev = false; + } + + if (bpf_map_lookup_elem(map_fd, &key, &value) == 0) { + if (now_nsec > value.timestamp && now_nsec - value.timestamp > max_age) { + delete_prev = true; + } + } + entries++; + prev_key = key; + } + if (delete_prev) { + bpf_map_delete_elem(map_fd, &prev_key); + removed++; + } + printf("Gone through %d entries and removed %d of them\n", entries, removed); + return removed; +} + +int main(int argc, char *argv[]) +{ + if (argc < 2) { + printf("Usage: ./pping_user \n"); + return EXIT_FAILURE; + } + + int err; + char error_msg[ERROR_MSG_MAX]; + + // Setup libbpf errors and debug info on callback + //libbpf_set_print(libbpf_print_fn); + + // Increase rlimit + err = set_rlimit(RMEMLIM); + if (err) { + fprintf(stderr, "Could not set rlimit to %ld bytes: %s\n", RMEMLIM, strerror(-err)); + return EXIT_FAILURE; + } + + // Get index of interface + int ifindex = if_nametoindex(argv[1]); + if (ifindex == 0) { + err = -errno; + fprintf(stderr, "Could not get index of interface %s: %s\n", argv[1], strerror(-err)); + return EXIT_FAILURE; + } + + // Load and attach XDP program to interface + struct bpf_object *obj = NULL; + int prog_fd = -1; + + err = xdp_load_and_attach(ifindex, PPING_ELF_OBJ, XDP_PROG_SEC, XDP_FLAGS, &obj, &prog_fd, error_msg); + if (err) { + fprintf(stderr, "%s: %s\n", error_msg, strerror(-err)); + return EXIT_FAILURE; + } + int map_fd = bpf_object__find_map_fd_by_name(obj, MAP_NAME); + if (map_fd < 0) { + fprintf(stderr, "Failed finding map %s in %s: %s\n", MAP_NAME, PPING_ELF_OBJ, strerror(-map_fd)); + xdp_deatach(ifindex, XDP_FLAGS); + return EXIT_FAILURE; + } + + // Main loop + signal(SIGINT, abort_main_loop); + while(keep_running) { + sleep(1); + // TODO - print out + remove_old_entries_from_map(map_fd, TIMESTAMP_LIFETIME); + } + + err = xdp_deatach(ifindex, XDP_FLAGS); + if (err) { + fprintf(stderr, "Failed deatching program from ifindex %d: %s\n", ifindex, strerror(-err)); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + diff --git a/pping/timestamp_map.h b/pping/timestamp_map.h new file mode 100644 index 0000000..ec6096c --- /dev/null +++ b/pping/timestamp_map.h @@ -0,0 +1,26 @@ +#ifndef TIMESTAMP_MAP_H +#define TIMESTAMP_MAP_H +#include + +struct ipv4_flow +{ + __u32 saddr; + __u32 daddr; + __u16 sport; + __u16 dport; +}; + +struct ts_key +{ + struct ipv4_flow flow; + __u32 tsval; +}; + +struct ts_timestamp +{ + __u64 timestamp; + __u8 used; + // __u8 pad[7]; // Need to pad it due to compiler optimization, see "Remove struct padding with aligning members by using #pragma pack." at https://docs.cilium.io/en/v1.9/bpf/ + +}; +#endif