From 45b138b6abbdfb8b6c456189747673263c0723d9 Mon Sep 17 00:00:00 2001 From: Simon Sundberg Date: Thu, 7 Jan 2021 18:14:27 +0100 Subject: [PATCH] pping: Add perf-buffer The XDP program pushes the calculated RTTs to userspace through the perf-buffer and the userspace program polls it to print them out Signed-off-by: Simon Sundberg --- pping/Makefile | 2 + pping/pping_kern.c | 30 +++++++---- pping/pping_user.c | 120 +++++++++++++++++++++++++++++++++--------- pping/timestamp_map.h | 7 +++ 4 files changed, 124 insertions(+), 35 deletions(-) diff --git a/pping/Makefile b/pping/Makefile index 0aadb43..14ace53 100644 --- a/pping/Makefile +++ b/pping/Makefile @@ -3,6 +3,8 @@ USER_TARGETS := pping_user BPF_TARGETS := pping_kern +LDFLAGS = -pthread + LIB_DIR = ../lib include $(LIB_DIR)/common.mk diff --git a/pping/pping_kern.c b/pping/pping_kern.c index 9ccb19a..345164b 100644 --- a/pping/pping_kern.c +++ b/pping/pping_kern.c @@ -7,10 +7,11 @@ #include #include +#include + #include "timestamp_map.h" #define MAX_TCP_OPTIONS 10 -#define BILLION 1000000000UL char _license[] SEC("license") = "GPL"; @@ -18,7 +19,13 @@ struct bpf_map_def SEC("maps") ts_start = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(struct ts_key), .value_size = sizeof(struct ts_timestamp), - .max_entries = 4096, + .max_entries = 16384, +}; + +struct bpf_map_def SEC("maps") rtt_events = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(__u32), // CPU ID + .value_size = sizeof(__u32), // perf file descriptor? }; static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr, __u32 daddr, __u16 sport, __u16 dport) @@ -31,6 +38,7 @@ static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr, _ } // Parses the TSval and TSecr values from the TCP options field - returns 0 if sucessful and -1 on failure +// If sucessful the TSval and TSecr values will be stored at tsval and tsecr (in network byte order!) static __always_inline int parse_tcp_ts(struct tcphdr *tcph, void *data_end, __u32 *tsval, __u32 *tsecr) { if (tcph + 1 > data_end) // To hopefully please verifier @@ -58,10 +66,10 @@ static __always_inline int parse_tcp_ts(struct tcphdr *tcph, void *data_end, __u opt_size = *(__u8 *)(pos+1); // Save value to variable so I don't have to perform any more data_end checks on option size if (opt == 8 && opt_size == 10) { // Option-kind is TCP timestap (yey!) - if (pos + opt_size > opt_end ||pos + opt_size > data_end) + if (pos + opt_size > opt_end || pos + opt_size > data_end) return -1; - *tsval = bpf_ntohl(*(__u32 *)(pos + 2)); - *tsecr = bpf_ntohl(*(__u32 *)(pos + 6)); + *tsval = *(__u32 *)(pos + 2); + *tsecr = *(__u32 *)(pos + 6); return 0; } @@ -99,7 +107,7 @@ int xdp_prog_ingress(struct xdp_md *ctx) if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0) // No TCP timestamp return XDP_PASS; // We have a TCP-timestamp - now we can check if it's in the map - bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", tsval, tsecr); + bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", bpf_ntohl(tsval), bpf_ntohl(tsecr)); struct ts_key key; fill_ipv4_flow(&(key.flow), iph->daddr, iph->saddr, tcph->dest, tcph->source); // Fill in reverse order of egress (dest <--> source) key.tsval = tsecr; @@ -113,9 +121,13 @@ int xdp_prog_ingress(struct xdp_md *ctx) struct ts_timestamp *ts = bpf_map_lookup_elem(&ts_start, &key); if (ts && ts->used == 0) { ts->used = 1; - __u64 rtt = bpf_ktime_get_ns() - ts->timestamp; - // TODO: Push RTT + flow to userspace through perf buffer - bpf_printk("RTT: %llu\n", rtt); + //__u64 rtt = bpf_ktime_get_ns() - ts->timestamp; + + struct rtt_event event = {0}; + memcpy(&(event.flow), &(key.flow), sizeof(struct ipv4_flow)); + event.rtt = bpf_ktime_get_ns() - ts->timestamp; + bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU, &event, sizeof(event)); + bpf_printk("Pushed rtt event with RTT: %llu", event.rtt); } return XDP_PASS; diff --git a/pping/pping_user.c b/pping/pping_user.c index 1381076..adfa9a5 100644 --- a/pping/pping_user.c +++ b/pping/pping_user.c @@ -2,9 +2,9 @@ #include #include #include -#include -#include // For IS_ERR_OR_NULL macro -#include +#include // For if_nametoindex +//#include // For IS_ERR_OR_NULL macro // use libbpf_get_error instead +#include // For inet_ntoa and ntohs #include #include @@ -15,17 +15,27 @@ #include // For detecting Ctrl-C #include // For setting rlmit #include +#include #include "timestamp_map.h" //key and value structs for the ts_start map +#define BILLION 1000000000UL #define PPING_ELF_OBJ "pping_kern.o" #define XDP_PROG_SEC "pping_ingress" #define XDP_FLAGS XDP_FLAGS_UPDATE_IF_NOEXIST #define MAP_NAME "ts_start" +#define MAP_CLEANUP_INTERVAL 1*BILLION // Clean timestamp map once per second +#define PERF_BUFFER_NAME "rtt_events" +#define PERF_BUFFER_PAGES 64 // Related to the perf-buffer size? +#define PERF_POLL_TIMEOUT_MS 100 #define RMEMLIM 512UL << 20 /* 512 MBs */ #define ERROR_MSG_MAX 1024 -#define BILLION 1000000000UL #define TIMESTAMP_LIFETIME 10*BILLION // 10 seconds +struct map_cleanup_args { + int map_fd; + __u64 max_age_ns; +}; + /* static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) */ /* { */ /* return vfprintf(stderr, format, args); */ @@ -33,7 +43,7 @@ static volatile int keep_running = 1; -void abort_main_loop(int sig) +void abort_program(int sig) { keep_running = 0; } @@ -60,7 +70,6 @@ static int xdp_load_and_attach(int ifindex, char *obj_path, char *sec, __u32 xdp //.ifindex = ifindex, .file = obj_path, }; - //attr.file = obj_path; err = bpf_prog_load_xattr(&attr, obj, prog_fd); if (err) { @@ -95,7 +104,6 @@ static __u64 get_time_ns(clockid_t clockid) return (__u64)t.tv_sec * BILLION + (__u64)t.tv_nsec; } - static int remove_old_entries_from_map(int map_fd, __u64 max_age) { int removed = 0, entries = 0; @@ -103,7 +111,7 @@ static int remove_old_entries_from_map(int map_fd, __u64 max_age) struct ts_timestamp value; bool delete_prev = false; __u64 now_nsec = get_time_ns(CLOCK_MONOTONIC); - if (now_nsec == 0) + if (now_nsec == 0) return -errno; // Cannot delete current key because then loop will reset, see https://www.bouncybouncy.net/blog/bpf_map_get_next_key-pitfalls/ @@ -126,10 +134,40 @@ static int remove_old_entries_from_map(int map_fd, __u64 max_age) bpf_map_delete_elem(map_fd, &prev_key); removed++; } - printf("Gone through %d entries and removed %d of them\n", entries, removed); + __u64 duration = get_time_ns(CLOCK_MONOTONIC) - now_nsec; + printf("Gone through %d entries and removed %d of them in %llu.%09llu\n", entries, removed, duration / BILLION, duration % BILLION); return removed; } +static void *periodic_map_cleanup(void *args) +{ + struct map_cleanup_args *argp = args; + struct timespec interval; + interval.tv_sec = MAP_CLEANUP_INTERVAL / BILLION; + interval.tv_nsec = MAP_CLEANUP_INTERVAL % BILLION; + while (keep_running) { + remove_old_entries_from_map(argp->map_fd, argp->max_age_ns); + nanosleep(&interval, NULL); + } + pthread_exit(NULL); +} + +static void handle_rtt_event(void *ctx, int cpu, void *data, __u32 data_size) +{ + const struct rtt_event *e = data; + struct in_addr saddr, daddr; + saddr.s_addr = e->flow.saddr; + daddr.s_addr = e->flow.daddr; + printf("%llu.%09llu ms %s:%d+%s:%d\n", e->rtt / BILLION, e->rtt % BILLION, + inet_ntoa(saddr), ntohs(e->flow.sport), + inet_ntoa(daddr), ntohs(e->flow.dport)); +} + +static void handle_missed_rtt_event(void *ctx, int cpu, __u64 lost_cnt) +{ + fprintf(stderr, "Lost %llu RTT events on CPU %d\n", lost_cnt, cpu); +} + int main(int argc, char *argv[]) { if (argc < 2) { @@ -137,8 +175,10 @@ int main(int argc, char *argv[]) return EXIT_FAILURE; } - int err; + int err = 0, ifindex = 0; + bool xdp_attached = false; char error_msg[ERROR_MSG_MAX]; + struct perf_buffer *pb = NULL; // Setup libbpf errors and debug info on callback //libbpf_set_print(libbpf_print_fn); @@ -147,15 +187,15 @@ int main(int argc, char *argv[]) err = set_rlimit(RMEMLIM); if (err) { fprintf(stderr, "Could not set rlimit to %ld bytes: %s\n", RMEMLIM, strerror(-err)); - return EXIT_FAILURE; + goto cleanup; } // Get index of interface - int ifindex = if_nametoindex(argv[1]); + ifindex = if_nametoindex(argv[1]); if (ifindex == 0) { err = -errno; fprintf(stderr, "Could not get index of interface %s: %s\n", argv[1], strerror(-err)); - return EXIT_FAILURE; + goto cleanup; } // Load and attach XDP program to interface @@ -165,29 +205,57 @@ int main(int argc, char *argv[]) err = xdp_load_and_attach(ifindex, PPING_ELF_OBJ, XDP_PROG_SEC, XDP_FLAGS, &obj, &prog_fd, error_msg); if (err) { fprintf(stderr, "%s: %s\n", error_msg, strerror(-err)); - return EXIT_FAILURE; + goto cleanup; } + xdp_attached = true; + + // Find map fd (to perform periodic cleanup) int map_fd = bpf_object__find_map_fd_by_name(obj, MAP_NAME); if (map_fd < 0) { fprintf(stderr, "Failed finding map %s in %s: %s\n", MAP_NAME, PPING_ELF_OBJ, strerror(-map_fd)); - xdp_deatach(ifindex, XDP_FLAGS); - return EXIT_FAILURE; + goto cleanup; + } + pthread_t tid; + struct map_cleanup_args args = {.map_fd = map_fd, .max_age_ns = TIMESTAMP_LIFETIME}; + err = pthread_create(&tid, NULL, periodic_map_cleanup, &args); + if (err) { + fprintf(stderr, "Failed starting thread to perform periodic map cleanup: %s\n", strerror(err)); + goto cleanup; + } + + // Set up perf buffer + struct perf_buffer_opts pb_opts; + pb_opts.sample_cb = handle_rtt_event; + pb_opts.lost_cb = handle_missed_rtt_event; + + pb = perf_buffer__new(bpf_object__find_map_fd_by_name(obj, PERF_BUFFER_NAME), PERF_BUFFER_PAGES, &pb_opts); + err = libbpf_get_error(pb); + if (err) { + pb = NULL; + fprintf(stderr, "Failed to open perf buffer %s: %s\n", PERF_BUFFER_NAME, strerror(err)); + goto cleanup; } // Main loop - signal(SIGINT, abort_main_loop); + signal(SIGINT, abort_program); while(keep_running) { - sleep(1); - // TODO - print out - remove_old_entries_from_map(map_fd, TIMESTAMP_LIFETIME); + if ((err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS)) < 0) { + if (keep_running) // Only print polling error if it wasn't caused by program termination + fprintf(stderr, "Error polling perf buffer: %s\n", strerror(-err)); + break; + } } - - err = xdp_deatach(ifindex, XDP_FLAGS); - if (err) { - fprintf(stderr, "Failed deatching program from ifindex %d: %s\n", ifindex, strerror(-err)); - return EXIT_FAILURE; + + cleanup: + printf("Cleanup!\n"); + perf_buffer__free(pb); + if (xdp_attached) { + err = xdp_deatach(ifindex, XDP_FLAGS); + if (err) { + fprintf(stderr, "Failed deatching program from ifindex %d: %s\n", ifindex, strerror(-err)); + } } - return EXIT_SUCCESS; + return err != 0; } diff --git a/pping/timestamp_map.h b/pping/timestamp_map.h index ec6096c..e8cec37 100644 --- a/pping/timestamp_map.h +++ b/pping/timestamp_map.h @@ -23,4 +23,11 @@ struct ts_timestamp // __u8 pad[7]; // Need to pad it due to compiler optimization, see "Remove struct padding with aligning members by using #pragma pack." at https://docs.cilium.io/en/v1.9/bpf/ }; + +struct rtt_event +{ + struct ipv4_flow flow; + __u64 rtt; +}; + #endif