pping: Add perf-buffer

The XDP program pushes the calculated RTTs to userspace through the
perf buffer, and the userspace program polls it to print them out.

Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
Simon Sundberg
2021-01-07 18:14:27 +01:00
parent 8628004b6c
commit 45b138b6ab
4 changed files with 124 additions and 35 deletions

View File

@@ -3,6 +3,8 @@
USER_TARGETS := pping_user USER_TARGETS := pping_user
BPF_TARGETS := pping_kern BPF_TARGETS := pping_kern
LDFLAGS = -pthread
LIB_DIR = ../lib LIB_DIR = ../lib
include $(LIB_DIR)/common.mk include $(LIB_DIR)/common.mk

View File

@@ -7,10 +7,11 @@
#include <linux/ip.h> #include <linux/ip.h>
#include <linux/tcp.h> #include <linux/tcp.h>
#include <string.h>
#include "timestamp_map.h" #include "timestamp_map.h"
#define MAX_TCP_OPTIONS 10 #define MAX_TCP_OPTIONS 10
#define BILLION 1000000000UL
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
@@ -18,7 +19,13 @@ struct bpf_map_def SEC("maps") ts_start = {
.type = BPF_MAP_TYPE_HASH, .type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(struct ts_key), .key_size = sizeof(struct ts_key),
.value_size = sizeof(struct ts_timestamp), .value_size = sizeof(struct ts_timestamp),
.max_entries = 4096, .max_entries = 16384,
};
struct bpf_map_def SEC("maps") rtt_events = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
.key_size = sizeof(__u32), // CPU ID
.value_size = sizeof(__u32), // perf file descriptor?
}; };
static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr, __u32 daddr, __u16 sport, __u16 dport) static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr, __u32 daddr, __u16 sport, __u16 dport)
@@ -31,6 +38,7 @@ static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr, _
} }
// Parses the TSval and TSecr values from the TCP options field - returns 0 if successful and -1 on failure // Parses the TSval and TSecr values from the TCP options field - returns 0 if successful and -1 on failure
// If successful, the TSval and TSecr values will be stored at tsval and tsecr (in network byte order!)
static __always_inline int parse_tcp_ts(struct tcphdr *tcph, void *data_end, __u32 *tsval, __u32 *tsecr) static __always_inline int parse_tcp_ts(struct tcphdr *tcph, void *data_end, __u32 *tsval, __u32 *tsecr)
{ {
if (tcph + 1 > data_end) // To hopefully please verifier if (tcph + 1 > data_end) // To hopefully please verifier
@@ -58,10 +66,10 @@ static __always_inline int parse_tcp_ts(struct tcphdr *tcph, void *data_end, __u
opt_size = *(__u8 *)(pos+1); // Save value to variable so I don't have to perform any more data_end checks on option size opt_size = *(__u8 *)(pos+1); // Save value to variable so I don't have to perform any more data_end checks on option size
if (opt == 8 && opt_size == 10) { // Option-kind is TCP timestap (yey!) if (opt == 8 && opt_size == 10) { // Option-kind is TCP timestap (yey!)
if (pos + opt_size > opt_end ||pos + opt_size > data_end) if (pos + opt_size > opt_end || pos + opt_size > data_end)
return -1; return -1;
*tsval = bpf_ntohl(*(__u32 *)(pos + 2)); *tsval = *(__u32 *)(pos + 2);
*tsecr = bpf_ntohl(*(__u32 *)(pos + 6)); *tsecr = *(__u32 *)(pos + 6);
return 0; return 0;
} }
@@ -99,7 +107,7 @@ int xdp_prog_ingress(struct xdp_md *ctx)
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0) // No TCP timestamp if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0) // No TCP timestamp
return XDP_PASS; return XDP_PASS;
// We have a TCP-timestamp - now we can check if it's in the map // We have a TCP-timestamp - now we can check if it's in the map
bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", tsval, tsecr); bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", bpf_ntohl(tsval), bpf_ntohl(tsecr));
struct ts_key key; struct ts_key key;
fill_ipv4_flow(&(key.flow), iph->daddr, iph->saddr, tcph->dest, tcph->source); // Fill in reverse order of egress (dest <--> source) fill_ipv4_flow(&(key.flow), iph->daddr, iph->saddr, tcph->dest, tcph->source); // Fill in reverse order of egress (dest <--> source)
key.tsval = tsecr; key.tsval = tsecr;
@@ -113,9 +121,13 @@ int xdp_prog_ingress(struct xdp_md *ctx)
struct ts_timestamp *ts = bpf_map_lookup_elem(&ts_start, &key); struct ts_timestamp *ts = bpf_map_lookup_elem(&ts_start, &key);
if (ts && ts->used == 0) { if (ts && ts->used == 0) {
ts->used = 1; ts->used = 1;
__u64 rtt = bpf_ktime_get_ns() - ts->timestamp; //__u64 rtt = bpf_ktime_get_ns() - ts->timestamp;
// TODO: Push RTT + flow to userspace through perf buffer
bpf_printk("RTT: %llu\n", rtt); struct rtt_event event = {0};
memcpy(&(event.flow), &(key.flow), sizeof(struct ipv4_flow));
event.rtt = bpf_ktime_get_ns() - ts->timestamp;
bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU, &event, sizeof(event));
bpf_printk("Pushed rtt event with RTT: %llu", event.rtt);
} }
return XDP_PASS; return XDP_PASS;

View File

@@ -2,9 +2,9 @@
#include <linux/if_link.h> #include <linux/if_link.h>
#include <bpf/bpf.h> #include <bpf/bpf.h>
#include <bpf/libbpf.h> #include <bpf/libbpf.h>
#include <net/if.h> #include <net/if.h> // For if_nametoindex
#include <linux/err.h> // For IS_ERR_OR_NULL macro //#include <linux/err.h> // For IS_ERR_OR_NULL macro // use libbpf_get_error instead
#include <arpa/inet.h> #include <arpa/inet.h> // For inet_ntoa and ntohs
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@@ -15,17 +15,27 @@
#include <signal.h> // For detecting Ctrl-C #include <signal.h> // For detecting Ctrl-C
#include <sys/resource.h> // For setting rlmit #include <sys/resource.h> // For setting rlmit
#include <time.h> #include <time.h>
#include <pthread.h>
#include "timestamp_map.h" //key and value structs for the ts_start map #include "timestamp_map.h" //key and value structs for the ts_start map
#define BILLION 1000000000UL
#define PPING_ELF_OBJ "pping_kern.o" #define PPING_ELF_OBJ "pping_kern.o"
#define XDP_PROG_SEC "pping_ingress" #define XDP_PROG_SEC "pping_ingress"
#define XDP_FLAGS XDP_FLAGS_UPDATE_IF_NOEXIST #define XDP_FLAGS XDP_FLAGS_UPDATE_IF_NOEXIST
#define MAP_NAME "ts_start" #define MAP_NAME "ts_start"
#define MAP_CLEANUP_INTERVAL 1*BILLION // Clean timestamp map once per second
#define PERF_BUFFER_NAME "rtt_events"
#define PERF_BUFFER_PAGES 64 // Related to the perf-buffer size?
#define PERF_POLL_TIMEOUT_MS 100
#define RMEMLIM 512UL << 20 /* 512 MBs */ #define RMEMLIM 512UL << 20 /* 512 MBs */
#define ERROR_MSG_MAX 1024 #define ERROR_MSG_MAX 1024
#define BILLION 1000000000UL
#define TIMESTAMP_LIFETIME 10*BILLION // 10 seconds #define TIMESTAMP_LIFETIME 10*BILLION // 10 seconds
// Arguments handed to the periodic_map_cleanup thread (passed as the void *args pointer)
struct map_cleanup_args {
// File descriptor of the map to clean; forwarded as map_fd to remove_old_entries_from_map
int map_fd;
// Forwarded as max_age to remove_old_entries_from_map; main sets it to TIMESTAMP_LIFETIME (nanoseconds)
__u64 max_age_ns;
};
/* static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) */ /* static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) */
/* { */ /* { */
/* return vfprintf(stderr, format, args); */ /* return vfprintf(stderr, format, args); */
@@ -33,7 +43,7 @@
static volatile int keep_running = 1; static volatile int keep_running = 1;
void abort_main_loop(int sig) void abort_program(int sig)
{ {
keep_running = 0; keep_running = 0;
} }
@@ -60,7 +70,6 @@ static int xdp_load_and_attach(int ifindex, char *obj_path, char *sec, __u32 xdp
//.ifindex = ifindex, //.ifindex = ifindex,
.file = obj_path, .file = obj_path,
}; };
//attr.file = obj_path;
err = bpf_prog_load_xattr(&attr, obj, prog_fd); err = bpf_prog_load_xattr(&attr, obj, prog_fd);
if (err) { if (err) {
@@ -95,7 +104,6 @@ static __u64 get_time_ns(clockid_t clockid)
return (__u64)t.tv_sec * BILLION + (__u64)t.tv_nsec; return (__u64)t.tv_sec * BILLION + (__u64)t.tv_nsec;
} }
static int remove_old_entries_from_map(int map_fd, __u64 max_age) static int remove_old_entries_from_map(int map_fd, __u64 max_age)
{ {
int removed = 0, entries = 0; int removed = 0, entries = 0;
@@ -103,7 +111,7 @@ static int remove_old_entries_from_map(int map_fd, __u64 max_age)
struct ts_timestamp value; struct ts_timestamp value;
bool delete_prev = false; bool delete_prev = false;
__u64 now_nsec = get_time_ns(CLOCK_MONOTONIC); __u64 now_nsec = get_time_ns(CLOCK_MONOTONIC);
if (now_nsec == 0) if (now_nsec == 0)
return -errno; return -errno;
// Cannot delete current key because then loop will reset, see https://www.bouncybouncy.net/blog/bpf_map_get_next_key-pitfalls/ // Cannot delete current key because then loop will reset, see https://www.bouncybouncy.net/blog/bpf_map_get_next_key-pitfalls/
@@ -126,10 +134,40 @@ static int remove_old_entries_from_map(int map_fd, __u64 max_age)
bpf_map_delete_elem(map_fd, &prev_key); bpf_map_delete_elem(map_fd, &prev_key);
removed++; removed++;
} }
printf("Gone through %d entries and removed %d of them\n", entries, removed); __u64 duration = get_time_ns(CLOCK_MONOTONIC) - now_nsec;
printf("Gone through %d entries and removed %d of them in %llu.%09llu\n", entries, removed, duration / BILLION, duration % BILLION);
return removed; return removed;
} }
static void *periodic_map_cleanup(void *args)
{
struct map_cleanup_args *argp = args;
struct timespec interval;
interval.tv_sec = MAP_CLEANUP_INTERVAL / BILLION;
interval.tv_nsec = MAP_CLEANUP_INTERVAL % BILLION;
while (keep_running) {
remove_old_entries_from_map(argp->map_fd, argp->max_age_ns);
nanosleep(&interval, NULL);
}
pthread_exit(NULL);
}
/*
 * Perf-buffer sample callback: prints one RTT report to stdout.
 *
 * ctx:       callback context pointer (unused)
 * cpu:       CPU the sample was delivered from (unused)
 * data:      sample payload, interpreted as a struct rtt_event
 * data_size: size of the payload in bytes
 *
 * Output format: "<ms>.<6-digit fraction> ms <saddr>:<sport>+<daddr>:<dport>"
 */
static void handle_rtt_event(void *ctx, int cpu, void *data, __u32 data_size)
{
	/* e->rtt is a difference of bpf_ktime_get_ns() values, i.e. nanoseconds */
	const unsigned long long ns_per_ms = 1000000ULL;
	const struct rtt_event *e = data;
	char src_buf[INET_ADDRSTRLEN];
	char dst_buf[INET_ADDRSTRLEN];
	struct in_addr src, dst;
	const char *src_str, *dst_str;
	unsigned long long rtt_ns;

	(void)ctx;
	(void)cpu;

	/* Never read past a short or missing sample */
	if (data == NULL || data_size < sizeof(*e)) {
		fprintf(stderr, "Ignoring truncated RTT event (%u bytes)\n",
			(unsigned int)data_size);
		return;
	}

	src.s_addr = e->flow.saddr;
	dst.s_addr = e->flow.daddr;

	/*
	 * inet_ntoa() returns a pointer to one shared static buffer, so using
	 * it twice in a single printf() prints the same address for both ends.
	 * inet_ntop() writes into caller-provided buffers instead.
	 */
	src_str = inet_ntop(AF_INET, &src, src_buf, sizeof(src_buf));
	if (src_str == NULL)
		src_str = "?";
	dst_str = inet_ntop(AF_INET, &dst, dst_buf, sizeof(dst_buf));
	if (dst_str == NULL)
		dst_str = "?";

	/* Print real milliseconds so the value matches the "ms" label */
	rtt_ns = e->rtt;
	printf("%llu.%06llu ms %s:%d+%s:%d\n",
	       rtt_ns / ns_per_ms, rtt_ns % ns_per_ms,
	       src_str, ntohs(e->flow.sport),
	       dst_str, ntohs(e->flow.dport));
}
/*
 * Perf-buffer lost-sample callback: reports on stderr how many RTT events
 * were lost and on which CPU.
 *
 * ctx:      callback context pointer (unused)
 * cpu:      CPU the lost events belong to
 * lost_cnt: number of events reported as lost
 */
static void handle_missed_rtt_event(void *ctx, int cpu, __u64 lost_cnt)
{
	(void)ctx;

	fprintf(stderr, "Lost %llu RTT events on CPU %d\n", lost_cnt, cpu);
}
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
if (argc < 2) { if (argc < 2) {
@@ -137,8 +175,10 @@ int main(int argc, char *argv[])
return EXIT_FAILURE; return EXIT_FAILURE;
} }
int err; int err = 0, ifindex = 0;
bool xdp_attached = false;
char error_msg[ERROR_MSG_MAX]; char error_msg[ERROR_MSG_MAX];
struct perf_buffer *pb = NULL;
// Setup libbpf errors and debug info on callback // Setup libbpf errors and debug info on callback
//libbpf_set_print(libbpf_print_fn); //libbpf_set_print(libbpf_print_fn);
@@ -147,15 +187,15 @@ int main(int argc, char *argv[])
err = set_rlimit(RMEMLIM); err = set_rlimit(RMEMLIM);
if (err) { if (err) {
fprintf(stderr, "Could not set rlimit to %ld bytes: %s\n", RMEMLIM, strerror(-err)); fprintf(stderr, "Could not set rlimit to %ld bytes: %s\n", RMEMLIM, strerror(-err));
return EXIT_FAILURE; goto cleanup;
} }
// Get index of interface // Get index of interface
int ifindex = if_nametoindex(argv[1]); ifindex = if_nametoindex(argv[1]);
if (ifindex == 0) { if (ifindex == 0) {
err = -errno; err = -errno;
fprintf(stderr, "Could not get index of interface %s: %s\n", argv[1], strerror(-err)); fprintf(stderr, "Could not get index of interface %s: %s\n", argv[1], strerror(-err));
return EXIT_FAILURE; goto cleanup;
} }
// Load and attach XDP program to interface // Load and attach XDP program to interface
@@ -165,29 +205,57 @@ int main(int argc, char *argv[])
err = xdp_load_and_attach(ifindex, PPING_ELF_OBJ, XDP_PROG_SEC, XDP_FLAGS, &obj, &prog_fd, error_msg); err = xdp_load_and_attach(ifindex, PPING_ELF_OBJ, XDP_PROG_SEC, XDP_FLAGS, &obj, &prog_fd, error_msg);
if (err) { if (err) {
fprintf(stderr, "%s: %s\n", error_msg, strerror(-err)); fprintf(stderr, "%s: %s\n", error_msg, strerror(-err));
return EXIT_FAILURE; goto cleanup;
} }
xdp_attached = true;
// Find map fd (to perform periodic cleanup)
int map_fd = bpf_object__find_map_fd_by_name(obj, MAP_NAME); int map_fd = bpf_object__find_map_fd_by_name(obj, MAP_NAME);
if (map_fd < 0) { if (map_fd < 0) {
fprintf(stderr, "Failed finding map %s in %s: %s\n", MAP_NAME, PPING_ELF_OBJ, strerror(-map_fd)); fprintf(stderr, "Failed finding map %s in %s: %s\n", MAP_NAME, PPING_ELF_OBJ, strerror(-map_fd));
xdp_deatach(ifindex, XDP_FLAGS); goto cleanup;
return EXIT_FAILURE; }
pthread_t tid;
struct map_cleanup_args args = {.map_fd = map_fd, .max_age_ns = TIMESTAMP_LIFETIME};
err = pthread_create(&tid, NULL, periodic_map_cleanup, &args);
if (err) {
fprintf(stderr, "Failed starting thread to perform periodic map cleanup: %s\n", strerror(err));
goto cleanup;
}
// Set up perf buffer
struct perf_buffer_opts pb_opts;
pb_opts.sample_cb = handle_rtt_event;
pb_opts.lost_cb = handle_missed_rtt_event;
pb = perf_buffer__new(bpf_object__find_map_fd_by_name(obj, PERF_BUFFER_NAME), PERF_BUFFER_PAGES, &pb_opts);
err = libbpf_get_error(pb);
if (err) {
pb = NULL;
fprintf(stderr, "Failed to open perf buffer %s: %s\n", PERF_BUFFER_NAME, strerror(err));
goto cleanup;
} }
// Main loop // Main loop
signal(SIGINT, abort_main_loop); signal(SIGINT, abort_program);
while(keep_running) { while(keep_running) {
sleep(1); if ((err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS)) < 0) {
// TODO - print out if (keep_running) // Only print polling error if it wasn't caused by program termination
remove_old_entries_from_map(map_fd, TIMESTAMP_LIFETIME); fprintf(stderr, "Error polling perf buffer: %s\n", strerror(-err));
break;
}
} }
err = xdp_deatach(ifindex, XDP_FLAGS); cleanup:
if (err) { printf("Cleanup!\n");
fprintf(stderr, "Failed deatching program from ifindex %d: %s\n", ifindex, strerror(-err)); perf_buffer__free(pb);
return EXIT_FAILURE; if (xdp_attached) {
err = xdp_deatach(ifindex, XDP_FLAGS);
if (err) {
fprintf(stderr, "Failed deatching program from ifindex %d: %s\n", ifindex, strerror(-err));
}
} }
return EXIT_SUCCESS; return err != 0;
} }

View File

@@ -23,4 +23,11 @@ struct ts_timestamp
// __u8 pad[7]; // Need to pad it due to compiler optimization, see "Remove struct padding with aligning members by using #pragma pack." at https://docs.cilium.io/en/v1.9/bpf/ // __u8 pad[7]; // Need to pad it due to compiler optimization, see "Remove struct padding with aligning members by using #pragma pack." at https://docs.cilium.io/en/v1.9/bpf/
}; };
// Event record the XDP program pushes to userspace through the rtt_events perf buffer
struct rtt_event
{
// Flow the RTT belongs to; the XDP program copies it from the flow of the timestamp lookup key
struct ipv4_flow flow;
// Round-trip time: bpf_ktime_get_ns() when the match is found minus the stored timestamp
__u64 rtt;
};
#endif #endif