mirror of
https://github.com/xdp-project/bpf-examples.git
synced 2024-05-06 15:54:53 +00:00
pping: Start pping implementation
Add a XDP program to parse TCP timestamps and a simple loader Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
8
pping/Makefile
Normal file
8
pping/Makefile
Normal file
@@ -0,0 +1,8 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
|
||||
|
||||
# Programs handed to the shared build rules: the userspace binary
# (pping_user) and the BPF object (pping_kern).
USER_TARGETS := pping_user
BPF_TARGETS := pping_kern

# Directory that holds the shared common.mk included below.
LIB_DIR = ../lib

include $(LIB_DIR)/common.mk
|
2
pping/README.md
Normal file
2
pping/README.md
Normal file
@@ -0,0 +1,2 @@
|
||||
# PPing using XDP and TC-BPF
|
||||
An implementation of the passive ping ([pping](https://github.com/pollere/pping)) utility based on XDP (for ingress) and TC-BPF (for egress).
|
129
pping/pping_kern.c
Normal file
129
pping/pping_kern.c
Normal file
@@ -0,0 +1,129 @@
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <xdp/parsing_helpers.h>
|
||||
|
||||
#include <linux/in.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/tcp.h>
|
||||
|
||||
#include "timestamp_map.h"
|
||||
|
||||
#define MAX_TCP_OPTIONS 10
|
||||
#define BILLION 1000000000UL
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
/*
 * Hash map of timestamps, keyed by struct ts_key (IPv4 flow + TSval) and
 * holding one struct ts_timestamp per key. The userspace loader finds it by
 * the name "ts_start" and periodically removes old entries.
 */
struct bpf_map_def SEC("maps") ts_start = {
	.type        = BPF_MAP_TYPE_HASH,
	.max_entries = 4096,
	.key_size    = sizeof(struct ts_key),
	.value_size  = sizeof(struct ts_timestamp),
};
|
||||
|
||||
/*
 * Stores the given addresses and ports in *flow. The values are copied
 * unchanged (no byte-order conversion is done here). Always returns 0.
 */
static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr, __u32 daddr, __u16 sport, __u16 dport)
{
	*flow = (struct ipv4_flow){
		.saddr = saddr,
		.daddr = daddr,
		.sport = sport,
		.dport = dport,
	};

	return 0;
}
|
||||
|
||||
// Parses the TSval and TSecr values from the TCP options field - returns 0 if sucessful and -1 on failure
|
||||
static __always_inline int parse_tcp_ts(struct tcphdr *tcph, void *data_end, __u32 *tsval, __u32 *tsecr)
|
||||
{
|
||||
if (tcph + 1 > data_end) // To hopefully please verifier
|
||||
return -1;
|
||||
int len = tcph->doff << 2;
|
||||
if (len <= sizeof(struct tcphdr)) // No TCP options
|
||||
return -1;
|
||||
void *pos = (void *)(tcph + 1);
|
||||
void *opt_end = ((void *)tcph + len);
|
||||
__u8 i, opt, opt_size;
|
||||
#pragma unroll
|
||||
for (i = 0; i < MAX_TCP_OPTIONS; i++) {
|
||||
if (pos+1 > opt_end || pos+1 > data_end)
|
||||
return -1;
|
||||
opt = *(__u8 *)pos; // Save value to variable so I don't have to perform any more data_end checks on the option kind
|
||||
if (opt == 0) // Reached end of TCP options
|
||||
return -1;
|
||||
if (opt == 1) {// TCP NOP option - advance one byte
|
||||
pos++;
|
||||
continue;
|
||||
}
|
||||
// Option > 1, should have option size
|
||||
if (pos+2 > opt_end || pos+2 > data_end)
|
||||
return -1;
|
||||
opt_size = *(__u8 *)(pos+1); // Save value to variable so I don't have to perform any more data_end checks on option size
|
||||
|
||||
if (opt == 8 && opt_size == 10) { // Option-kind is TCP timestap (yey!)
|
||||
if (pos + opt_size > opt_end ||pos + opt_size > data_end)
|
||||
return -1;
|
||||
*tsval = bpf_ntohl(*(__u32 *)(pos + 2));
|
||||
*tsecr = bpf_ntohl(*(__u32 *)(pos + 6));
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Some other TCP option - advance option-length bytes
|
||||
pos += opt_size;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// XDP for parsing TSECR-val from ingress traffic and check for match in map
|
||||
SEC("pping_ingress")
|
||||
int xdp_prog_ingress(struct xdp_md *ctx)
|
||||
{
|
||||
void *data = (void *)(long)ctx->data, *data_end = (void *)(long)ctx->data_end;
|
||||
int proto = -1;
|
||||
struct hdr_cursor nh = {.pos = data };
|
||||
struct ethhdr *eth;
|
||||
struct iphdr *iph;
|
||||
struct tcphdr *tcph;
|
||||
|
||||
bpf_printk("Received packet of length %d\n", (int)(data_end - data));
|
||||
proto = parse_ethhdr(&nh, data_end, ð);
|
||||
if (bpf_ntohs(proto) != ETH_P_IP)
|
||||
return XDP_PASS; // Not IPv4 packet (or failed to parse ethernet header)
|
||||
proto = parse_iphdr(&nh, data_end, &iph);
|
||||
if (proto != IPPROTO_TCP)
|
||||
return XDP_PASS; // Not a TCP packet (or failed to parse ethernet header)
|
||||
proto = parse_tcphdr(&nh, data_end, &tcph);
|
||||
if (proto < 0)
|
||||
return XDP_PASS; // Failed parsing TCP-header
|
||||
|
||||
bpf_printk("TCP-packet with %d byte header and %lu bytes of data\n", proto, data_end - nh.pos);
|
||||
|
||||
__u32 tsval, tsecr;
|
||||
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0) // No TCP timestamp
|
||||
return XDP_PASS;
|
||||
// We have a TCP-timestamp - now we can check if it's in the map
|
||||
bpf_printk("TCP-packet with timestap. TSval: %u, TSecr: %u\n", tsval, tsecr);
|
||||
struct ts_key key;
|
||||
fill_ipv4_flow(&(key.flow), iph->daddr, iph->saddr, tcph->dest, tcph->source); // Fill in reverse order of egress (dest <--> source)
|
||||
key.tsval = tsecr;
|
||||
|
||||
// Should look up map map (filling done on egress), but temporarily add to map before I get the TC-BPF part working
|
||||
struct ts_timestamp wrong_value = {0};
|
||||
wrong_value.timestamp = bpf_ktime_get_ns(); //Verifier was unhappy when using bpf_ktime_get_boot_ns
|
||||
bpf_map_update_elem(&ts_start, &key, &wrong_value, BPF_NOEXIST);
|
||||
|
||||
|
||||
struct ts_timestamp *ts = bpf_map_lookup_elem(&ts_start, &key);
|
||||
if (ts && ts->used == 0) {
|
||||
ts->used = 1;
|
||||
__u64 rtt = bpf_ktime_get_ns() - ts->timestamp;
|
||||
// TODO: Push RTT + flow to userspace through perf buffer
|
||||
bpf_printk("RTT: %llu\n", rtt);
|
||||
}
|
||||
|
||||
return XDP_PASS;
|
||||
}
|
||||
|
||||
// TC-BFP for parsing TSVAL from egress traffic and add to map
|
||||
SEC("pping_egress")
|
||||
int tc_bpf_prog_egress(struct __skbuff *skb)
|
||||
{
|
||||
return BPF_OK;
|
||||
}
|
193
pping/pping_user.c
Normal file
193
pping/pping_user.c
Normal file
@@ -0,0 +1,193 @@
|
||||
//#include <linux/bpf.h>
|
||||
#include <linux/if_link.h>
|
||||
#include <bpf/bpf.h>
|
||||
#include <bpf/libbpf.h>
|
||||
#include <net/if.h>
|
||||
#include <linux/err.h> // For IS_ERR_OR_NULL macro
|
||||
#include <arpa/inet.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <stdbool.h>
|
||||
#include <signal.h> // For detecting Ctrl-C
|
||||
#include <sys/resource.h> // For setting rlmit
|
||||
#include <time.h>
|
||||
#include "timestamp_map.h" //key and value structs for the ts_start map
|
||||
|
||||
#define PPING_ELF_OBJ "pping_kern.o" // BPF object file that gets loaded
#define XDP_PROG_SEC "pping_ingress" // ELF section of the XDP program
#define XDP_FLAGS XDP_FLAGS_UPDATE_IF_NOEXIST
#define MAP_NAME "ts_start"
// Expression macros are parenthesized so they expand safely inside larger expressions
#define RMEMLIM (512UL << 20) /* 512 MiB */
#define ERROR_MSG_MAX 1024 // Size of the error message buffers
#define BILLION 1000000000UL // Nanoseconds per second
#define TIMESTAMP_LIFETIME (10 * BILLION) // 10 seconds
|
||||
|
||||
/* static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) */
|
||||
/* { */
|
||||
/* return vfprintf(stderr, format, args); */
|
||||
/* } */
|
||||
|
||||
/*
 * Main-loop run flag. It is written from a signal handler, so it uses
 * volatile sig_atomic_t, the type meant for objects shared with handlers.
 */
static volatile sig_atomic_t keep_running = 1;

/* Signal handler: clears keep_running so that the main loop stops. */
void abort_main_loop(int sig)
{
	(void)sig; // The signal number is not used
	keep_running = 0;
}
|
||||
|
||||
/*
 * Sets both the soft and the hard RLIMIT_MEMLOCK limit to lim bytes.
 * Returns 0 on success and the negated errno value if setrlimit() fails.
 */
static int set_rlimit(long int lim)
{
	struct rlimit memlock;

	memlock.rlim_cur = lim;
	memlock.rlim_max = lim;

	if (setrlimit(RLIMIT_MEMLOCK, &memlock) != 0)
		return -errno;

	return 0;
}
|
||||
|
||||
/*
 * Loads the BPF object at obj_path and attaches the XDP program found in
 * ELF section sec to the interface ifindex.
 *
 * ifindex:   interface to attach to
 * obj_path:  path of the BPF ELF object
 * sec:       section name of the XDP program inside the object
 * xdp_flags: flags passed on to bpf_set_link_xdp_fd()
 * obj:       receives the loaded object on success
 * prog_fd:   receives the fd of the attached program on success, -1 otherwise
 * error_buf: optional buffer of at least ERROR_MSG_MAX bytes that receives a
 *            description of the failing step (may be NULL)
 *
 * Returns 0 on success and a negative value on failure. When a step after
 * loading fails, the object is closed again and *obj is set to NULL, so the
 * caller has nothing to release on error.
 */
static int xdp_load_and_attach(int ifindex, char *obj_path, char *sec, __u32 xdp_flags, struct bpf_object **obj, int *prog_fd, char *error_buf)
{
	struct bpf_program *prog = NULL;
	int err;
	// attr.ifindex is left unset (it was already disabled in the original code)
	struct bpf_prog_load_attr attr = {
		.prog_type = BPF_PROG_TYPE_XDP,
		.file = obj_path,
	};

	*prog_fd = -1;

	err = bpf_prog_load_xattr(&attr, obj, prog_fd);
	if (err) {
		if (error_buf)
			snprintf(error_buf, ERROR_MSG_MAX, "Could not open %s", obj_path);
		return err;
	}

	prog = bpf_object__find_program_by_title(*obj, sec);
	if (!prog) {
		if (error_buf)
			snprintf(error_buf, ERROR_MSG_MAX, "Could not find section %s in ELF object %s", sec, obj_path);
		// Use a real errno value: callers print the result with strerror(-err)
		err = -ENOENT;
		goto err_close;
	}

	*prog_fd = bpf_program__fd(prog);
	err = bpf_set_link_xdp_fd(ifindex, *prog_fd, xdp_flags);
	if (err < 0) {
		if (error_buf)
			snprintf(error_buf, ERROR_MSG_MAX, "Failed attaching XDP program %s in %s to ifindex %d", sec, obj_path, ifindex);
		goto err_close;
	}

	return 0;

err_close:
	// Do not leak the loaded object (and its programs/maps) on failure
	bpf_object__close(*obj);
	*obj = NULL;
	*prog_fd = -1;
	return err;
}
|
||||
|
||||
/*
 * Detaches the XDP program from interface ifindex by handing an fd of -1 to
 * bpf_set_link_xdp_fd(). Returns whatever that call returns.
 */
static int xdp_deatach(int ifindex, __u32 xdp_flags)
{
	const int no_prog_fd = -1;

	return bpf_set_link_xdp_fd(ifindex, no_prog_fd, xdp_flags);
}
|
||||
|
||||
/*
 * Reads the clock clockid and returns its value in nanoseconds.
 * Returns 0 if clock_gettime() fails.
 */
static __u64 get_time_ns(clockid_t clockid)
{
	struct timespec now;
	__u64 sec_as_ns, nsec;

	// CLOCK_BOOTTIME if using bpf_get_ktime_boot_ns
	if (clock_gettime(clockid, &now))
		return 0;

	sec_as_ns = (__u64)now.tv_sec * BILLION;
	nsec = (__u64)now.tv_nsec;
	return sec_as_ns + nsec;
}
|
||||
|
||||
|
||||
/*
 * Walks through all entries of the map map_fd and deletes those whose
 * timestamp is more than max_age nanoseconds older than the current
 * CLOCK_MONOTONIC time.
 *
 * Returns the number of entries that were actually deleted, or a negative
 * errno value if the current time could not be read. Also prints a summary
 * line to stdout.
 */
static int remove_old_entries_from_map(int map_fd, __u64 max_age)
{
	int removed = 0, entries = 0;
	struct ts_key key, prev_key = {0};
	// NULL makes the iteration start at the first key. Starting from an
	// all-zero key would skip a real entry if that key happened to exist.
	struct ts_key *prev = NULL;
	struct ts_timestamp value;
	bool delete_prev = false;
	__u64 now_nsec = get_time_ns(CLOCK_MONOTONIC);

	if (now_nsec == 0)
		return -errno;

	// Cannot delete current key because then loop will reset, see https://www.bouncybouncy.net/blog/bpf_map_get_next_key-pitfalls/
	while (bpf_map_get_next_key(map_fd, prev, &key) == 0) {
		// The next key has been fetched, so the previous one can go now
		if (delete_prev) {
			if (bpf_map_delete_elem(map_fd, &prev_key) == 0)
				removed++; // Only count deletions that succeeded
			delete_prev = false;
		}

		if (bpf_map_lookup_elem(map_fd, &key, &value) == 0) {
			// First comparison guards the unsigned subtraction against wrap-around
			if (now_nsec > value.timestamp && now_nsec - value.timestamp > max_age)
				delete_prev = true;
		}

		entries++;
		prev_key = key;
		prev = &prev_key;
	}

	// The last visited entry has no successor that would trigger its deletion
	if (delete_prev) {
		if (bpf_map_delete_elem(map_fd, &prev_key) == 0)
			removed++;
	}

	printf("Gone through %d entries and removed %d of them\n", entries, removed);
	return removed;
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 2) {
|
||||
printf("Usage: ./pping_user <dev>\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
int err;
|
||||
char error_msg[ERROR_MSG_MAX];
|
||||
|
||||
// Setup libbpf errors and debug info on callback
|
||||
//libbpf_set_print(libbpf_print_fn);
|
||||
|
||||
// Increase rlimit
|
||||
err = set_rlimit(RMEMLIM);
|
||||
if (err) {
|
||||
fprintf(stderr, "Could not set rlimit to %ld bytes: %s\n", RMEMLIM, strerror(-err));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Get index of interface
|
||||
int ifindex = if_nametoindex(argv[1]);
|
||||
if (ifindex == 0) {
|
||||
err = -errno;
|
||||
fprintf(stderr, "Could not get index of interface %s: %s\n", argv[1], strerror(-err));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Load and attach XDP program to interface
|
||||
struct bpf_object *obj = NULL;
|
||||
int prog_fd = -1;
|
||||
|
||||
err = xdp_load_and_attach(ifindex, PPING_ELF_OBJ, XDP_PROG_SEC, XDP_FLAGS, &obj, &prog_fd, error_msg);
|
||||
if (err) {
|
||||
fprintf(stderr, "%s: %s\n", error_msg, strerror(-err));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
int map_fd = bpf_object__find_map_fd_by_name(obj, MAP_NAME);
|
||||
if (map_fd < 0) {
|
||||
fprintf(stderr, "Failed finding map %s in %s: %s\n", MAP_NAME, PPING_ELF_OBJ, strerror(-map_fd));
|
||||
xdp_deatach(ifindex, XDP_FLAGS);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Main loop
|
||||
signal(SIGINT, abort_main_loop);
|
||||
while(keep_running) {
|
||||
sleep(1);
|
||||
// TODO - print out
|
||||
remove_old_entries_from_map(map_fd, TIMESTAMP_LIFETIME);
|
||||
}
|
||||
|
||||
err = xdp_deatach(ifindex, XDP_FLAGS);
|
||||
if (err) {
|
||||
fprintf(stderr, "Failed deatching program from ifindex %d: %s\n", ifindex, strerror(-err));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
26
pping/timestamp_map.h
Normal file
26
pping/timestamp_map.h
Normal file
@@ -0,0 +1,26 @@
|
||||
#ifndef TIMESTAMP_MAP_H
|
||||
#define TIMESTAMP_MAP_H
|
||||
#include <linux/types.h>
|
||||
|
||||
struct ipv4_flow
|
||||
{
|
||||
__u32 saddr;
|
||||
__u32 daddr;
|
||||
__u16 sport;
|
||||
__u16 dport;
|
||||
};
|
||||
|
||||
struct ts_key
|
||||
{
|
||||
struct ipv4_flow flow;
|
||||
__u32 tsval;
|
||||
};
|
||||
|
||||
struct ts_timestamp
|
||||
{
|
||||
__u64 timestamp;
|
||||
__u8 used;
|
||||
// __u8 pad[7]; // Need to pad it due to compiler optimization, see "Remove struct padding with aligning members by using #pragma pack." at https://docs.cilium.io/en/v1.9/bpf/
|
||||
|
||||
};
|
||||
#endif
|
Reference in New Issue
Block a user