pping: Refactor TC and XDP programs

Refactor TC and XDP programs to reuse common logic for parsing
packets. Add functions for parsing packets for an identifier to
pping_helpers.h which both TC and XDP parts use. Also make it easier
to extend pping with support for new protocols, as only new parsing
functions have to be added and inserted into a single place.

Also add reserved members to end of structs in pping.h to indicate
padding.

Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
Simon Sundberg
2021-02-09 18:09:30 +01:00
parent eafdf87d80
commit 3268ba87bb
5 changed files with 129 additions and 131 deletions

View File

@@ -81,7 +81,7 @@ static int set_rlimit(long int lim)
static int mkdir_if_noexist(const char *path)
{
int ret;
struct stat st = {0};
struct stat st = { 0 };
ret = stat(path, &st);
if (ret) {
@@ -261,12 +261,12 @@ static void handle_rtt_event(void *ctx, int cpu, void *data, __u32 data_size)
char saddr[INET6_ADDRSTRLEN];
char daddr[INET6_ADDRSTRLEN];
format_ip_address(e->flow.ipv, &e->flow.saddr, saddr, sizeof(saddr));
format_ip_address(e->flow.ipv, &e->flow.daddr, daddr, sizeof(daddr));
format_ip_address(e->flow.ipv, &e->flow.saddr.ip, saddr, sizeof(saddr));
format_ip_address(e->flow.ipv, &e->flow.daddr.ip, daddr, sizeof(daddr));
printf("%llu.%06llu ms %s:%d+%s:%d\n", e->rtt / NS_PER_MS,
e->rtt % NS_PER_MS, saddr, ntohs(e->flow.sport), daddr,
ntohs(e->flow.dport));
e->rtt % NS_PER_MS, saddr, ntohs(e->flow.saddr.port), daddr,
ntohs(e->flow.daddr.port));
}
static void handle_missed_rtt_event(void *ctx, int cpu, __u64 lost_cnt)

View File

@@ -9,21 +9,29 @@
#define TCBPF_PROG_SEC "pping_egress"
/*
* Struct to hold a full network tuple
* Struct that can hold the source or destination address for a flow (l3+l4).
* Works for both IPv4 and IPv6, as IPv4 addresses can be mapped to IPv6 ones
* based on RFC 4291 Section 2.5.5.2. The ipv member is technically not
* necessary, but makes it easier to determine if it is an IPv4 or IPv6 address
* (don't need to look at the first 12 bytes of address).
* The proto member is not currently used, but could be useful once pping
* is extended to work for other protocols than TCP
* based on RFC 4291 Section 2.5.5.2.
*/
struct flow_address {
struct in6_addr ip; // IPv6 address, or an IPv4 address mapped into IPv6 form
__u16 port; // L4 port, stored as found in the packet (userspace applies ntohs)
__u16 reserved; // Explicit padding member, not used by the code shown here
};
/*
* Struct to hold a full network tuple
* The ipv member is technically not necessary, but makes it easier to
* determine if saddr/daddr are IPv4 or IPv6 address (don't need to look at the
* first 12 bytes of address). The proto member is not currently used, but
* could be useful once pping is extended to work for other protocols than TCP.
*/
struct network_tuple {
struct in6_addr saddr;
struct in6_addr daddr;
__u16 sport;
__u16 dport;
struct flow_address saddr;
struct flow_address daddr;
__u16 proto; //IPPROTO_TCP, IPPROTO_ICMP, QUIC etc
__u16 ipv; //AF_INET or AF_INET6
__u8 ipv; //AF_INET or AF_INET6
__u8 reserved;
};
struct packet_id {
@@ -34,11 +42,13 @@ struct packet_id {
/*
 * Value stored in the ts_start map for a packet identifier seen on egress.
 */
struct packet_timestamp {
__u64 timestamp; // bpf_ktime_get_ns() value taken when the entry was created
__u8 used; // Set to 1 by the ingress program once an RTT was reported for it
__u8 reserved[7]; // Explicit padding member, not used by the code shown here
};
/*
 * Event pushed to userspace through the rtt_events perf buffer when an
 * ingress packet matches a stored egress timestamp.
 */
struct rtt_event {
__u64 rtt; // bpf_ktime_get_ns() at match time minus the stored timestamp (ns)
struct network_tuple flow; // Copy of the flow part of the matched packet_id
__u32 reserved; // Explicit padding member, not used by the code shown here
};
#endif

View File

@@ -2,9 +2,16 @@
#ifndef PPING_HELPERS_H
#define PPING_HELPERS_H
#include <linux/bpf.h>
#include <xdp/parsing_helpers.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <string.h>
#include <stdbool.h>
#include "pping.h"
#define AF_INET 2
@@ -14,12 +21,12 @@
/*
* Maps an IPv4 address into an IPv6 address according to RFC 4291 sec 2.5.5.2
*/
static __always_inline void map_ipv4_to_ipv6(__be32 ipv4, struct in6_addr *ipv6)
static void map_ipv4_to_ipv6(__be32 ipv4, struct in6_addr *ipv6)
{
/* __u16 ipv4_prefix[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0xffff}; */
/* memcpy(ipv6, ipv4_prefix, sizeof(ipv4_prefix)); // Won't load on TC */
memset(&ipv6->in6_u.u6_addr8[0], 0x00, 10);
memset(&ipv6->in6_u.u6_addr8[10], 0xff, 2);
/* __builtin_memcpy(ipv6, ipv4_prefix, sizeof(ipv4_prefix)); */
__builtin_memset(&ipv6->in6_u.u6_addr8[0], 0x00, 10);
__builtin_memset(&ipv6->in6_u.u6_addr8[10], 0xff, 2);
ipv6->in6_u.u6_addr32[3] = ipv4;
}
@@ -29,8 +36,8 @@ static __always_inline void map_ipv4_to_ipv6(__be32 ipv4, struct in6_addr *ipv6)
* byte order).
* Returns 0 if successful and -1 on failure
*/
static __always_inline int parse_tcp_ts(struct tcphdr *tcph, void *data_end,
__u32 *tsval, __u32 *tsecr)
static int parse_tcp_ts(struct tcphdr *tcph, void *data_end, __u32 *tsval,
__u32 *tsecr)
{
int len = tcph->doff << 2;
void *opt_end = (void *)tcph + len;
@@ -73,5 +80,80 @@ static __always_inline int parse_tcp_ts(struct tcphdr *tcph, void *data_end,
}
return -1;
}
/*
 * Tries to derive a packet identifier for a TCP segment from its TCP
 * timestamp option.
 *
 * nh:         header cursor positioned at the start of the TCP header
 * data_end:   end of the packet data
 * is_egress:  selects which timestamp field becomes the identifier:
 *             TSval when true, TSecr when false
 * saddr:      its port member receives the TCP source port
 * daddr:      its port member receives the TCP destination port
 * identifier: receives the selected timestamp value
 *
 * Returns 0 on success. Returns -1 if the TCP header cannot be parsed or no
 * timestamp option is found; in that case no output argument is written.
 */
static int parse_tcp_identifier(struct hdr_cursor *nh, void *data_end,
				bool is_egress, struct flow_address *saddr,
				struct flow_address *daddr, __u32 *identifier)
{
	struct tcphdr *tcp_hdr;
	__u32 ts_value, ts_echo;

	if (parse_tcphdr(nh, data_end, &tcp_hdr) < 0)
		return -1;

	// Possible TODO, fall back on seq/ack instead
	if (parse_tcp_ts(tcp_hdr, data_end, &ts_value, &ts_echo) < 0)
		return -1;

	// Ports are copied as found in the header (no byte order conversion)
	saddr->port = tcp_hdr->source;
	daddr->port = tcp_hdr->dest;

	if (is_egress)
		*identifier = ts_value;
	else
		*identifier = ts_echo;

	return 0;
}
/*
* Attempts to parse the packet limited by the data and data_end pointers,
* to retrieve a protocol dependent packet identifier. If sucessful, the
* ipv and identifier of p_id will be set, saddr and daddr (which may be part
* of p_id) will be filled with the source and destionation addresses of the
* packet, and 0 will be returned. On failure, -1 will be returned.
*/
static int parse_packet_identifier(void *data, void *data_end, bool is_egress,
struct packet_id *p_id,
struct flow_address *saddr,
struct flow_address *daddr)
{
struct hdr_cursor nh = { .pos = data };
struct ethhdr *eth;
struct iphdr *iph;
struct ipv6hdr *ip6h;
int proto, err;
proto = parse_ethhdr(&nh, data_end, &eth);
// Parse IPv4/6 header
if (proto == bpf_htons(ETH_P_IP)) {
p_id->flow.ipv = AF_INET;
proto = parse_iphdr(&nh, data_end, &iph);
} else if (proto == bpf_htons(ETH_P_IPV6)) {
p_id->flow.ipv = AF_INET6;
proto = parse_ip6hdr(&nh, data_end, &ip6h);
} else
return -1;
// Add new protocols here
if (proto == IPPROTO_TCP)
err = parse_tcp_identifier(&nh, data_end, is_egress, saddr,
daddr, &p_id->identifier);
else
return -1;
if (err)
return -1;
// Sucessfully parsed packet identifier - fill in IP-addresses and return
if (p_id->flow.ipv == AF_INET) {
map_ipv4_to_ipv6(iph->saddr, &saddr->ip);
map_ipv4_to_ipv6(iph->daddr, &daddr->ip);
} else { // IPv6
saddr->ip = ip6h->saddr;
daddr->ip = ip6h->daddr;
}
return 0;
}
#endif

View File

@@ -2,16 +2,6 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <iproute2/bpf_elf.h>
#include <xdp/parsing_helpers.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <string.h>
#include "pping.h"
#include "pping_helpers.h"
@@ -37,57 +27,20 @@ struct bpf_elf_map SEC("maps") ts_start = {
};
#endif
// TC-BPF for parsing TSVAL from egress traffic and add to map
// TC-BPF for parsing packet identifier from egress traffic and add to map
SEC(TCBPF_PROG_SEC)
int tc_bpf_prog_egress(struct __sk_buff *skb)
{
void *data = (void *)(long)skb->data;
void *data_end = (void *)(long)skb->data_end;
int proto = -1;
__u32 tsval, tsecr;
struct hdr_cursor nh = { .pos = data };
struct ethhdr *eth;
struct iphdr *iph;
struct ipv6hdr *ip6h;
struct tcphdr *tcph;
struct packet_id p_id = { 0 };
struct packet_timestamp p_ts = { 0 };
proto = parse_ethhdr(&nh, data_end, &eth);
void *data = (void *)(long)skb->data;
void *data_end = (void *)(long)skb->data_end;
// Parse IPv4/6 header
if (proto == bpf_htons(ETH_P_IP)) {
p_id.flow.ipv = AF_INET;
proto = parse_iphdr(&nh, data_end, &iph);
} else if (proto == bpf_htons(ETH_P_IPV6)) {
p_id.flow.ipv = AF_INET6;
proto = parse_ip6hdr(&nh, data_end, &ip6h);
} else
if (parse_packet_identifier(data, data_end, true, &p_id,
&p_id.flow.saddr, &p_id.flow.daddr) < 0)
goto end;
// Parse TCP timestamp
if (proto != IPPROTO_TCP)
goto end;
if (parse_tcphdr(&nh, data_end, &tcph) < 0)
goto end;
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0)
goto end;
// We have a TCP timestamp, try adding it to the map
p_id.identifier = tsval;
if (p_id.flow.ipv == AF_INET) {
map_ipv4_to_ipv6(iph->saddr, &p_id.flow.saddr);
map_ipv4_to_ipv6(iph->daddr, &p_id.flow.daddr);
} else { // IPv6
p_id.flow.saddr = ip6h->saddr;
p_id.flow.daddr = ip6h->daddr;
}
p_id.flow.sport = tcph->source;
p_id.flow.dport = tcph->dest;
p_ts.timestamp = bpf_ktime_get_ns(); // or bpf_ktime_get_boot_ns
bpf_map_update_elem(&ts_start, &p_id, &p_ts, BPF_NOEXIST);

View File

@@ -1,16 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <xdp/parsing_helpers.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <string.h>
#include "pping.h"
#include "pping_helpers.h"
@@ -31,63 +21,25 @@ struct {
__uint(value_size, sizeof(__u32));
} rtt_events SEC(".maps");
// XDP program for parsing TSECR-val from ingress traffic and check for match in map
// XDP program for parsing identifier in ingress traffic and check for match in map
SEC(XDP_PROG_SEC)
int xdp_prog_ingress(struct xdp_md *ctx)
{
void *data = (void *)(long)ctx->data;
void *data_end = (void *)(long)ctx->data_end;
int proto = -1;
__u32 tsval, tsecr;
struct hdr_cursor nh = { .pos = data };
struct ethhdr *eth;
struct iphdr *iph;
struct ipv6hdr *ip6h;
struct tcphdr *tcph;
struct packet_id p_id = { 0 };
struct packet_timestamp *p_ts;
struct rtt_event event = { 0 };
proto = bpf_ntohs(parse_ethhdr(&nh, data_end, &eth));
void *data = (void *)(long)ctx->data;
void *data_end = (void *)(long)ctx->data_end;
// Parse IPv4/6 header
if (proto == ETH_P_IP) {
p_id.flow.ipv = AF_INET;
proto = parse_iphdr(&nh, data_end, &iph);
} else if (proto == ETH_P_IPV6) {
p_id.flow.ipv = AF_INET6;
proto = parse_ip6hdr(&nh, data_end, &ip6h);
} else
// saddr and daddr in reverse order of egress (source <--> dest)
if (parse_packet_identifier(data, data_end, false, &p_id,
&p_id.flow.daddr, &p_id.flow.saddr) < 0)
goto end;
// Parse TCP timestamp
if (proto != IPPROTO_TCP)
goto end;
if (parse_tcphdr(&nh, data_end, &tcph) < 0)
goto end;
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0)
goto end;
// We have a TCP-timestamp - now we can check if it's in the map
p_id.identifier = tsecr;
p_id.flow.proto == proto;
// Fill in reverse order of egress (dest <--> source)
if (p_id.flow.ipv == AF_INET) {
map_ipv4_to_ipv6(iph->daddr, &p_id.flow.saddr);
map_ipv4_to_ipv6(iph->saddr, &p_id.flow.daddr);
} else { // IPv6
p_id.flow.saddr = ip6h->daddr;
p_id.flow.daddr = ip6h->saddr;
}
p_id.flow.sport = tcph->dest;
p_id.flow.dport = tcph->source;
p_ts = bpf_map_lookup_elem(&ts_start, &p_id);
// Only calculate RTT for first packet with matching TSecr
// Only calculate RTT for first packet with matching identifier
if (p_ts && p_ts->used == 0) {
/*
* As used is not set atomically with the lookup, could
@@ -98,7 +50,8 @@ int xdp_prog_ingress(struct xdp_md *ctx)
p_ts->used = 1;
// TODO - Optional delete of entry (if identifier is guaranteed unique)
memcpy(&event.flow, &p_id.flow, sizeof(struct network_tuple));
__builtin_memcpy(&event.flow, &p_id.flow,
sizeof(struct network_tuple));
event.rtt = bpf_ktime_get_ns() - p_ts->timestamp;
bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU,
&event, sizeof(event));