mirror of
https://github.com/xdp-project/bpf-examples.git
synced 2024-05-06 15:54:53 +00:00
pping: Add IPv6 support
Several changes to add IPv6 support: - Change structs in pping.h - replace ipv4_flow with network_tuple - rename ts_key to packet_id - rename ts_timestamp to packet_timestamp - Add map_ipv4_to_ipv6 in pping_helpers.h - Also remove obsolete fill_ipv4_flow - Rewrite pping_kern* - parse either IPv4 or IPv6 header (depending on proto) - Use map_ipv4_to_ipv6 to store IPv4 address in network_tuple - Support printout of IPv6 addresses in pping.c - Add function format_ip_address as wrapper over inet_ntop - Change handle_rtt_event to first format IP-address strings in local buffers, then perform single printout. While some steps have been taken to be more general towards different types of packet identifiers (not just the currently supported TCP timestamps), significant refactoring of pping_kern* will still be required. In addition, pping_kern_xdp and pping_kern_tc have large sections of very similar code that can be refactored into functions. Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
This commit is contained in:
@ -187,8 +187,8 @@ static __u64 get_time_ns(void)
|
||||
static int clean_map(int map_fd, __u64 max_age)
|
||||
{
|
||||
int removed = 0;
|
||||
struct ts_key key, prev_key = { 0 };
|
||||
struct ts_timestamp value;
|
||||
struct packet_id key, prev_key = { 0 };
|
||||
struct packet_timestamp value;
|
||||
bool delete_prev = false;
|
||||
__u64 now_nsec = get_time_ns();
|
||||
|
||||
@ -240,17 +240,33 @@ static void *periodic_map_cleanup(void *args)
|
||||
pthread_exit(NULL);
|
||||
}
|
||||
|
||||
/*
 * Wrapper around inet_ntop that formats an address stored in a
 * struct in6_addr as text.
 *
 * IPv4 addresses are kept as IPv4-mapped IPv6 addresses, so formatting them
 * with AF_INET6 would print them in IPv6 notation. For AF_INET only the last
 * four bytes of addr (the embedded IPv4 address) are therefore passed on to
 * inet_ntop, which yields the usual dotted-decimal form.
 *
 * af:   AF_INET or AF_INET6
 * addr: address to format (IPv4-mapped when af is AF_INET)
 * buf:  destination buffer for the NUL-terminated string
 * size: size of buf in bytes
 *
 * Returns 0 on success, -EINVAL for an unsupported address family, or
 * -errno as set by inet_ntop if the conversion fails (e.g. buf too small).
 */
static int format_ip_address(int af, const struct in6_addr *addr, char *buf,
			     size_t size)
{
	const void *src;

	if (af == AF_INET)
		src = &(addr->s6_addr[12]); /* embedded IPv4 address */
	else if (af == AF_INET6)
		src = addr;
	else
		return -EINVAL;

	/* inet_ntop returns NULL and sets errno on failure, buf on success */
	if (!inet_ntop(af, src, buf, size))
		return -errno;

	return 0;
}
|
||||
|
||||
static void handle_rtt_event(void *ctx, int cpu, void *data, __u32 data_size)
|
||||
{
|
||||
const struct rtt_event *e = data;
|
||||
struct in_addr saddr, daddr;
|
||||
saddr.s_addr = e->flow.saddr;
|
||||
daddr.s_addr = e->flow.daddr;
|
||||
char saddr[INET6_ADDRSTRLEN];
|
||||
char daddr[INET6_ADDRSTRLEN];
|
||||
|
||||
// inet_ntoa is deprecated, will switch to inet_ntop when adding IPv6 support
|
||||
printf("%llu.%06llu ms %s:%d+", e->rtt / NS_PER_MS, e->rtt % NS_PER_MS,
|
||||
inet_ntoa(daddr), ntohs(e->flow.dport));
|
||||
printf("%s:%d\n", inet_ntoa(saddr), ntohs(e->flow.sport));
|
||||
format_ip_address(e->flow.ipv, &(e->flow.saddr), saddr, sizeof(saddr));
|
||||
format_ip_address(e->flow.ipv, &(e->flow.daddr), daddr, sizeof(daddr));
|
||||
|
||||
printf("%llu.%06llu ms %s:%d+%s:%d\n", e->rtt / NS_PER_MS,
|
||||
e->rtt % NS_PER_MS, saddr, ntohs(e->flow.sport), daddr,
|
||||
ntohs(e->flow.dport));
|
||||
}
|
||||
|
||||
static void handle_missed_rtt_event(void *ctx, int cpu, __u64 lost_cnt)
|
||||
|
@ -3,30 +3,41 @@
|
||||
#define PPING_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/in6.h>
|
||||
|
||||
#define XDP_PROG_SEC "xdp"
|
||||
#define TCBPF_PROG_SEC "pping_egress"
|
||||
|
||||
// TODO - change to support both IPv4 and IPv6 (IPv4 addresses can be mapped to IPv6 addresses)
|
||||
struct ipv4_flow {
|
||||
__u32 saddr;
|
||||
__u32 daddr;
|
||||
/*
|
||||
* Struct to hold a full network tuple
|
||||
* Works for both IPv4 and IPv6, as IPv4 addresses can be mapped to IPv6 ones
|
||||
* based on RFC 4291 Section 2.5.5.2. The ipv member is technically not
|
||||
* necessary, but makes it easier to determine if it is an IPv4 or IPv6 address
|
||||
* (don't need to look at the first 12 bytes of address).
|
||||
* The proto member is not currently used, but could be useful once pping
|
||||
* is extended to work for other protocols than TCP
|
||||
*/
|
||||
struct network_tuple {
|
||||
__u8 ipv; //AF_INET or AF_INET6
|
||||
struct in6_addr saddr;
|
||||
struct in6_addr daddr;
|
||||
__u16 sport;
|
||||
__u16 dport;
|
||||
__u16 proto; //IPPROTO_TCP, IPPROTO_ICMP, QUIC etc
|
||||
};
|
||||
|
||||
struct ts_key {
|
||||
struct ipv4_flow flow;
|
||||
__u32 tsval;
|
||||
struct packet_id {
|
||||
struct network_tuple flow;
|
||||
__u32 identifier; //tsval for TCP packets
|
||||
};
|
||||
|
||||
struct ts_timestamp {
|
||||
struct packet_timestamp {
|
||||
__u64 timestamp;
|
||||
__u8 used;
|
||||
};
|
||||
|
||||
struct rtt_event {
|
||||
struct ipv4_flow flow;
|
||||
struct network_tuple flow;
|
||||
__u64 rtt;
|
||||
};
|
||||
|
||||
|
@ -2,19 +2,27 @@
|
||||
#ifndef PPING_HELPERS_H
|
||||
#define PPING_HELPERS_H
|
||||
|
||||
#include "pping.h"
|
||||
#include <linux/in6.h>
|
||||
#include <linux/tcp.h>
|
||||
#include <string.h>
|
||||
#include "pping.h"
|
||||
|
||||
#define MAX_TCP_OPTIONS 10
|
||||
|
||||
static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr,
|
||||
__u32 daddr, __u16 sport, __u16 dport)
|
||||
/*
|
||||
* Maps an IPv4 address into an IPv6 address according to RFC 4291 sec 2.5.5.2
|
||||
*/
|
||||
static __always_inline void map_ipv4_to_ipv6(__be32 ipv4, struct in6_addr *ipv6)
|
||||
{
|
||||
flow->saddr = saddr;
|
||||
flow->daddr = daddr;
|
||||
flow->sport = sport;
|
||||
flow->dport = dport;
|
||||
return 0;
|
||||
/* __u16 ipv4_prefix[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0xffff}; */
|
||||
/* memcpy(&(ipv6->in6_u.u6_addr8), ipv4_prefix, sizeof(ipv4_prefix)); */
|
||||
memset(&(ipv6->in6_u.u6_addr8[0]), 0x00, 10);
|
||||
memset(&(ipv6->in6_u.u6_addr8[10]), 0xff, 2);
|
||||
#if __UAPI_DEF_IN6_ADDR_ALT
|
||||
ipv6->in6_u.u6_addr32[3] = ipv4;
|
||||
#else
|
||||
memcpy(&(ipv6->in6_u.u6_addr8[12]), &ipv4, sizeof(ipv4));
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -5,8 +5,10 @@
|
||||
#include <xdp/parsing_helpers.h>
|
||||
|
||||
#include <linux/in.h>
|
||||
#include <linux/in6.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include <linux/tcp.h>
|
||||
|
||||
#include <string.h>
|
||||
@ -19,8 +21,8 @@ char _license[] SEC("license") = "GPL";
|
||||
#ifdef HAVE_TC_LIBBPF /* detected by configure script in config.mk */
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(key_size, sizeof(struct ts_key));
|
||||
__uint(value_size, sizeof(struct ts_timestamp));
|
||||
__uint(key_size, sizeof(struct packet_id));
|
||||
__uint(value_size, sizeof(struct packet_timestamp));
|
||||
__uint(max_entries, 16384);
|
||||
__uint(pinning, LIBBPF_PIN_BY_NAME);
|
||||
} ts_start SEC(".maps");
|
||||
@ -28,8 +30,8 @@ struct {
|
||||
#else
|
||||
struct bpf_elf_map SEC("maps") ts_start = {
|
||||
.type = BPF_MAP_TYPE_HASH,
|
||||
.size_key = sizeof(struct ts_key),
|
||||
.size_value = sizeof(struct ts_timestamp),
|
||||
.size_key = sizeof(struct packet_id),
|
||||
.size_value = sizeof(struct packet_timestamp),
|
||||
.max_elem = 16384,
|
||||
.pinning = PIN_GLOBAL_NS,
|
||||
};
|
||||
@ -43,34 +45,51 @@ int tc_bpf_prog_egress(struct __sk_buff *skb)
|
||||
void *data_end = (void *)(long)skb->data_end;
|
||||
|
||||
int proto = -1;
|
||||
__u32 tsval, tsecr;
|
||||
|
||||
struct hdr_cursor nh = { .pos = data };
|
||||
struct ethhdr *eth;
|
||||
struct iphdr *iph;
|
||||
struct ipv6hdr *ip6h;
|
||||
struct tcphdr *tcph;
|
||||
|
||||
proto = parse_ethhdr(&nh, data_end, ð);
|
||||
if (bpf_ntohs(proto) != ETH_P_IP)
|
||||
goto end;
|
||||
proto = parse_iphdr(&nh, data_end, &iph);
|
||||
if (proto != IPPROTO_TCP)
|
||||
goto end;
|
||||
proto = parse_tcphdr(&nh, data_end, &tcph);
|
||||
if (proto < 0)
|
||||
struct packet_id p_id = { 0 };
|
||||
struct packet_timestamp p_ts = { 0 };
|
||||
|
||||
proto = bpf_ntohs(parse_ethhdr(&nh, data_end, ð));
|
||||
|
||||
// Parse IPv4/6 header
|
||||
if (proto == ETH_P_IP) {
|
||||
p_id.flow.ipv = AF_INET;
|
||||
proto = parse_iphdr(&nh, data_end, &iph);
|
||||
} else if (proto == ETH_P_IPV6) {
|
||||
p_id.flow.ipv = AF_INET6;
|
||||
proto = parse_ip6hdr(&nh, data_end, &ip6h);
|
||||
} else
|
||||
goto end;
|
||||
|
||||
__u32 tsval, tsecr;
|
||||
// Parse TCP timestamp
|
||||
if (proto != IPPROTO_TCP)
|
||||
goto end;
|
||||
if (parse_tcphdr(&nh, data_end, &tcph) < 0)
|
||||
goto end;
|
||||
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0)
|
||||
goto end;
|
||||
|
||||
// We have a TCP timestamp, try adding it to the map
|
||||
struct ts_key key;
|
||||
fill_ipv4_flow(&(key.flow), iph->saddr, iph->daddr, tcph->source,
|
||||
tcph->dest);
|
||||
key.tsval = tsval;
|
||||
p_id.identifier = tsval;
|
||||
if (p_id.flow.ipv == AF_INET) {
|
||||
map_ipv4_to_ipv6(iph->saddr, &(p_id.flow.saddr));
|
||||
map_ipv4_to_ipv6(iph->daddr, &(p_id.flow.daddr));
|
||||
} else { // IPv6
|
||||
p_id.flow.saddr = ip6h->saddr;
|
||||
p_id.flow.daddr = ip6h->daddr;
|
||||
}
|
||||
p_id.flow.sport = tcph->source;
|
||||
p_id.flow.dport = tcph->dest;
|
||||
|
||||
struct ts_timestamp ts = { 0 };
|
||||
ts.timestamp = bpf_ktime_get_ns(); // or bpf_ktime_get_boot_ns
|
||||
bpf_map_update_elem(&ts_start, &key, &ts, BPF_NOEXIST);
|
||||
p_ts.timestamp = bpf_ktime_get_ns(); // or bpf_ktime_get_boot_ns
|
||||
bpf_map_update_elem(&ts_start, &p_id, &p_ts, BPF_NOEXIST);
|
||||
|
||||
end:
|
||||
return BPF_OK;
|
||||
|
@ -4,8 +4,10 @@
|
||||
#include <xdp/parsing_helpers.h>
|
||||
|
||||
#include <linux/in.h>
|
||||
#include <linux/in6.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include <linux/tcp.h>
|
||||
|
||||
#include <string.h>
|
||||
@ -17,8 +19,8 @@ char _license[] SEC("license") = "GPL";
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(key_size, sizeof(struct ts_key));
|
||||
__uint(value_size, sizeof(struct ts_timestamp));
|
||||
__uint(key_size, sizeof(struct packet_id));
|
||||
__uint(value_size, sizeof(struct packet_timestamp));
|
||||
__uint(max_entries, 16384);
|
||||
__uint(pinning, LIBBPF_PIN_BY_NAME);
|
||||
} ts_start SEC(".maps");
|
||||
@ -37,46 +39,68 @@ int xdp_prog_ingress(struct xdp_md *ctx)
|
||||
void *data_end = (void *)(long)ctx->data_end;
|
||||
|
||||
int proto = -1;
|
||||
__u32 tsval, tsecr;
|
||||
|
||||
struct hdr_cursor nh = { .pos = data };
|
||||
struct ethhdr *eth;
|
||||
struct iphdr *iph;
|
||||
struct ipv6hdr *ip6h;
|
||||
struct tcphdr *tcph;
|
||||
|
||||
proto = parse_ethhdr(&nh, data_end, ð);
|
||||
if (bpf_ntohs(proto) != ETH_P_IP)
|
||||
goto end;
|
||||
proto = parse_iphdr(&nh, data_end, &iph);
|
||||
if (proto != IPPROTO_TCP)
|
||||
goto end;
|
||||
proto = parse_tcphdr(&nh, data_end, &tcph);
|
||||
if (proto < 0)
|
||||
struct packet_id p_id = { 0 };
|
||||
struct packet_timestamp *p_ts;
|
||||
struct rtt_event event = { 0 };
|
||||
|
||||
proto = bpf_ntohs(parse_ethhdr(&nh, data_end, ð));
|
||||
|
||||
// Parse IPv4/6 header
|
||||
if (proto == ETH_P_IP) {
|
||||
p_id.flow.ipv = AF_INET;
|
||||
proto = parse_iphdr(&nh, data_end, &iph);
|
||||
} else if (proto == ETH_P_IPV6) {
|
||||
p_id.flow.ipv = AF_INET6;
|
||||
proto = parse_ip6hdr(&nh, data_end, &ip6h);
|
||||
} else
|
||||
goto end;
|
||||
|
||||
__u32 tsval, tsecr;
|
||||
// Parse TCP timestamp
|
||||
if (proto != IPPROTO_TCP)
|
||||
goto end;
|
||||
if (parse_tcphdr(&nh, data_end, &tcph) < 0)
|
||||
goto end;
|
||||
if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0)
|
||||
goto end;
|
||||
|
||||
// We have a TCP-timestamp - now we can check if it's in the map
|
||||
struct ts_key key;
|
||||
p_id.identifier = tsecr;
|
||||
p_id.flow.proto == proto;
|
||||
// Fill in reverse order of egress (dest <--> source)
|
||||
fill_ipv4_flow(&(key.flow), iph->daddr, iph->saddr, tcph->dest,
|
||||
tcph->source);
|
||||
key.tsval = tsecr;
|
||||
struct ts_timestamp *ts = bpf_map_lookup_elem(&ts_start, &key);
|
||||
if (p_id.flow.ipv == AF_INET) {
|
||||
map_ipv4_to_ipv6(iph->daddr, &(p_id.flow.saddr));
|
||||
map_ipv4_to_ipv6(iph->saddr, &(p_id.flow.daddr));
|
||||
} else { // IPv6
|
||||
p_id.flow.saddr = ip6h->daddr;
|
||||
p_id.flow.daddr = ip6h->saddr;
|
||||
}
|
||||
p_id.flow.sport = tcph->dest;
|
||||
p_id.flow.dport = tcph->source;
|
||||
|
||||
p_ts = bpf_map_lookup_elem(&ts_start, &p_id);
|
||||
|
||||
// Only calculate RTT for first packet with matching TSecr
|
||||
if (ts && ts->used == 0) {
|
||||
if (p_ts && p_ts->used == 0) {
|
||||
/*
|
||||
* As used is not set atomically with the lookup, could
|
||||
* potentially have multiple "first" packets (on different
|
||||
* CPUs), but all those should then also have very similar RTT,
|
||||
* so don't consider it a significant issue
|
||||
*/
|
||||
ts->used = 1;
|
||||
p_ts->used = 1;
|
||||
// TODO - Optional delete of entry (if identifier is guaranteed unique)
|
||||
|
||||
struct rtt_event event = { 0 };
|
||||
memcpy(&(event.flow), &(key.flow), sizeof(struct ipv4_flow));
|
||||
event.rtt = bpf_ktime_get_ns() - ts->timestamp;
|
||||
memcpy(&(event.flow), &(p_id.flow),
|
||||
sizeof(struct network_tuple));
|
||||
event.rtt = bpf_ktime_get_ns() - p_ts->timestamp;
|
||||
bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU,
|
||||
&event, sizeof(event));
|
||||
}
|
||||
|
Reference in New Issue
Block a user