diff --git a/pping/pping.c b/pping/pping.c index 5154a5d..bae883b 100644 --- a/pping/pping.c +++ b/pping/pping.c @@ -187,8 +187,8 @@ static __u64 get_time_ns(void) static int clean_map(int map_fd, __u64 max_age) { int removed = 0; - struct ts_key key, prev_key = { 0 }; - struct ts_timestamp value; + struct packet_id key, prev_key = { 0 }; + struct packet_timestamp value; bool delete_prev = false; __u64 now_nsec = get_time_ns(); @@ -240,17 +240,35 @@ static void *periodic_map_cleanup(void *args) pthread_exit(NULL); } +/* + * Wrapper around inet_ntop designed to handle the "bug" that mapped IPv4 + * addresses are formatted as IPv6 addresses for AF_INET6 + * Returns 0 on success, -errno if inet_ntop() fails (it returns NULL on + * error) and -EINVAL if af is neither AF_INET nor AF_INET6 + */ +static int format_ip_address(int af, const struct in6_addr *addr, char *buf, + size_t size) +{ + if (af == AF_INET) + return inet_ntop(af, &(addr->s6_addr[12]), + buf, size) ? 0 : -errno; + else if (af == AF_INET6) + return inet_ntop(af, addr, buf, size) ? 0 : -errno; + return -EINVAL; +} + static void handle_rtt_event(void *ctx, int cpu, void *data, __u32 data_size) { const struct rtt_event *e = data; - struct in_addr saddr, daddr; - saddr.s_addr = e->flow.saddr; - daddr.s_addr = e->flow.daddr; + char saddr[INET6_ADDRSTRLEN]; + char daddr[INET6_ADDRSTRLEN]; - // inet_ntoa is deprecated, will switch to inet_ntop when adding IPv6 support - printf("%llu.%06llu ms %s:%d+", e->rtt / NS_PER_MS, e->rtt % NS_PER_MS, - inet_ntoa(daddr), ntohs(e->flow.dport)); - printf("%s:%d\n", inet_ntoa(saddr), ntohs(e->flow.sport)); + format_ip_address(e->flow.ipv, &(e->flow.saddr), saddr, sizeof(saddr)); + format_ip_address(e->flow.ipv, &(e->flow.daddr), daddr, sizeof(daddr)); + + printf("%llu.%06llu ms %s:%d+%s:%d\n", e->rtt / NS_PER_MS, + e->rtt % NS_PER_MS, saddr, ntohs(e->flow.sport), daddr, + ntohs(e->flow.dport)); } static void handle_missed_rtt_event(void *ctx, int cpu, __u64 lost_cnt) diff --git a/pping/pping.h b/pping/pping.h index 08dcba4..2dcb07b 100644 --- a/pping/pping.h +++ b/pping/pping.h @@ -3,30 +3,41 @@ #define 
PPING_H #include +#include #define XDP_PROG_SEC "xdp" #define TCBPF_PROG_SEC "pping_egress" -// TODO - change to support both IPv4 and IPv6 (IPv4 addresses can be mapped to IPv6 addresses) -struct ipv4_flow { - __u32 saddr; - __u32 daddr; +/* + * Struct to hold a full network tuple + * Works for both IPv4 and IPv6, as IPv4 addresses can be mapped to IPv6 ones + * based on RFC 4291 Section 2.5.5.2. The ipv member is technically not + * necessary, but makes it easier to determine if it is an IPv4 or IPv6 address + * (don't need to look at the first 12 bytes of address). + * The proto member is not currently used, but could be useful once pping + * is extended to work for other protocols than TCP + */ +struct network_tuple { + __u8 ipv; //AF_INET or AF_INET6 + struct in6_addr saddr; + struct in6_addr daddr; __u16 sport; __u16 dport; + __u16 proto; //IPPROTO_TCP, IPPROTO_ICMP, QUIC etc }; -struct ts_key { - struct ipv4_flow flow; - __u32 tsval; +struct packet_id { + struct network_tuple flow; + __u32 identifier; //tsval for TCP packets }; -struct ts_timestamp { +struct packet_timestamp { __u64 timestamp; __u8 used; }; struct rtt_event { - struct ipv4_flow flow; + struct network_tuple flow; __u64 rtt; }; diff --git a/pping/pping_helpers.h b/pping/pping_helpers.h index ee6d8b9..6e5b14f 100644 --- a/pping/pping_helpers.h +++ b/pping/pping_helpers.h @@ -2,19 +2,27 @@ #ifndef PPING_HELPERS_H #define PPING_HELPERS_H -#include "pping.h" +#include #include +#include +#include "pping.h" #define MAX_TCP_OPTIONS 10 -static __always_inline int fill_ipv4_flow(struct ipv4_flow *flow, __u32 saddr, - __u32 daddr, __u16 sport, __u16 dport) +/* + * Maps an IPv4 address into an IPv6 address according to RFC 4291 sec 2.5.5.2 + */ +static __always_inline void map_ipv4_to_ipv6(__be32 ipv4, struct in6_addr *ipv6) { - flow->saddr = saddr; - flow->daddr = daddr; - flow->sport = sport; - flow->dport = dport; - return 0; + /* __u16 ipv4_prefix[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0xffff}; */ + /* 
memcpy(&(ipv6->in6_u.u6_addr8), ipv4_prefix, sizeof(ipv4_prefix)); */ + memset(&(ipv6->in6_u.u6_addr8[0]), 0x00, 10); + memset(&(ipv6->in6_u.u6_addr8[10]), 0xff, 2); +#if __UAPI_DEF_IN6_ADDR_ALT + ipv6->in6_u.u6_addr32[3] = ipv4; +#else + memcpy(&(ipv6->in6_u.u6_addr8[12]), &ipv4, sizeof(ipv4)); +#endif } /* diff --git a/pping/pping_kern_tc.c b/pping/pping_kern_tc.c index 92e235e..a4c2060 100644 --- a/pping/pping_kern_tc.c +++ b/pping/pping_kern_tc.c @@ -5,8 +5,10 @@ #include #include +#include #include #include +#include #include #include @@ -19,8 +21,8 @@ char _license[] SEC("license") = "GPL"; #ifdef HAVE_TC_LIBBPF /* detected by configure script in config.mk */ struct { __uint(type, BPF_MAP_TYPE_HASH); - __uint(key_size, sizeof(struct ts_key)); - __uint(value_size, sizeof(struct ts_timestamp)); + __uint(key_size, sizeof(struct packet_id)); + __uint(value_size, sizeof(struct packet_timestamp)); __uint(max_entries, 16384); __uint(pinning, LIBBPF_PIN_BY_NAME); } ts_start SEC(".maps"); @@ -28,8 +30,8 @@ struct { #else struct bpf_elf_map SEC("maps") ts_start = { .type = BPF_MAP_TYPE_HASH, - .size_key = sizeof(struct ts_key), - .size_value = sizeof(struct ts_timestamp), + .size_key = sizeof(struct packet_id), + .size_value = sizeof(struct packet_timestamp), .max_elem = 16384, .pinning = PIN_GLOBAL_NS, }; @@ -43,34 +45,52 @@ int tc_bpf_prog_egress(struct __sk_buff *skb) void *data_end = (void *)(long)skb->data_end; int proto = -1; + __u32 tsval, tsecr; + struct hdr_cursor nh = { .pos = data }; struct ethhdr *eth; struct iphdr *iph; + struct ipv6hdr *ip6h; struct tcphdr *tcph; - proto = parse_ethhdr(&nh, data_end, &eth); - if (bpf_ntohs(proto) != ETH_P_IP) - goto end; - proto = parse_iphdr(&nh, data_end, &iph); - if (proto != IPPROTO_TCP) - goto end; - proto = parse_tcphdr(&nh, data_end, &tcph); - if (proto < 0) + struct packet_id p_id = { 0 }; + struct packet_timestamp p_ts = { 0 }; + + proto = bpf_ntohs(parse_ethhdr(&nh, data_end, &eth)); + + // Parse IPv4/6 header + if 
(proto == ETH_P_IP) { + p_id.flow.ipv = AF_INET; + proto = parse_iphdr(&nh, data_end, &iph); + } else if (proto == ETH_P_IPV6) { + p_id.flow.ipv = AF_INET6; + proto = parse_ip6hdr(&nh, data_end, &ip6h); + } else goto end; - __u32 tsval, tsecr; + // Parse TCP timestamp + if (proto != IPPROTO_TCP) + goto end; + if (parse_tcphdr(&nh, data_end, &tcph) < 0) + goto end; if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0) goto end; // We have a TCP timestamp, try adding it to the map - struct ts_key key; - fill_ipv4_flow(&(key.flow), iph->saddr, iph->daddr, tcph->source, - tcph->dest); - key.tsval = tsval; + p_id.identifier = tsval; + p_id.flow.proto = proto; /* key field, also set on ingress */ + if (p_id.flow.ipv == AF_INET) { + map_ipv4_to_ipv6(iph->saddr, &(p_id.flow.saddr)); + map_ipv4_to_ipv6(iph->daddr, &(p_id.flow.daddr)); + } else { // IPv6 + p_id.flow.saddr = ip6h->saddr; + p_id.flow.daddr = ip6h->daddr; + } + p_id.flow.sport = tcph->source; + p_id.flow.dport = tcph->dest; - struct ts_timestamp ts = { 0 }; - ts.timestamp = bpf_ktime_get_ns(); // or bpf_ktime_get_boot_ns - bpf_map_update_elem(&ts_start, &key, &ts, BPF_NOEXIST); + p_ts.timestamp = bpf_ktime_get_ns(); // or bpf_ktime_get_boot_ns + bpf_map_update_elem(&ts_start, &p_id, &p_ts, BPF_NOEXIST); end: return BPF_OK; diff --git a/pping/pping_kern_xdp.c b/pping/pping_kern_xdp.c index 1379124..c94ab36 100644 --- a/pping/pping_kern_xdp.c +++ b/pping/pping_kern_xdp.c @@ -4,8 +4,10 @@ #include #include +#include #include #include +#include #include #include @@ -17,8 +19,8 @@ char _license[] SEC("license") = "GPL"; struct { __uint(type, BPF_MAP_TYPE_HASH); - __uint(key_size, sizeof(struct ts_key)); - __uint(value_size, sizeof(struct ts_timestamp)); + __uint(key_size, sizeof(struct packet_id)); + __uint(value_size, sizeof(struct packet_timestamp)); __uint(max_entries, 16384); __uint(pinning, LIBBPF_PIN_BY_NAME); } ts_start SEC(".maps"); @@ -37,46 +39,68 @@ int xdp_prog_ingress(struct xdp_md *ctx) void *data_end = (void *)(long)ctx->data_end; int proto = -1; + __u32 tsval, 
tsecr; + struct hdr_cursor nh = { .pos = data }; struct ethhdr *eth; struct iphdr *iph; + struct ipv6hdr *ip6h; struct tcphdr *tcph; - proto = parse_ethhdr(&nh, data_end, &eth); - if (bpf_ntohs(proto) != ETH_P_IP) - goto end; - proto = parse_iphdr(&nh, data_end, &iph); - if (proto != IPPROTO_TCP) - goto end; - proto = parse_tcphdr(&nh, data_end, &tcph); - if (proto < 0) + struct packet_id p_id = { 0 }; + struct packet_timestamp *p_ts; + struct rtt_event event = { 0 }; + + proto = bpf_ntohs(parse_ethhdr(&nh, data_end, &eth)); + + // Parse IPv4/6 header + if (proto == ETH_P_IP) { + p_id.flow.ipv = AF_INET; + proto = parse_iphdr(&nh, data_end, &iph); + } else if (proto == ETH_P_IPV6) { + p_id.flow.ipv = AF_INET6; + proto = parse_ip6hdr(&nh, data_end, &ip6h); + } else goto end; - __u32 tsval, tsecr; + // Parse TCP timestamp + if (proto != IPPROTO_TCP) + goto end; + if (parse_tcphdr(&nh, data_end, &tcph) < 0) + goto end; if (parse_tcp_ts(tcph, data_end, &tsval, &tsecr) < 0) goto end; // We have a TCP-timestamp - now we can check if it's in the map - struct ts_key key; + p_id.identifier = tsecr; + p_id.flow.proto = proto; /* key field, must match egress */ // Fill in reverse order of egress (dest <--> source) - fill_ipv4_flow(&(key.flow), iph->daddr, iph->saddr, tcph->dest, - tcph->source); - key.tsval = tsecr; - struct ts_timestamp *ts = bpf_map_lookup_elem(&ts_start, &key); + if (p_id.flow.ipv == AF_INET) { + map_ipv4_to_ipv6(iph->daddr, &(p_id.flow.saddr)); + map_ipv4_to_ipv6(iph->saddr, &(p_id.flow.daddr)); + } else { // IPv6 + p_id.flow.saddr = ip6h->daddr; + p_id.flow.daddr = ip6h->saddr; + } + p_id.flow.sport = tcph->dest; + p_id.flow.dport = tcph->source; + + p_ts = bpf_map_lookup_elem(&ts_start, &p_id); // Only calculate RTT for first packet with matching TSecr - if (ts && ts->used == 0) { + if (p_ts && p_ts->used == 0) { /* * As used is not set atomically with the lookup, could * potentially have multiple "first" packets (on different * CPUs), but all those should then also have very similar RTT, * 
so don't consider it a significant issue */ - ts->used = 1; + p_ts->used = 1; + // TODO - Optional delete of entry (if identifier is guaranteed unique) - struct rtt_event event = { 0 }; - memcpy(&(event.flow), &(key.flow), sizeof(struct ipv4_flow)); - event.rtt = bpf_ktime_get_ns() - ts->timestamp; + memcpy(&(event.flow), &(p_id.flow), + sizeof(struct network_tuple)); + event.rtt = bpf_ktime_get_ns() - p_ts->timestamp; bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU, &event, sizeof(event)); }