Files
xdp-project-bpf-examples/pping/pping_helpers.h

231 lines
6.7 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef PPING_HELPERS_H
#define PPING_HELPERS_H
#include <linux/bpf.h>
#include <xdp/parsing_helpers.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <stdbool.h>
#include "pping.h"
#define AF_INET 2
#define AF_INET6 10
#define MAX_TCP_OPTIONS 10
/*
* This struct keeps track of the data and data_end pointers from the xdp_md or
* __skb_buff contexts, as well as a currently parsed to position kept in nh.
* Additionally, it also keeps the length of the entire packet, which together
* with the other members can be used to determine ex. how much data each
* header encloses.
*/
struct parsing_context {
void *data; //Start of eth hdr
void *data_end; //End of safe acessible area
struct hdr_cursor nh; //Position to parse next
__u32 pkt_len; //Full packet length (headers+data)
bool is_egress; //Is packet on egress or ingress?
};
static volatile const struct user_config config = {};
// Timestamp map
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, struct packet_id);
__type(value, __u64);
__uint(max_entries, 16384);
__uint(pinning, LIBBPF_PIN_BY_NAME);
} ts_start SEC(".maps");
// Flow state map
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, struct network_tuple);
__type(value, struct flow_state);
__uint(max_entries, 16384);
__uint(pinning, LIBBPF_PIN_BY_NAME);
} flow_state SEC(".maps");
/*
* Maps an IPv4 address into an IPv6 address according to RFC 4291 sec 2.5.5.2
*/
static void map_ipv4_to_ipv6(__be32 ipv4, struct in6_addr *ipv6)
{
__builtin_memset(&ipv6->in6_u.u6_addr8[0], 0x00, 10);
__builtin_memset(&ipv6->in6_u.u6_addr8[10], 0xff, 2);
ipv6->in6_u.u6_addr32[3] = ipv4;
}
/*
* Parses the TSval and TSecr values from the TCP options field. If sucessful
* the TSval and TSecr values will be stored at tsval and tsecr (in network
* byte order).
* Returns 0 if sucessful and -1 on failure
*/
static int parse_tcp_ts(struct tcphdr *tcph, void *data_end, __u32 *tsval,
__u32 *tsecr)
{
int len = tcph->doff << 2;
void *opt_end = (void *)tcph + len;
__u8 *pos = (__u8 *)(tcph + 1); //Current pos in TCP options
__u8 i, opt, opt_size;
if (tcph + 1 > data_end || len <= sizeof(struct tcphdr))
return -1;
#pragma unroll //temporary solution until we can identify why the non-unrolled loop gets stuck in an infinite loop
for (i = 0; i < MAX_TCP_OPTIONS; i++) {
if (pos + 1 > opt_end || pos + 1 > data_end)
return -1;
opt = *pos;
if (opt == 0) // Reached end of TCP options
return -1;
if (opt == 1) { // TCP NOP option - advance one byte
pos++;
continue;
}
// Option > 1, should have option size
if (pos + 2 > opt_end || pos + 2 > data_end)
return -1;
opt_size = *(pos + 1);
// Option-kind is TCP timestap (yey!)
if (opt == 8 && opt_size == 10) {
if (pos + opt_size > opt_end ||
pos + opt_size > data_end)
return -1;
*tsval = *(__u32 *)(pos + 2);
*tsecr = *(__u32 *)(pos + 6);
return 0;
}
// Some other TCP option - advance option-length bytes
/*
* The &= is to make sure that verifier knows opt_size must
* be positive, as verifier may not remeber u8 constraint if
* it fetches variable from stack, see ex
* https://blog.path.net/ebpf-xdp-and-network-security/.
* Use 0x3f instead of 0xff to avoid compiler optimizing
* away the check, and no option should be able to have a
* size of above 63 anyways (maximum TCP header size is 64
* bytes, and no more than 64-20=44 of them can by options).
*/
opt_size &= 0x3f;
pos += opt_size;
}
return -1;
}
/*
* Attempts to fetch an identifier for TCP packets, based on the TCP timestamp
* option. If sucessful, identifier will be set to TSval if is_ingress, TSecr
* otherwise, the port-members of saddr and daddr will be set the the TCP source
* and dest, respectively, and 0 will be returned. On failure, -1 will be
* returned. Additionally, if the connection is closing (FIN or RST flag), sets
* flow_closing to true.
*/
static int parse_tcp_identifier(struct parsing_context *ctx, __be16 *sport,
__be16 *dport, bool *flow_closing,
__u32 *identifier)
{
__u32 tsval, tsecr;
struct tcphdr *tcph;
if (parse_tcphdr(&ctx->nh, ctx->data_end, &tcph) < 0)
return -1;
// Check if connection is closing
if (tcph->fin || tcph->rst) {
*flow_closing = true;
/* bpf_printk("Detected connection closing on %d\n", */
/* ctx->is_egress); //Upsets verifier? */
}
// Do not timestamp pure ACKs
if (ctx->is_egress && ctx->nh.pos - ctx->data >= ctx->pkt_len &&
!tcph->syn)
return -1;
if (parse_tcp_ts(tcph, ctx->data_end, &tsval, &tsecr) < 0)
return -1; //Possible TODO, fall back on seq/ack instead
*sport = tcph->source;
*dport = tcph->dest;
*identifier = ctx->is_egress ? tsval : tsecr;
return 0;
}
/*
* Attempts to parse the packet limited by the data and data_end pointers,
* to retrieve a protocol dependent packet identifier. If sucessful, the
* pointed to p_id will be filled with parsed information from the packet
* packet, and 0 will be returned. On failure, -1 will be returned.
* If is_egress saddr and daddr will match source and destination of packet,
* respectively, and identifier will be set to the identifer for an outgoing
* packet. Otherwise, saddr and daddr will be swapped (will match
* destination and source of packet, respectively), and identifier will be
* set to the identifier of a response.
*/
static int parse_packet_identifier(struct parsing_context *ctx,
struct packet_id *p_id, bool *flow_closing)
{
int proto, err;
struct ethhdr *eth;
struct iphdr *iph;
struct ipv6hdr *ip6h;
struct flow_address *saddr, *daddr;
// Switch saddr <--> daddr on ingress to match egress
if (ctx->is_egress) {
saddr = &p_id->flow.saddr;
daddr = &p_id->flow.daddr;
} else {
saddr = &p_id->flow.daddr;
daddr = &p_id->flow.saddr;
}
proto = parse_ethhdr(&ctx->nh, ctx->data_end, &eth);
// Parse IPv4/6 header
if (proto == bpf_htons(ETH_P_IP)) {
p_id->flow.ipv = AF_INET;
proto = parse_iphdr(&ctx->nh, ctx->data_end, &iph);
} else if (proto == bpf_htons(ETH_P_IPV6)) {
p_id->flow.ipv = AF_INET6;
proto = parse_ip6hdr(&ctx->nh, ctx->data_end, &ip6h);
} else {
return -1;
}
// Add new protocols here
if (proto == IPPROTO_TCP) {
err = parse_tcp_identifier(ctx, &saddr->port, &daddr->port,
flow_closing, &p_id->identifier);
if (err)
return -1;
} else {
return -1;
}
// Sucessfully parsed packet identifier - fill in IP-addresses and return
if (p_id->flow.ipv == AF_INET) {
map_ipv4_to_ipv6(iph->saddr, &saddr->ip);
map_ipv4_to_ipv6(iph->daddr, &daddr->ip);
} else { // IPv6
saddr->ip = ip6h->saddr;
daddr->ip = ip6h->daddr;
}
return 0;
}
#endif