2021-01-18 13:13:51 +01:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
2021-02-09 18:09:30 +01:00
|
|
|
#include <linux/bpf.h>
|
2021-04-15 14:13:54 +02:00
|
|
|
#include <bpf/bpf_helpers.h>
|
2021-02-09 18:09:30 +01:00
|
|
|
#include <linux/in.h>
|
2021-02-08 20:28:46 +01:00
|
|
|
#include <linux/in6.h>
|
2021-02-09 18:09:30 +01:00
|
|
|
#include <linux/if_ether.h>
|
|
|
|
#include <linux/ip.h>
|
|
|
|
#include <linux/ipv6.h>
|
2021-01-26 18:34:23 +01:00
|
|
|
#include <linux/tcp.h>
|
2021-02-09 18:09:30 +01:00
|
|
|
#include <stdbool.h>
|
2021-04-15 14:13:54 +02:00
|
|
|
|
2021-04-22 17:51:49 +02:00
|
|
|
// overwrite xdp/parsing_helpers.h value to avoid hitting verifier limit
|
|
|
|
#ifdef IPV6_EXT_MAX_CHAIN
|
|
|
|
#undef IPV6_EXT_MAX_CHAIN
|
|
|
|
#endif
|
|
|
|
#define IPV6_EXT_MAX_CHAIN 3
|
|
|
|
|
|
|
|
#include <xdp/parsing_helpers.h>
|
2021-02-08 20:28:46 +01:00
|
|
|
#include "pping.h"
|
2021-01-26 18:34:23 +01:00
|
|
|
|
2021-02-09 13:00:28 +01:00
|
|
|
#define AF_INET 2
|
|
|
|
#define AF_INET6 10
|
2021-01-07 18:30:53 +01:00
|
|
|
#define MAX_TCP_OPTIONS 10
|
|
|
|
|
2021-02-12 18:31:30 +01:00
|
|
|
/*
|
|
|
|
* This struct keeps track of the data and data_end pointers from the xdp_md or
|
|
|
|
* __skb_buff contexts, as well as a currently parsed to position kept in nh.
|
2021-02-16 12:34:19 +01:00
|
|
|
* Additionally, it also keeps the length of the entire packet, which together
|
|
|
|
* with the other members can be used to determine ex. how much data each
|
|
|
|
* header encloses.
|
2021-02-12 18:31:30 +01:00
|
|
|
*/
|
|
|
|
struct parsing_context {
|
2021-04-15 14:13:54 +02:00
|
|
|
void *data; //Start of eth hdr
|
|
|
|
void *data_end; //End of safe acessible area
|
2021-02-12 18:31:30 +01:00
|
|
|
struct hdr_cursor nh; //Position to parse next
|
2021-03-22 12:23:27 +01:00
|
|
|
__u32 pkt_len; //Full packet length (headers+data)
|
|
|
|
bool is_egress; //Is packet on egress or ingress?
|
2021-02-12 18:31:30 +01:00
|
|
|
};
|
|
|
|
|
2021-04-15 14:13:54 +02:00
|
|
|
char _license[] SEC("license") = "GPL";
|
|
|
|
// Global config struct - set from userspace
|
|
|
|
static volatile const struct bpf_config config = {};
|
2021-03-22 12:23:27 +01:00
|
|
|
|
2021-04-15 14:13:54 +02:00
|
|
|
// Map definitions
|
2021-03-02 17:40:51 +01:00
|
|
|
struct {
|
|
|
|
__uint(type, BPF_MAP_TYPE_HASH);
|
|
|
|
__type(key, struct packet_id);
|
|
|
|
__type(value, __u64);
|
|
|
|
__uint(max_entries, 16384);
|
2021-04-15 14:13:54 +02:00
|
|
|
} packet_ts SEC(".maps");
|
2021-03-02 17:40:51 +01:00
|
|
|
|
2021-03-09 19:58:42 +01:00
|
|
|
struct {
|
|
|
|
__uint(type, BPF_MAP_TYPE_HASH);
|
|
|
|
__type(key, struct network_tuple);
|
|
|
|
__type(value, struct flow_state);
|
|
|
|
__uint(max_entries, 16384);
|
|
|
|
} flow_state SEC(".maps");
|
|
|
|
|
2021-04-15 14:13:54 +02:00
|
|
|
struct {
|
|
|
|
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
|
|
|
__uint(key_size, sizeof(__u32));
|
|
|
|
__uint(value_size, sizeof(__u32));
|
|
|
|
} rtt_events SEC(".maps");
|
|
|
|
|
|
|
|
// Helper functions
|
|
|
|
|
2021-02-08 20:28:46 +01:00
|
|
|
/*
 * Maps an IPv4 address into an IPv6 address according to RFC 4291 sec 2.5.5.2
 *
 * ipv4: the IPv4 address, copied unchanged into the last 32 bits of ipv6
 * ipv6: destination address, all 16 bytes of it are overwritten
 */
static void map_ipv4_to_ipv6(__be32 ipv4, struct in6_addr *ipv6)
{
	// Bytes 0-9 are zero and bytes 10-11 are 0xff
	__builtin_memset(&ipv6->in6_u.u6_addr8[0], 0x00, 10);
	__builtin_memset(&ipv6->in6_u.u6_addr8[10], 0xff, 2);
	// Bytes 12-15 hold the IPv4 address
	ipv6->in6_u.u6_addr32[3] = ipv4;
}
|
2021-01-26 18:34:23 +01:00
|
|
|
|
2021-01-18 18:08:35 +01:00
|
|
|
/*
|
2021-01-26 18:34:23 +01:00
|
|
|
* Parses the TSval and TSecr values from the TCP options field. If sucessful
|
|
|
|
* the TSval and TSecr values will be stored at tsval and tsecr (in network
|
2021-01-18 18:08:35 +01:00
|
|
|
* byte order).
|
|
|
|
* Returns 0 if sucessful and -1 on failure
|
|
|
|
*/
|
2021-02-09 18:09:30 +01:00
|
|
|
static int parse_tcp_ts(struct tcphdr *tcph, void *data_end, __u32 *tsval,
|
|
|
|
__u32 *tsecr)
|
2021-01-07 18:30:53 +01:00
|
|
|
{
|
2021-01-27 12:16:11 +01:00
|
|
|
int len = tcph->doff << 2;
|
2021-01-26 18:34:23 +01:00
|
|
|
void *opt_end = (void *)tcph + len;
|
2021-01-27 12:16:11 +01:00
|
|
|
__u8 *pos = (__u8 *)(tcph + 1); //Current pos in TCP options
|
2021-03-29 20:13:33 +02:00
|
|
|
__u8 i, opt;
|
2021-04-15 14:13:54 +02:00
|
|
|
volatile __u8
|
|
|
|
opt_size; // Seems to ensure it's always read of from stack as u8
|
2021-01-07 18:30:53 +01:00
|
|
|
|
2021-01-27 12:16:11 +01:00
|
|
|
if (tcph + 1 > data_end || len <= sizeof(struct tcphdr))
|
|
|
|
return -1;
|
2021-03-15 18:23:23 +01:00
|
|
|
#pragma unroll //temporary solution until we can identify why the non-unrolled loop gets stuck in an infinite loop
|
2021-01-27 12:16:11 +01:00
|
|
|
for (i = 0; i < MAX_TCP_OPTIONS; i++) {
|
|
|
|
if (pos + 1 > opt_end || pos + 1 > data_end)
|
|
|
|
return -1;
|
2021-01-26 18:34:23 +01:00
|
|
|
|
2021-01-27 12:16:11 +01:00
|
|
|
opt = *pos;
|
|
|
|
if (opt == 0) // Reached end of TCP options
|
|
|
|
return -1;
|
2021-01-26 18:34:23 +01:00
|
|
|
|
2021-01-27 12:16:11 +01:00
|
|
|
if (opt == 1) { // TCP NOP option - advance one byte
|
|
|
|
pos++;
|
|
|
|
continue;
|
|
|
|
}
|
2021-01-26 18:34:23 +01:00
|
|
|
|
2021-01-27 12:16:11 +01:00
|
|
|
// Option > 1, should have option size
|
|
|
|
if (pos + 2 > opt_end || pos + 2 > data_end)
|
|
|
|
return -1;
|
|
|
|
opt_size = *(pos + 1);
|
2021-03-30 19:34:48 +02:00
|
|
|
if (opt_size < 2) // Stop parsing options if opt_size has an invalid value
|
|
|
|
return -1;
|
2021-01-26 18:34:23 +01:00
|
|
|
|
2021-01-27 12:16:11 +01:00
|
|
|
// Option-kind is TCP timestap (yey!)
|
|
|
|
if (opt == 8 && opt_size == 10) {
|
2021-03-29 20:13:33 +02:00
|
|
|
if (pos + 10 > opt_end || pos + 10 > data_end)
|
2021-01-27 12:16:11 +01:00
|
|
|
return -1;
|
|
|
|
*tsval = *(__u32 *)(pos + 2);
|
|
|
|
*tsecr = *(__u32 *)(pos + 6);
|
|
|
|
return 0;
|
|
|
|
}
|
2021-01-07 18:30:53 +01:00
|
|
|
|
2021-01-27 12:16:11 +01:00
|
|
|
// Some other TCP option - advance option-length bytes
|
|
|
|
pos += opt_size;
|
|
|
|
}
|
|
|
|
return -1;
|
2021-01-07 18:30:53 +01:00
|
|
|
}
|
2021-03-09 19:58:42 +01:00
|
|
|
|
2021-02-09 18:09:30 +01:00
|
|
|
/*
|
|
|
|
* Attempts to fetch an identifier for TCP packets, based on the TCP timestamp
|
|
|
|
* option. If sucessful, identifier will be set to TSval if is_ingress, TSecr
|
|
|
|
* otherwise, the port-members of saddr and daddr will be set the the TCP source
|
|
|
|
* and dest, respectively, and 0 will be returned. On failure, -1 will be
|
2021-03-09 19:58:42 +01:00
|
|
|
* returned. Additionally, if the connection is closing (FIN or RST flag), sets
|
|
|
|
* flow_closing to true.
|
2021-02-09 18:09:30 +01:00
|
|
|
*/
|
2021-03-09 19:58:42 +01:00
|
|
|
static int parse_tcp_identifier(struct parsing_context *ctx, __be16 *sport,
|
|
|
|
__be16 *dport, bool *flow_closing,
|
|
|
|
__u32 *identifier)
|
2021-02-09 18:09:30 +01:00
|
|
|
{
|
|
|
|
__u32 tsval, tsecr;
|
|
|
|
struct tcphdr *tcph;
|
|
|
|
|
2021-02-12 18:31:30 +01:00
|
|
|
if (parse_tcphdr(&ctx->nh, ctx->data_end, &tcph) < 0)
|
|
|
|
return -1;
|
|
|
|
|
2021-03-09 19:58:42 +01:00
|
|
|
// Check if connection is closing
|
pping: Add timestamp and min-RTT to output
To add timestamp to output, push the timestamp when packet was
processed from kernel as part of the rtt-event. Also keep track of
minimum encountered RTT for each flow in kernel, and also push that as
part of the RTT-event.
Additionally, avoid pushing RTT messages at all if no flow-state
information can be found (due to ex. being deleted from egress side),
as no valid min-RTT can then be given. Furthermore, no longer delete
flow-information once seeing the FIN-flag on egress in order to keep
useful flow-state around for RTT-messages longer. Due to the
FIN-handshake process, it is sufficient if the ingress program deletes
the flow-state upon seeing FIN. However, still delete flow-state from
either ingress or egress upon seeing RST flag, as RST does not have a
handshake process allowing for delayed deletion.
While minimum RTT could also be tracked from the userspace process,
userspace is not aware of when the flow is closed so would have to add
additional logic to keep track of minimum RTT for each flow and
periodically clean them up. Furthermore, keeping RTT statistics in the
flow-state map is useful for implementing future features, such as an
RTT-based sampling interval. It would also be useful in case pping is
changed to no longer have a long-running userspace process printing
out all the calculated RTTs, but instead simply occasionally looks up
the RTT from the flow-state map.
Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
2021-04-29 18:55:06 +02:00
|
|
|
*flow_closing = tcph->rst || (!ctx->is_egress && tcph->fin);
|
2021-03-09 19:58:42 +01:00
|
|
|
|
2021-02-12 18:31:30 +01:00
|
|
|
// Do not timestamp pure ACKs
|
2021-03-09 19:58:42 +01:00
|
|
|
if (ctx->is_egress && ctx->nh.pos - ctx->data >= ctx->pkt_len &&
|
|
|
|
!tcph->syn)
|
2021-02-09 18:09:30 +01:00
|
|
|
return -1;
|
2021-02-12 11:40:43 +01:00
|
|
|
|
2021-02-12 18:31:30 +01:00
|
|
|
if (parse_tcp_ts(tcph, ctx->data_end, &tsval, &tsecr) < 0)
|
2021-02-09 18:09:30 +01:00
|
|
|
return -1; //Possible TODO, fall back on seq/ack instead
|
|
|
|
|
2021-02-16 12:34:19 +01:00
|
|
|
*sport = tcph->source;
|
|
|
|
*dport = tcph->dest;
|
2021-03-09 19:58:42 +01:00
|
|
|
*identifier = ctx->is_egress ? tsval : tsecr;
|
2021-02-09 18:09:30 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Attempts to parse the packet limited by the data and data_end pointers,
|
|
|
|
* to retrieve a protocol dependent packet identifier. If sucessful, the
|
2021-02-12 11:40:43 +01:00
|
|
|
* pointed to p_id will be filled with parsed information from the packet
|
2021-02-09 18:09:30 +01:00
|
|
|
* packet, and 0 will be returned. On failure, -1 will be returned.
|
2021-02-12 11:40:43 +01:00
|
|
|
* If is_egress saddr and daddr will match source and destination of packet,
|
|
|
|
* respectively, and identifier will be set to the identifer for an outgoing
|
|
|
|
* packet. Otherwise, saddr and daddr will be swapped (will match
|
|
|
|
* destination and source of packet, respectively), and identifier will be
|
|
|
|
* set to the identifier of a response.
|
2021-02-09 18:09:30 +01:00
|
|
|
*/
|
2021-03-09 19:58:42 +01:00
|
|
|
static int parse_packet_identifier(struct parsing_context *ctx,
|
|
|
|
struct packet_id *p_id, bool *flow_closing)
|
2021-02-09 18:09:30 +01:00
|
|
|
{
|
2021-02-12 11:40:43 +01:00
|
|
|
int proto, err;
|
2021-02-09 18:09:30 +01:00
|
|
|
struct ethhdr *eth;
|
|
|
|
struct iphdr *iph;
|
|
|
|
struct ipv6hdr *ip6h;
|
2021-02-12 11:40:43 +01:00
|
|
|
struct flow_address *saddr, *daddr;
|
|
|
|
|
|
|
|
// Switch saddr <--> daddr on ingress to match egress
|
2021-03-09 19:58:42 +01:00
|
|
|
if (ctx->is_egress) {
|
2021-02-12 11:40:43 +01:00
|
|
|
saddr = &p_id->flow.saddr;
|
|
|
|
daddr = &p_id->flow.daddr;
|
|
|
|
} else {
|
|
|
|
saddr = &p_id->flow.daddr;
|
|
|
|
daddr = &p_id->flow.saddr;
|
|
|
|
}
|
2021-02-09 18:09:30 +01:00
|
|
|
|
2021-02-12 18:31:30 +01:00
|
|
|
proto = parse_ethhdr(&ctx->nh, ctx->data_end, ð);
|
2021-02-09 18:09:30 +01:00
|
|
|
|
|
|
|
// Parse IPv4/6 header
|
|
|
|
if (proto == bpf_htons(ETH_P_IP)) {
|
|
|
|
p_id->flow.ipv = AF_INET;
|
2021-04-30 11:36:41 +02:00
|
|
|
p_id->flow.proto = parse_iphdr(&ctx->nh, ctx->data_end, &iph);
|
2021-02-09 18:09:30 +01:00
|
|
|
} else if (proto == bpf_htons(ETH_P_IPV6)) {
|
|
|
|
p_id->flow.ipv = AF_INET6;
|
2021-04-30 11:36:41 +02:00
|
|
|
p_id->flow.proto = parse_ip6hdr(&ctx->nh, ctx->data_end, &ip6h);
|
2021-02-12 11:40:43 +01:00
|
|
|
} else {
|
2021-02-09 18:09:30 +01:00
|
|
|
return -1;
|
2021-02-12 11:40:43 +01:00
|
|
|
}
|
2021-02-09 18:09:30 +01:00
|
|
|
|
|
|
|
// Add new protocols here
|
2021-04-30 11:36:41 +02:00
|
|
|
if (p_id->flow.proto == IPPROTO_TCP) {
|
2021-03-09 19:58:42 +01:00
|
|
|
err = parse_tcp_identifier(ctx, &saddr->port, &daddr->port,
|
|
|
|
flow_closing, &p_id->identifier);
|
2021-02-12 11:40:43 +01:00
|
|
|
if (err)
|
|
|
|
return -1;
|
|
|
|
} else {
|
2021-02-09 18:09:30 +01:00
|
|
|
return -1;
|
2021-02-12 11:40:43 +01:00
|
|
|
}
|
2021-02-09 18:09:30 +01:00
|
|
|
|
|
|
|
// Sucessfully parsed packet identifier - fill in IP-addresses and return
|
|
|
|
if (p_id->flow.ipv == AF_INET) {
|
|
|
|
map_ipv4_to_ipv6(iph->saddr, &saddr->ip);
|
|
|
|
map_ipv4_to_ipv6(iph->daddr, &daddr->ip);
|
|
|
|
} else { // IPv6
|
|
|
|
saddr->ip = ip6h->saddr;
|
|
|
|
daddr->ip = ip6h->daddr;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2021-01-07 18:30:53 +01:00
|
|
|
|
2021-05-07 14:54:12 +02:00
|
|
|
/*
|
|
|
|
* Returns the number of unparsed bytes left in the packet (bytes after nh.pos)
|
|
|
|
*/
|
|
|
|
static __u32 remaining_pkt_payload(struct parsing_context *ctx)
|
|
|
|
{
|
|
|
|
// pkt_len - (pos - data) fails because compiler transforms it to pkt_len - pos + data (pkt_len - pos not ok because value - pointer)
|
|
|
|
// data + pkt_len - pos fails on (data+pkt_len) - pos due to math between pkt_pointer and unbounded register
|
|
|
|
__u32 parsed_bytes = ctx->nh.pos - ctx->data;
|
|
|
|
return parsed_bytes < ctx->pkt_len ? ctx->pkt_len - parsed_bytes : 0;
|
|
|
|
}
|
|
|
|
|
2021-04-15 14:13:54 +02:00
|
|
|
// Programs
|
|
|
|
|
|
|
|
// TC-BFP for parsing packet identifier from egress traffic and add to map
|
|
|
|
SEC(EGRESS_PROG_SEC)
|
|
|
|
int pping_egress(struct __sk_buff *skb)
|
|
|
|
{
|
|
|
|
struct packet_id p_id = { 0 };
|
|
|
|
__u64 p_ts;
|
|
|
|
struct parsing_context pctx = {
|
|
|
|
.data = (void *)(long)skb->data,
|
|
|
|
.data_end = (void *)(long)skb->data_end,
|
|
|
|
.pkt_len = skb->len,
|
|
|
|
.nh = { .pos = pctx.data },
|
|
|
|
.is_egress = true,
|
|
|
|
};
|
|
|
|
bool flow_closing = false;
|
|
|
|
struct flow_state *f_state;
|
|
|
|
struct flow_state new_state = { 0 };
|
|
|
|
|
|
|
|
if (parse_packet_identifier(&pctx, &p_id, &flow_closing) < 0)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
// Delete flow and create no timestamp entry if flow is closing
|
|
|
|
if (flow_closing) {
|
|
|
|
bpf_map_delete_elem(&flow_state, &p_id.flow);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check flow state
|
|
|
|
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
|
|
|
|
if (!f_state) { // No previous state - attempt to create it
|
|
|
|
bpf_map_update_elem(&flow_state, &p_id.flow, &new_state,
|
|
|
|
BPF_NOEXIST);
|
|
|
|
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
|
|
|
|
if (!f_state)
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2021-05-07 14:54:12 +02:00
|
|
|
f_state->sent_pkts++;
|
|
|
|
f_state->sent_bytes += remaining_pkt_payload(&pctx);
|
|
|
|
|
2021-04-15 14:13:54 +02:00
|
|
|
// Check if identfier is new
|
|
|
|
if (f_state->last_id == p_id.identifier)
|
|
|
|
goto out;
|
|
|
|
f_state->last_id = p_id.identifier;
|
|
|
|
|
|
|
|
// Check rate-limit
|
|
|
|
p_ts = bpf_ktime_get_ns(); // or bpf_ktime_get_boot_ns
|
|
|
|
if (p_ts < f_state->last_timestamp ||
|
|
|
|
p_ts - f_state->last_timestamp < config.rate_limit)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Updates attempt at creating timestamp, even if creation of timestamp
|
|
|
|
* fails (due to map being full). This should make the competition for
|
|
|
|
* the next available map slot somewhat fairer between heavy and sparse
|
|
|
|
* flows.
|
|
|
|
*/
|
|
|
|
f_state->last_timestamp = p_ts;
|
|
|
|
bpf_map_update_elem(&packet_ts, &p_id, &p_ts, BPF_NOEXIST);
|
|
|
|
|
|
|
|
out:
|
|
|
|
return BPF_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
// XDP program for parsing identifier in ingress traffic and check for match in map
|
|
|
|
SEC(INGRESS_PROG_SEC)
|
|
|
|
int pping_ingress(struct xdp_md *ctx)
|
|
|
|
{
|
|
|
|
struct packet_id p_id = { 0 };
|
|
|
|
__u64 *p_ts;
|
|
|
|
struct rtt_event event = { 0 };
|
pping: Add timestamp and min-RTT to output
To add timestamp to output, push the timestamp when packet was
processed from kernel as part of the rtt-event. Also keep track of
minimum encountered RTT for each flow in kernel, and also push that as
part of the RTT-event.
Additionally, avoid pushing RTT messages at all if no flow-state
information can be found (due to ex. being deleted from egress side),
as no valid min-RTT can then be given. Furthermore, no longer delete
flow-information once seeing the FIN-flag on egress in order to keep
useful flow-state around for RTT-messages longer. Due to the
FIN-handshake process, it is sufficient if the ingress program deletes
the flow-state upon seeing FIN. However, still delete flow-state from
either ingress or egress upon seeing RST flag, as RST does not have a
handshake process allowing for delayed deletion.
While minimum RTT could also be tracked from the userspace process,
userspace is not aware of when the flow is closed so would have to add
additional logic to keep track of minimum RTT for each flow and
periodically clean them up. Furthermore, keeping RTT statistics in the
flow-state map is useful for implementing future features, such as an
RTT-based sampling interval. It would also be useful in case pping is
changed to no longer have a long-running userspace process printing
out all the calculated RTTs, but instead simply occasionally looks up
the RTT from the flow-state map.
Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
2021-04-29 18:55:06 +02:00
|
|
|
struct flow_state *f_state;
|
2021-04-15 14:13:54 +02:00
|
|
|
struct parsing_context pctx = {
|
|
|
|
.data = (void *)(long)ctx->data,
|
|
|
|
.data_end = (void *)(long)ctx->data_end,
|
|
|
|
.pkt_len = pctx.data_end - pctx.data,
|
|
|
|
.nh = { .pos = pctx.data },
|
|
|
|
.is_egress = false,
|
|
|
|
};
|
|
|
|
bool flow_closing = false;
|
pping: Add timestamp and min-RTT to output
To add timestamp to output, push the timestamp when packet was
processed from kernel as part of the rtt-event. Also keep track of
minimum encountered RTT for each flow in kernel, and also push that as
part of the RTT-event.
Additionally, avoid pushing RTT messages at all if no flow-state
information can be found (due to ex. being deleted from egress side),
as no valid min-RTT can then be given. Furthermore, no longer delete
flow-information once seeing the FIN-flag on egress in order to keep
useful flow-state around for RTT-messages longer. Due to the
FIN-handshake process, it is sufficient if the ingress program deletes
the flow-state upon seeing FIN. However, still delete flow-state from
either ingress or egress upon seeing RST flag, as RST does not have a
handshake process allowing for delayed deletion.
While minimum RTT could also be tracked from the userspace process,
userspace is not aware of when the flow is closed so would have to add
additional logic to keep track of minimum RTT for each flow and
periodically clean them up. Furthermore, keeping RTT statistics in the
flow-state map is useful for implementing future features, such as an
RTT-based sampling interval. It would also be useful in case pping is
changed to no longer have a long-running userspace process printing
out all the calculated RTTs, but instead simply occasionally looks up
the RTT from the flow-state map.
Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
2021-04-29 18:55:06 +02:00
|
|
|
__u64 now;
|
2021-04-15 14:13:54 +02:00
|
|
|
|
|
|
|
if (parse_packet_identifier(&pctx, &p_id, &flow_closing) < 0)
|
|
|
|
goto out;
|
|
|
|
|
2021-05-07 14:54:12 +02:00
|
|
|
f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow);
|
|
|
|
if (!f_state)
|
|
|
|
goto validflow_out;
|
|
|
|
|
|
|
|
f_state->rec_pkts++;
|
|
|
|
f_state->rec_bytes += remaining_pkt_payload(&pctx);
|
|
|
|
|
pping: Add timestamp and min-RTT to output
To add timestamp to output, push the timestamp when packet was
processed from kernel as part of the rtt-event. Also keep track of
minimum encountered RTT for each flow in kernel, and also push that as
part of the RTT-event.
Additionally, avoid pushing RTT messages at all if no flow-state
information can be found (due to ex. being deleted from egress side),
as no valid min-RTT can then be given. Furthermore, no longer delete
flow-information once seeing the FIN-flag on egress in order to keep
useful flow-state around for RTT-messages longer. Due to the
FIN-handshake process, it is sufficient if the ingress program deletes
the flow-state upon seeing FIN. However, still delete flow-state from
either ingress or egress upon seeing RST flag, as RST does not have a
handshake process allowing for delayed deletion.
While minimum RTT could also be tracked from the userspace process,
userspace is not aware of when the flow is closed so would have to add
additional logic to keep track of minimum RTT for each flow and
periodically clean them up. Furthermore, keeping RTT statistics in the
flow-state map is useful for implementing future features, such as an
RTT-based sampling interval. It would also be useful in case pping is
changed to no longer have a long-running userspace process printing
out all the calculated RTTs, but instead simply occasionally looks up
the RTT from the flow-state map.
Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
2021-04-29 18:55:06 +02:00
|
|
|
now = bpf_ktime_get_ns();
|
2021-04-15 14:13:54 +02:00
|
|
|
p_ts = bpf_map_lookup_elem(&packet_ts, &p_id);
|
pping: Add timestamp and min-RTT to output
To add timestamp to output, push the timestamp when packet was
processed from kernel as part of the rtt-event. Also keep track of
minimum encountered RTT for each flow in kernel, and also push that as
part of the RTT-event.
Additionally, avoid pushing RTT messages at all if no flow-state
information can be found (due to ex. being deleted from egress side),
as no valid min-RTT can then be given. Furthermore, no longer delete
flow-information once seeing the FIN-flag on egress in order to keep
useful flow-state around for RTT-messages longer. Due to the
FIN-handshake process, it is sufficient if the ingress program deletes
the flow-state upon seeing FIN. However, still delete flow-state from
either ingress or egress upon seeing RST flag, as RST does not have a
handshake process allowing for delayed deletion.
While minimum RTT could also be tracked from the userspace process,
userspace is not aware of when the flow is closed so would have to add
additional logic to keep track of minimum RTT for each flow and
periodically clean them up. Furthermore, keeping RTT statistics in the
flow-state map is useful for implementing future features, such as an
RTT-based sampling interval. It would also be useful in case pping is
changed to no longer have a long-running userspace process printing
out all the calculated RTTs, but instead simply occasionally looks up
the RTT from the flow-state map.
Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
2021-04-29 18:55:06 +02:00
|
|
|
if (!p_ts || now < *p_ts)
|
|
|
|
goto validflow_out;
|
2021-04-15 14:13:54 +02:00
|
|
|
|
pping: Add timestamp and min-RTT to output
To add timestamp to output, push the timestamp when packet was
processed from kernel as part of the rtt-event. Also keep track of
minimum encountered RTT for each flow in kernel, and also push that as
part of the RTT-event.
Additionally, avoid pushing RTT messages at all if no flow-state
information can be found (due to ex. being deleted from egress side),
as no valid min-RTT can then be given. Furthermore, no longer delete
flow-information once seeing the FIN-flag on egress in order to keep
useful flow-state around for RTT-messages longer. Due to the
FIN-handshake process, it is sufficient if the ingress program deletes
the flow-state upon seeing FIN. However, still delete flow-state from
either ingress or egress upon seeing RST flag, as RST does not have a
handshake process allowing for delayed deletion.
While minimum RTT could also be tracked from the userspace process,
userspace is not aware of when the flow is closed so would have to add
additional logic to keep track of minimum RTT for each flow and
periodically clean them up. Furthermore, keeping RTT statistics in the
flow-state map is useful for implementing future features, such as an
RTT-based sampling interval. It would also be useful in case pping is
changed to no longer have a long-running userspace process printing
out all the calculated RTTs, but instead simply occasionally looks up
the RTT from the flow-state map.
Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
2021-04-29 18:55:06 +02:00
|
|
|
event.rtt = now - *p_ts;
|
|
|
|
event.timestamp = now;
|
2021-05-06 17:54:31 +02:00
|
|
|
|
|
|
|
// Delete timestamp entry as soon as RTT is calculated
|
2021-04-15 14:13:54 +02:00
|
|
|
bpf_map_delete_elem(&packet_ts, &p_id);
|
|
|
|
|
pping: Add timestamp and min-RTT to output
To add timestamp to output, push the timestamp when packet was
processed from kernel as part of the rtt-event. Also keep track of
minimum encountered RTT for each flow in kernel, and also push that as
part of the RTT-event.
Additionally, avoid pushing RTT messages at all if no flow-state
information can be found (due to ex. being deleted from egress side),
as no valid min-RTT can then be given. Furthermore, no longer delete
flow-information once seeing the FIN-flag on egress in order to keep
useful flow-state around for RTT-messages longer. Due to the
FIN-handshake process, it is sufficient if the ingress program deletes
the flow-state upon seeing FIN. However, still delete flow-state from
either ingress or egress upon seeing RST flag, as RST does not have a
handshake process allowing for delayed deletion.
While minimum RTT could also be tracked from the userspace process,
userspace is not aware of when the flow is closed so would have to add
additional logic to keep track of minimum RTT for each flow and
periodically clean them up. Furthermore, keeping RTT statistics in the
flow-state map is useful for implementing future features, such as an
RTT-based sampling interval. It would also be useful in case pping is
changed to no longer have a long-running userspace process printing
out all the calculated RTTs, but instead simply occasionally looks up
the RTT from the flow-state map.
Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
2021-04-29 18:55:06 +02:00
|
|
|
if (f_state->min_rtt == 0 || event.rtt < f_state->min_rtt)
|
|
|
|
f_state->min_rtt = event.rtt;
|
|
|
|
|
|
|
|
event.min_rtt = f_state->min_rtt;
|
2021-05-07 14:54:12 +02:00
|
|
|
event.sent_pkts = f_state->sent_pkts;
|
|
|
|
event.sent_bytes = f_state->sent_bytes;
|
|
|
|
event.rec_pkts = f_state->rec_pkts;
|
|
|
|
event.rec_bytes = f_state->rec_bytes;
|
pping: Add timestamp and min-RTT to output
To add timestamp to output, push the timestamp when packet was
processed from kernel as part of the rtt-event. Also keep track of
minimum encountered RTT for each flow in kernel, and also push that as
part of the RTT-event.
Additionally, avoid pushing RTT messages at all if no flow-state
information can be found (due to ex. being deleted from egress side),
as no valid min-RTT can then be given. Furthermore, no longer delete
flow-information once seeing the FIN-flag on egress in order to keep
useful flow-state around for RTT-messages longer. Due to the
FIN-handshake process, it is sufficient if the ingress program deletes
the flow-state upon seeing FIN. However, still delete flow-state from
either ingress or egress upon seeing RST flag, as RST does not have a
handshake process allowing for delayed deletion.
While minimum RTT could also be tracked from the userspace process,
userspace is not aware of when the flow is closed so would have to add
additional logic to keep track of minimum RTT for each flow and
periodically clean them up. Furthermore, keeping RTT statistics in the
flow-state map is useful for implementing future features, such as an
RTT-based sampling interval. It would also be useful in case pping is
changed to no longer have a long-running userspace process printing
out all the calculated RTTs, but instead simply occasionally looks up
the RTT from the flow-state map.
Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
2021-04-29 18:55:06 +02:00
|
|
|
|
2021-05-06 17:54:31 +02:00
|
|
|
// Push event to perf-buffer
|
2021-04-15 14:13:54 +02:00
|
|
|
__builtin_memcpy(&event.flow, &p_id.flow, sizeof(struct network_tuple));
|
|
|
|
bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU, &event,
|
|
|
|
sizeof(event));
|
|
|
|
|
pping: Add timestamp and min-RTT to output
To add timestamp to output, push the timestamp when packet was
processed from kernel as part of the rtt-event. Also keep track of
minimum encountered RTT for each flow in kernel, and also push that as
part of the RTT-event.
Additionally, avoid pushing RTT messages at all if no flow-state
information can be found (due to ex. being deleted from egress side),
as no valid min-RTT can then be given. Furthermore, no longer delete
flow-information once seeing the FIN-flag on egress in order to keep
useful flow-state around for RTT-messages longer. Due to the
FIN-handshake process, it is sufficient if the ingress program deletes
the flow-state upon seeing FIN. However, still delete flow-state from
either ingress or egress upon seeing RST flag, as RST does not have a
handshake process allowing for delayed deletion.
While minimum RTT could also be tracked from the userspace process,
userspace is not aware of when the flow is closed so would have to add
additional logic to keep track of minimum RTT for each flow and
periodically clean them up. Furthermore, keeping RTT statistics in the
flow-state map is useful for implementing future features, such as an
RTT-based sampling interval. It would also be useful in case pping is
changed to no longer have a long-running userspace process printing
out all the calculated RTTs, but instead simply occasionally looks up
the RTT from the flow-state map.
Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
2021-04-29 18:55:06 +02:00
|
|
|
validflow_out:
|
|
|
|
// Wait with deleting flow until having pushed final RTT message
|
|
|
|
if (flow_closing)
|
|
|
|
bpf_map_delete_elem(&flow_state, &p_id.flow);
|
|
|
|
|
2021-04-15 14:13:54 +02:00
|
|
|
out:
|
|
|
|
return XDP_PASS;
|
|
|
|
}
|