Files
xdp-project-bpf-examples/pping/pping.h

212 lines
5.0 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef PPING_H
#define PPING_H
#include <linux/types.h>
#include <linux/in6.h>
#include <stdbool.h>
/* Time unit conversion factors; every constant has type unsigned long. */
#define NS_PER_SECOND	1000000000UL		/* nanoseconds per second */
#define NS_PER_MS	1000000UL		/* nanoseconds per millisecond */
#define MS_PER_S	1000UL			/* milliseconds per second */
#define S_PER_DAY	(24UL * 3600UL)		/* seconds per day */
/*
 * Unsigned 64-bit fixed-point value: the low FIXPOINT_SHIFT bits hold the
 * fractional part, the remaining high bits hold the integer part.
 */
typedef __u64 fixpoint64;

/* Number of fractional bits in a fixpoint64 */
#define FIXPOINT_SHIFT 16

/*
 * Convert X to fixpoint64 by scaling it with 2^FIXPOINT_SHIFT and casting
 * the product (any fraction finer than 2^-FIXPOINT_SHIFT is lost in the cast).
 */
#define DOUBLE_TO_FIXPOINT(X) \
	((fixpoint64)((X) * (1UL << FIXPOINT_SHIFT)))

/* Integer part of the fixpoint64 X (fractional bits are shifted out) */
#define FIXPOINT_TO_UINT(X) \
	((X) >> FIXPOINT_SHIFT)
/*
 * Values for the leading event_type member of the event structs (rtt_event,
 * flow_event, map_full_event and map_clean_event); they tell the receiver
 * which kind of event a message carries.
 */
#define EVENT_TYPE_FLOW		1
#define EVENT_TYPE_RTT		2
#define EVENT_TYPE_MAP_FULL	3
#define EVENT_TYPE_MAP_CLEAN	4
/*
 * Kind of state change reported in flow_event.flow_event_type.
 *
 * The packed attribute makes the compiler store the enum in the smallest
 * integer type that fits its values (a single byte here).
 */
enum __attribute__((__packed__)) flow_event_type {
	FLOW_EVENT_NONE = 0,
	FLOW_EVENT_OPENING = 1,
	FLOW_EVENT_CLOSING = 2,
	FLOW_EVENT_CLOSING_BOTH = 3,
};
/*
 * Why a flow event happened; used for flow_event.reason and for
 * flow_state.opening_reason.
 *
 * Packed, so it occupies a single byte.
 */
enum __attribute__((__packed__)) flow_event_reason {
	EVENT_REASON_NONE = 0,
	EVENT_REASON_SYN = 1,
	EVENT_REASON_SYN_ACK = 2,
	EVENT_REASON_FIRST_OBS_PCKT = 3,
	EVENT_REASON_FIN = 4,
	EVENT_REASON_RST = 5,
	EVENT_REASON_FLOW_TIMEOUT = 6,
};
/*
 * Origin of a flow event, stored in flow_event.source.
 * NOTE(review): presumably the packet's source side, the packet's destination
 * side, or the garbage collector (map cleanup) - confirm against the users.
 *
 * Packed, so it occupies a single byte.
 */
enum __attribute__((__packed__)) flow_event_source {
	EVENT_SOURCE_PKT_SRC = 0,
	EVENT_SOURCE_PKT_DEST = 1,
	EVENT_SOURCE_GC = 2,
};
/*
 * Identifies which map a map_full_event or map_clean_event refers to.
 *
 * Packed, so it occupies a single byte.
 */
enum __attribute__((__packed__)) pping_map {
	PPING_MAP_FLOWSTATE = 0,
	PPING_MAP_PACKETTS = 1,
};
/*
 * Connection state of one direction of a flow, stored in
 * flow_state.conn_state.
 *
 * Packed, so it occupies a single byte.
 */
enum __attribute__((__packed__)) connection_state {
	CONNECTION_STATE_EMPTY = 0,
	CONNECTION_STATE_WAITOPEN = 1,
	CONNECTION_STATE_OPEN = 2,
	CONNECTION_STATE_CLOSED = 3,
};
/*
 * Run-time settings for pping.
 * NOTE(review): presumably filled in by user space and read by the BPF
 * programs - confirm against the users of this struct.
 */
struct bpf_config {
	__u64 rate_limit;
	fixpoint64 rtt_rate;	/* fixed-point value, see FIXPOINT_SHIFT */

	/* Feature toggles */
	bool use_srtt;
	bool track_tcp;
	bool track_icmp;
	bool localfilt;

	__u32 reserved;		/* explicit padding after the four bools */
};
/*
* Struct that can hold the source or destination address for a flow (l3+l4).
* Works for both IPv4 and IPv6, as IPv4 addresses can be mapped to IPv6 ones
* based on RFC 4291 Section 2.5.5.2.
*/
struct flow_address {
	struct in6_addr ip;	/* l3 address; IPv4 is stored IPv4-mapped */
	__u16 port;		/* l4 port */
	__u16 reserved;		/* explicit padding */
};
/*
 * Struct to hold a full network tuple.
 * The ipv member is technically not necessary, but makes it easier to
 * determine if saddr/daddr are IPv4 or IPv6 addresses (no need to look at the
 * first 12 bytes of the address). The proto member is not currently used, but
 * could be useful once pping is extended to work for other protocols than TCP.
 */
struct network_tuple {
	struct flow_address saddr;	/* source address and port */
	struct flow_address daddr;	/* destination address and port */
	__u16 proto;			/* IPPROTO_TCP, IPPROTO_ICMP, QUIC etc */
	__u8 ipv;			/* AF_INET or AF_INET6 */
	__u8 reserved;			/* explicit padding */
};
struct flow_state {
pping: Add timestamp and min-RTT to output To add timestamp to output, push the timestamp when packet was processed from kernel as part of the rtt-event. Also keep track of minimum encountered RTT for each flow in kernel, and also push that as part of the RTT-event. Additionally, avoid pushing RTT messages at all if no flow-state information can be found (due to ex. being deleted from egress side), as no valid min-RTT can then be given. Furthermore, no longer delete flow-information once seeing the FIN-flag on egress in order to keep useful flow-state around for RTT-messages longer. Due to the FIN-handshake process, it is sufficient if the ingress program deletes the flow-state upon seeing FIN. However, still delete flow-state from either ingress or egress upon seeing RST flag, as RST does not have a handshake process allowing for delayed deletion. While minimum RTT could also be tracked from the userspace process, userspace is not aware of when the flow is closed so would have to add additional logic to keep track of minimum RTT for each flow and periodically clean them up. Furthermore, keeping RTT statistics in the flow-state map is useful for implementing future features, such as an RTT-based sampling interval. It would also be useful in case pping is changed to no longer have a long-running userspace process printing out all the calculated RTTs, but instead simply occasionally looks up the RTT from the flow-state map. Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
2021-04-29 18:55:06 +02:00
__u64 min_rtt;
__u64 srtt;
__u64 last_timestamp;
__u64 sent_pkts;
__u64 sent_bytes;
__u64 rec_pkts;
__u64 rec_bytes;
__u32 last_id;
__u32 outstanding_timestamps;
enum connection_state conn_state;
enum flow_event_reason opening_reason;
__u8 reserved[6];
pping: Combine flow state in each direction to a dualflow state Combine the flow state entries for both the "forward" and "reverse" direction of the flow into a single dualflow state. Change the flowstate map to use the dualflow state so that state for both directions can be retrieved using a single map lookup. As flow states are now kept in pairs, cannot directly create/delete states from the BPF map each time a flow opens/closes in one direction. Therefore, update all logic related to creating/deleting flows. For example, use "empty" slot in dualflow state instead of creating a new map entry, and only delete the dual flow state entry once both directions of the flow have closed/timed out. Some implementation details: Have implemented a simple memcmp function as I could not get the __builtin_memcmp function to work (got error "libbpf: failed to find BTF for extern 'memcmp': -2"). To ensure that both directions of the flow always look up the same entry, use the "sorted" flow tuple (the (ip, port) pair that is smaller is always first) as key. This is what the memcmp is used for. To avoid storing two copies of the flow tuple (src -> dst and dst -> src) and doing additional memcmps, always store the flow state for the "sorted" direction as the first direction and the reverse as the second direction. Then simply check if a flow is sorted or not to determine which direction in the dual flow state that matches. Have attempted to at least partially abstract this detail away from most of the code by adding some get_flowstate_from* helpers. The dual flow state simply stores the two (single direction) flow states as the struct members dir1 and dir2. Use these two (admittedly poorly named) members instead of a single array of size 2 in order to avoid some issues with the verifier being worried that the array index might be out of bounds. Have added some new boolean members to the flow state to keep track of "connection state". 
In addition the the previous has_opened, I now also have a member for if the flow is "empty" or if it has been closed. These are needed to cope with having to keep individual flow states for both directions of the flow around as long as one direction of the flow is used. I plan to replace these boolean "connection state" members with a single enum in a future commit. Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
2022-03-22 17:59:18 +01:00
};
/*
 * Stores flow state for both directions (src -> dst and dst -> src) of a flow.
 *
 * Uses two named members instead of an array of size 2 to avoid hassles with
 * convincing the verifier that member access is not out of bounds.
 */
struct dual_flow_state {
	struct flow_state dir1;		/* state for one direction of the flow */
	struct flow_state dir2;		/* state for the opposite direction */
};
/*
 * Identifies a single packet: the flow it belongs to plus a per-packet
 * identifier.
 */
struct packet_id {
	struct network_tuple flow;
	__u32 identifier;	/* tsval for TCP packets */
};
/*
 * Events that can be passed from the BPF programs to the user space
 * application.
 * The initial event_type member is used to allow multiplexing between
 * different event types in a single perf buffer. Members event_type and
 * timestamp are common among all event types, and flow is common for
 * rtt_event, flow_event and map_full_event.
 */
/*
* An RTT event message passed when an RTT has been calculated
* Uses explicit padding instead of packing based on recommendations in cilium's
* BPF reference documentation at https://docs.cilium.io/en/stable/bpf/#llvm.
*/
struct rtt_event {
	__u64 event_type;	/* one of the EVENT_TYPE_* values */
	__u64 timestamp;	/* when the packet was processed in the kernel */
	struct network_tuple flow;
	__u32 padding;		/* explicit padding before the 64-bit members */
	__u64 rtt;		/* the calculated RTT */
	__u64 min_rtt;		/* minimum RTT encountered for the flow */

	/* Traffic counters (same names as the members of struct flow_state) */
	__u64 sent_pkts;
	__u64 sent_bytes;
	__u64 rec_pkts;
	__u64 rec_bytes;

	bool match_on_egress;
	__u8 reserved[7];	/* explicit trailing padding */
};
/*
* A flow event message passed when a flow has changed state (opened/closed)
*/
struct flow_event {
__u64 event_type;
pping: Add timestamp and min-RTT to output To add timestamp to output, push the timestamp when packet was processed from kernel as part of the rtt-event. Also keep track of minimum encountered RTT for each flow in kernel, and also push that as part of the RTT-event. Additionally, avoid pushing RTT messages at all if no flow-state information can be found (due to ex. being deleted from egress side), as no valid min-RTT can then be given. Furthermore, no longer delete flow-information once seeing the FIN-flag on egress in order to keep useful flow-state around for RTT-messages longer. Due to the FIN-handshake process, it is sufficient if the ingress program deletes the flow-state upon seeing FIN. However, still delete flow-state from either ingress or egress upon seeing RST flag, as RST does not have a handshake process allowing for delayed deletion. While minimum RTT could also be tracked from the userspace process, userspace is not aware of when the flow is closed so would have to add additional logic to keep track of minimum RTT for each flow and periodically clean them up. Furthermore, keeping RTT statistics in the flow-state map is useful for implementing future features, such as an RTT-based sampling interval. It would also be useful in case pping is changed to no longer have a long-running userspace process printing out all the calculated RTTs, but instead simply occasionally looks up the RTT from the flow-state map. Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
2021-04-29 18:55:06 +02:00
__u64 timestamp;
struct network_tuple flow;
enum flow_event_type flow_event_type;
enum flow_event_reason reason;
enum flow_event_source source;
__u8 reserved;
};
/*
 * An event indicating that a new entry could not be created in the map due to
 * the map being full.
 */
struct map_full_event {
	__u64 event_type;	/* one of the EVENT_TYPE_* values */
	__u64 timestamp;
	struct network_tuple flow;
	enum pping_map map;	/* the map that was full */
	__u8 reserved[3];	/* explicit padding after the 1-byte enum */
};
/*
 * Struct for storing various debug information about the map cleaning process.
 * The last_* members contain information from the last clean cycle, whereas the
 * tot_* entries contain cumulative stats from all clean cycles.
 */
struct map_clean_event {
	__u64 event_type;	/* one of the EVENT_TYPE_* values */
	__u64 timestamp;

	/* Cumulative statistics over all clean cycles */
	__u64 tot_runtime;
	__u64 tot_processed_entries;
	__u64 tot_timeout_del;
	__u64 tot_auto_del;

	/* Statistics from the last clean cycle */
	__u64 last_runtime;
	__u32 last_processed_entries;
	__u32 last_timeout_del;
	__u32 last_auto_del;

	__u32 clean_cycles;
	enum pping_map map;	/* the map these statistics refer to */
	__u8 reserved[7];	/* explicit padding after the 1-byte enum */
};
/*
 * Any one of the event messages.
 *
 * Every event struct starts with a __u64 event_type, so the bare event_type
 * member overlays that field and can be read before the concrete event type
 * is known.
 */
union pping_event {
	__u64 event_type;
	struct rtt_event rtt_event;
	struct flow_event flow_event;
	struct map_full_event map_event;
	struct map_clean_event map_clean_event;
};
#endif