From 72404b6767f0d95fc28485aa24af7730a568e03c Mon Sep 17 00:00:00 2001 From: Simon Sundberg Date: Thu, 17 Feb 2022 14:01:08 +0100 Subject: [PATCH] pping: Add map cleanup debug info Add some debug info to the periodical map cleanup process. Push debug information through the events perf buffer by using newly added map_clean_event. The old user space map cleanup process had some simple debug information that was lost when transitioning to using bpf_iter instead. Therefore, add back similar (but more extensive) debug information but now collected from the BPF-side. In addition to stats on entries deleted by the cleanup process, also include stats on entries deleted by ePPing itself due to matching (for timestamp entries) or detecting FIN/RST (for flow entries) Signed-off-by: Simon Sundberg --- pping/Makefile | 2 +- pping/pping.c | 15 ++++- pping/pping.h | 29 ++++++++- pping/pping_debug_cleanup.h | 124 ++++++++++++++++++++++++++++++++++++ pping/pping_kern.c | 62 ++++++++++++------ 5 files changed, 208 insertions(+), 24 deletions(-) create mode 100644 pping/pping_debug_cleanup.h diff --git a/pping/Makefile b/pping/Makefile index 48a8030..1834410 100644 --- a/pping/Makefile +++ b/pping/Makefile @@ -4,7 +4,7 @@ USER_TARGETS := pping BPF_TARGETS := pping_kern LDLIBS += -pthread -EXTRA_DEPS += pping.h +EXTRA_DEPS += pping.h pping_debug_cleanup.h LIB_DIR = ../lib diff --git a/pping/pping.c b/pping/pping.c index a3bcf70..bcf59e4 100644 --- a/pping/pping.c +++ b/pping/pping.c @@ -37,7 +37,7 @@ enum PPING_OUTPUT_FORMAT { PPING_OUTPUT_PPVIZ }; -/* +/* * BPF implementation of pping using libbpf. * Uses TC-BPF for egress and XDP for ingress. * - On egrees, packets are parsed for an identifer, @@ -801,6 +801,16 @@ static void warn_map_full(const struct map_full_event *e) fprintf(stderr, "\n"); } +static void print_map_clean_info(const struct map_clean_event *e) +{ + fprintf(stderr, + "%s: cycle: %u, entries: %u, time: %llu, timeout: %u, tot timeout: %llu, selfdel: %u, tot selfdel: %llu\n", + e->map == PPING_MAP_PACKETTS ? "packet_ts" : "flow_state", + e->clean_cycles, e->last_processed_entries, e->last_runtime, + e->last_timeout_del, e->tot_timeout_del, e->last_auto_del, + e->tot_auto_del); +} + static void handle_event(void *ctx, int cpu, void *data, __u32 data_size) { const union pping_event *e = data; @@ -812,6 +822,9 @@ static void handle_event(void *ctx, int cpu, void *data, __u32 data_size) case EVENT_TYPE_MAP_FULL: warn_map_full(&e->map_event); break; + case EVENT_TYPE_MAP_CLEAN: + print_map_clean_info(&e->map_clean_event); + break; case EVENT_TYPE_RTT: case EVENT_TYPE_FLOW: print_event_func(e); diff --git a/pping/pping.h b/pping/pping.h index 80e5a06..ad2b6a5 100644 --- a/pping/pping.h +++ b/pping/pping.h @@ -20,6 +20,7 @@ typedef __u64 fixpoint64; #define EVENT_TYPE_FLOW 1 #define EVENT_TYPE_RTT 2 #define EVENT_TYPE_MAP_FULL 3 +#define EVENT_TYPE_MAP_CLEAN 4 enum __attribute__((__packed__)) flow_event_type { FLOW_EVENT_NONE, @@ -103,12 +104,14 @@ struct packet_id { __u32 identifier; //tsval for TCP packets }; + /* * Events that can be passed from the BPF-programs to the user space * application. * The initial event_type memeber is used to allow multiplexing between - * different event types in a single perf buffer. Memebers up to and including - * flow are identical for all event types. + * different event types in a single perf buffer. Memebers event_type and + * timestamp are common among all event types, and flow is common for + * rtt_event, flow_event and map_full_event. */ /* @@ -156,11 +159,33 @@ struct map_full_event { __u8 reserved[3]; }; +/* + * Struct for storing various debug-information about the map cleaning process. + * The last_* members contain information from the last clean-cycle, whereas the + * tot_* entires contain cumulative stats from all clean cycles. + */ +struct map_clean_event { + __u64 event_type; + __u64 timestamp; + __u64 tot_runtime; + __u64 tot_processed_entries; + __u64 tot_timeout_del; + __u64 tot_auto_del; + __u64 last_runtime; + __u32 last_processed_entries; + __u32 last_timeout_del; + __u32 last_auto_del; + __u32 clean_cycles; + enum pping_map map; + __u8 reserved[7]; +}; + union pping_event { __u64 event_type; struct rtt_event rtt_event; struct flow_event flow_event; struct map_full_event map_event; + struct map_clean_event map_clean_event; }; #endif diff --git a/pping/pping_debug_cleanup.h b/pping/pping_debug_cleanup.h new file mode 100644 index 0000000..e2cd177 --- /dev/null +++ b/pping/pping_debug_cleanup.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef __PPING_DEBUG_CLEANUP_H +#define __PPING_DEBUG_CLEANUP_H + +#include +#include +#include "pping.h" + +/* + * Structs and functions that are only used for tracking the cleanup of the + * packet timestamp and flow state maps. + + * Structs and contents of functions are guarded by ifdef DEBUGs to minimze + * overhead, and kept in this file to keep the normal pping-related code + * cleaner. + */ + +#ifdef DEBUG + +/* + * Global entries with cleanup stats for each map (PPING_MAP_PACKETTS and + * PPING_MAP_FLOWSTATE). The last_* members keep track of how many entries + * that are deleted in the current cleaning cycle and are updated continuiously, + * whereas the tot_* entries keeps the cumulative stats but are only updated at + * the end of the current cleaning cycle. + */ + +struct map_clean_stats { + __u64 start_time; + __u64 tot_runtime; + __u64 tot_processed_entries; + __u64 tot_timeout_del; + __u64 tot_auto_del; + __u64 last_runtime; + __u32 last_processed_entries; + __u32 last_timeout_del; + __u32 last_auto_del; + __u32 clean_cycles; +}; + +static volatile struct map_clean_stats clean_stats[2] = { 0 }; + +#endif + + +static __always_inline void debug_increment_autodel(enum pping_map map) +{ +#ifdef DEBUG + clean_stats[map].last_auto_del += 1; +#endif +} + +static __always_inline void debug_increment_timeoutdel(enum pping_map map) +{ +#ifdef DEBUG + clean_stats[map].last_timeout_del += 1; +#endif +} + +#ifdef DEBUG +static __always_inline void +send_map_clean_event(void *ctx, void *perf_buffer, + volatile const struct map_clean_stats *map_stats, + __u64 now, enum pping_map map) +{ + struct map_clean_event mce = { + .event_type = EVENT_TYPE_MAP_CLEAN, + .timestamp = now, + .tot_timeout_del = map_stats->tot_timeout_del, + .tot_auto_del = map_stats->tot_auto_del, + .tot_processed_entries = map_stats->tot_processed_entries, + .tot_runtime = map_stats->tot_runtime, + .last_timeout_del = map_stats->last_timeout_del, + .last_auto_del = map_stats->last_auto_del, + .last_processed_entries = map_stats->last_processed_entries, + .last_runtime = map_stats->last_runtime, + .clean_cycles = map_stats->clean_cycles, + .map = map, + .reserved = { 0 } + }; + + bpf_perf_event_output(ctx, perf_buffer, BPF_F_CURRENT_CPU, &mce, + sizeof(mce)); +} +#endif + +static __always_inline void +debug_update_mapclean_stats(void *ctx, void *perf_buffer, bool final, + __u64 seq_num, __u64 time, enum pping_map map) +{ +#ifdef DEBUG + volatile struct map_clean_stats *map_stats = &clean_stats[map]; + + if (final) { // post final entry + if (map_stats->start_time) { // Non-empty map + map_stats->last_processed_entries = seq_num + 1; + map_stats->last_runtime = time - map_stats->start_time; + } else { + map_stats->last_processed_entries = 0; + map_stats->last_runtime = 0; + } + + //update totals + map_stats->tot_runtime += map_stats->last_runtime; + map_stats->tot_processed_entries += + map_stats->last_processed_entries; + map_stats->tot_timeout_del += map_stats->last_timeout_del; + map_stats->tot_auto_del += map_stats->last_auto_del; + map_stats->clean_cycles += 1; + + send_map_clean_event(ctx, perf_buffer, map_stats, time, map); + + // Reset for next clean cycle + map_stats->start_time = 0; + map_stats->last_timeout_del = 0; + map_stats->last_auto_del = 0; + + } else if (seq_num == 0) { // mark first entry + map_stats->start_time = time; + } +#endif +} + +#endif diff --git a/pping/pping_kern.c b/pping/pping_kern.c index 393f3b2..12dd8b1 100644 --- a/pping/pping_kern.c +++ b/pping/pping_kern.c @@ -20,6 +20,7 @@ #include #include "pping.h" +#include "pping_debug_cleanup.h" #define AF_INET 2 #define AF_INET6 10 @@ -127,6 +128,7 @@ struct { __uint(value_size, sizeof(__u32)); } events SEC(".maps"); + // Help functions /* @@ -587,18 +589,22 @@ static void delete_closed_flows(void *ctx, struct packet_info *p_info, if (flow && (p_info->event_type == FLOW_EVENT_CLOSING || p_info->event_type == FLOW_EVENT_CLOSING_BOTH)) { has_opened = flow->has_opened; - if (!bpf_map_delete_elem(&flow_state, &p_info->pid.flow) && - has_opened) - send_flow_event(ctx, p_info, false); + if (bpf_map_delete_elem(&flow_state, &p_info->pid.flow) == 0) { + debug_increment_autodel(PPING_MAP_FLOWSTATE); + if (has_opened) + send_flow_event(ctx, p_info, false); + } } // Also close reverse flow if (rev_flow && p_info->event_type == FLOW_EVENT_CLOSING_BOTH) { has_opened = rev_flow->has_opened; - if (!bpf_map_delete_elem(&flow_state, - &p_info->reply_pid.flow) && - has_opened) - send_flow_event(ctx, p_info, true); + if (bpf_map_delete_elem(&flow_state, &p_info->reply_pid.flow) == + 0) { + debug_increment_autodel(PPING_MAP_FLOWSTATE); + if (has_opened) + send_flow_event(ctx, p_info, true); + } } } @@ -701,8 +707,10 @@ static void pping_match_packet(struct flow_state *f_state, void *ctx, return; re.rtt = p_info->time - *p_ts; + // Delete timestamp entry as soon as RTT is calculated - bpf_map_delete_elem(&packet_ts, &p_info->reply_pid); + if (bpf_map_delete_elem(&packet_ts, &p_info->reply_pid) == 0) + debug_increment_autodel(PPING_MAP_PACKETTS); if (f_state->min_rtt == 0 || re.rtt < f_state->min_rtt) f_state->min_rtt = re.rtt; @@ -807,6 +815,10 @@ int tsmap_cleanup(struct bpf_iter__bpf_map_elem *ctx) __u64 *timestamp = ctx->value; __u64 now = bpf_ktime_get_ns(); + debug_update_mapclean_stats(ctx, &events, !ctx->key || !ctx->value, + ctx->meta->seq_num, now, + PPING_MAP_PACKETTS); + if (!pid || !timestamp) return 0; @@ -814,7 +826,9 @@ int tsmap_cleanup(struct bpf_iter__bpf_map_elem *ctx) /* Seems like the key for map lookup operations must be on the stack, so copy pid to local_pid. */ __builtin_memcpy(&local_pid, pid, sizeof(local_pid)); - bpf_map_delete_elem(&packet_ts, &local_pid); + + if (bpf_map_delete_elem(&packet_ts, &local_pid) == 0) + debug_increment_timeoutdel(PPING_MAP_PACKETTS); } return 0; @@ -830,6 +844,10 @@ int flowmap_cleanup(struct bpf_iter__bpf_map_elem *ctx) __u64 now = bpf_ktime_get_ns(); bool has_opened; + debug_update_mapclean_stats(ctx, &events, !ctx->key || !ctx->value, + ctx->meta->seq_num, now, + PPING_MAP_FLOWSTATE); + if (!flow || !f_state) return 0; @@ -838,17 +856,21 @@ int flowmap_cleanup(struct bpf_iter__bpf_map_elem *ctx) __builtin_memcpy(&local_flow, flow, sizeof(local_flow)); has_opened = f_state->has_opened; - if (bpf_map_delete_elem(&flow_state, &local_flow) == 0 && - has_opened) { - reverse_flow(&fe.flow, &local_flow); - fe.event_type = EVENT_TYPE_FLOW; - fe.timestamp = now; - fe.flow_event_type = FLOW_EVENT_CLOSING; - fe.reason = EVENT_REASON_FLOW_TIMEOUT; - fe.source = EVENT_SOURCE_GC; - fe.reserved = 0; - bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, - &fe, sizeof(fe)); + if (bpf_map_delete_elem(&flow_state, &local_flow) == 0) { + debug_increment_timeoutdel(PPING_MAP_FLOWSTATE); + + if (has_opened) { + reverse_flow(&fe.flow, &local_flow); + fe.event_type = EVENT_TYPE_FLOW; + fe.timestamp = now; + fe.flow_event_type = FLOW_EVENT_CLOSING; + fe.reason = EVENT_REASON_FLOW_TIMEOUT; + fe.source = EVENT_SOURCE_GC; + fe.reserved = 0; + bpf_perf_event_output(ctx, &events, + BPF_F_CURRENT_CPU, &fe, + sizeof(fe)); + } } }