diff --git a/pping/TODO.md b/pping/TODO.md index 546ee34..7b09c47 100644 --- a/pping/TODO.md +++ b/pping/TODO.md @@ -25,7 +25,7 @@ - [ ] Could potentially include keeping track of average RTT, which may be useful for some decisions (ex. how often to sample, when entry can be removed etc) - - [ ] Could potentially include keeping track of minimum RTT (as + - [x] Could potentially include keeping track of minimum RTT (as done by the original pping), ex. to track bufferbloat - [ ] Could potentially include keeping track of if flow is bi-directional @@ -42,7 +42,6 @@ - It may be a good idea to keep the same format as original pping, so that tools such as [ppviz](https://github.com/pollere/ppviz) works for both pping implementations. -- [ ] Add timestamps to output (as original pping) - [ ] Add support for other hooks - Ex TC-BFP on ingress instead of XDP? @@ -59,3 +58,4 @@ - [x] Add IPv6 support - [x] Refactor to support easy addition of other protocols - [x] Load tc-bpf program with libbpf (only attach it with tc) +- [x] Add timestamps to output (as original pping) diff --git a/pping/pping.c b/pping/pping.c index f834676..5830443 100644 --- a/pping/pping.c +++ b/pping/pping.c @@ -40,6 +40,9 @@ static const char *__doc__ = #define MAX_PATH_LEN 1024 +#define MON_TO_REAL_UPDATE_FREQ \ + (1 * NS_PER_SECOND) // Update offset between CLOCK_MONOTONIC and CLOCK_REALTIME once per second + /* * BPF implementation of pping using libbpf * Uses TC-BPF for egress and XDP for ingress @@ -336,10 +339,10 @@ static int tc_bpf_clear(char *interface) * Returns time of CLOCK_MONOTONIC as nanoseconds in a single __u64. * On failure, the value 0 is returned (and errno will be set). */ -static __u64 get_time_ns(void) +static __u64 get_time_ns(clockid_t clockid) { struct timespec t; - if (clock_gettime(CLOCK_MONOTONIC, &t) != 0) + if (clock_gettime(clockid, &t) != 0) return 0; return (__u64)t.tv_sec * NS_PER_SECOND + (__u64)t.tv_nsec; @@ -374,7 +377,7 @@ static int clean_map(int map_fd, size_t key_size, size_t value_size, int removed = 0; void *key, *prev_key, *value; bool delete_prev = false; - __u64 now_nsec = get_time_ns(); + __u64 now_nsec = get_time_ns(CLOCK_MONOTONIC); #ifdef DEBUG int entries = 0; @@ -412,7 +415,7 @@ static int clean_map(int map_fd, size_t key_size, size_t value_size, removed++; } #ifdef DEBUG - duration = get_time_ns() - now_nsec; + duration = get_time_ns(CLOCK_MONOTONIC) - now_nsec; printf("%d: Gone through %d entries and removed %d of them in %llu.%09llu s\n", map_fd, entries, removed, duration / NS_PER_SECOND, duration % NS_PER_SECOND); @@ -441,6 +444,28 @@ static void *periodic_map_cleanup(void *args) pthread_exit(NULL); } +static __u64 convert_monotonic_to_realtime(__u64 monotonic_time) +{ + __u64 now_mon, now_rt; + static __u64 offset = 0; + static __u64 offset_updated = 0; + + now_mon = get_time_ns(CLOCK_MONOTONIC); + if (offset == 0 || + (now_mon > offset_updated && + now_mon - offset_updated > MON_TO_REAL_UPDATE_FREQ)) { + now_mon = get_time_ns(CLOCK_MONOTONIC); + now_rt = get_time_ns(CLOCK_REALTIME); + if (now_rt < now_mon) + return 0; + + offset = now_rt - now_mon; + offset_updated = now_mon; + } + + return monotonic_time + offset; +} + /* * Wrapper around inet_ntop designed to handle the "bug" that mapped IPv4 * addresses are formated as IPv6 addresses for AF_INET6 @@ -461,13 +486,18 @@ static void handle_rtt_event(void *ctx, int cpu, void *data, __u32 data_size) const struct rtt_event *e = data; char saddr[INET6_ADDRSTRLEN]; char daddr[INET6_ADDRSTRLEN]; + char timestr[9]; + __u64 ts = convert_monotonic_to_realtime(e->timestamp); + time_t ts_s = ts / NS_PER_SECOND; format_ip_address(e->flow.ipv, &e->flow.saddr.ip, saddr, sizeof(saddr)); format_ip_address(e->flow.ipv, &e->flow.daddr.ip, daddr, sizeof(daddr)); + strftime(timestr, sizeof(timestr), "%H:%M:%S", localtime(&ts_s)); - printf("%llu.%06llu ms %s:%d+%s:%d\n", e->rtt / NS_PER_MS, - e->rtt % NS_PER_MS, saddr, ntohs(e->flow.saddr.port), daddr, - ntohs(e->flow.daddr.port)); + printf("%s.%09llu %llu.%06llu ms %llu.%06llu ms %s:%d+%s:%d\n", timestr, + ts % NS_PER_SECOND, e->rtt / NS_PER_MS, e->rtt % NS_PER_MS, + e->min_rtt / NS_PER_MS, e->min_rtt % NS_PER_MS, saddr, + ntohs(e->flow.saddr.port), daddr, ntohs(e->flow.daddr.port)); } static void handle_missed_rtt_event(void *ctx, int cpu, __u64 lost_cnt) diff --git a/pping/pping.h b/pping/pping.h index ac7d188..4c09bc4 100644 --- a/pping/pping.h +++ b/pping/pping.h @@ -39,6 +39,7 @@ struct network_tuple { }; struct flow_state { + __u64 min_rtt; __u64 last_timestamp; __u32 last_id; __u32 reserved; @@ -51,6 +52,8 @@ struct packet_id { struct rtt_event { __u64 rtt; + __u64 min_rtt; + __u64 timestamp; struct network_tuple flow; __u32 reserved; }; diff --git a/pping/pping_kern.c b/pping/pping_kern.c index 735cec0..ad9169b 100644 --- a/pping/pping_kern.c +++ b/pping/pping_kern.c @@ -147,11 +147,7 @@ static int parse_tcp_identifier(struct parsing_context *ctx, __be16 *sport, return -1; // Check if connection is closing - if (tcph->fin || tcph->rst) { - *flow_closing = true; - /* bpf_printk("Detected connection closing on %d\n", */ - /* ctx->is_egress); //Upsets verifier? */ - } + *flow_closing = tcph->rst || (!ctx->is_egress && tcph->fin); // Do not timestamp pure ACKs if (ctx->is_egress && ctx->nh.pos - ctx->data >= ctx->pkt_len && @@ -324,6 +320,7 @@ int pping_ingress(struct xdp_md *ctx) struct packet_id p_id = { 0 }; __u64 *p_ts; struct rtt_event event = { 0 }; + struct flow_state *f_state; struct parsing_context pctx = { .data = (void *)(long)ctx->data, .data_end = (void *)(long)ctx->data_end, @@ -332,19 +329,18 @@ int pping_ingress(struct xdp_md *ctx) .is_egress = false, }; bool flow_closing = false; + __u64 now; if (parse_packet_identifier(&pctx, &p_id, &flow_closing) < 0) goto out; - // Delete flow, but allow final attempt at RTT calculation - if (flow_closing) - bpf_map_delete_elem(&flow_state, &p_id.flow); - + now = bpf_ktime_get_ns(); p_ts = bpf_map_lookup_elem(&packet_ts, &p_id); - if (!p_ts) - goto out; + if (!p_ts || now < *p_ts) + goto validflow_out; - event.rtt = bpf_ktime_get_ns() - *p_ts; + event.rtt = now - *p_ts; + event.timestamp = now; /* * Attempt to delete timestamp entry as soon as RTT is calculated. * But could have potential concurrency issue where multiple packets @@ -352,10 +348,25 @@ int pping_ingress(struct xdp_md *ctx) */ bpf_map_delete_elem(&packet_ts, &p_id); + // Update flow's min-RTT, may have concurrency issues + f_state = bpf_map_lookup_elem(&flow_state, &p_id.flow); + if (!f_state) + goto validflow_out; + + if (f_state->min_rtt == 0 || event.rtt < f_state->min_rtt) + f_state->min_rtt = event.rtt; + + event.min_rtt = f_state->min_rtt; + __builtin_memcpy(&event.flow, &p_id.flow, sizeof(struct network_tuple)); bpf_perf_event_output(ctx, &rtt_events, BPF_F_CURRENT_CPU, &event, sizeof(event)); +validflow_out: + // Wait with deleting flow until having pushed final RTT message + if (flow_closing) + bpf_map_delete_elem(&flow_state, &p_id.flow); + out: return XDP_PASS; }