Files
xdp-project-bpf-examples/pping/pping.c
Simon Sundberg 70f255cbf8 pping: Ignore SYN packets by default
Make ePPing ignore TCP SYN packets by default, so that the
initial handshake phase of the connection is ignored. Add an
option (--include-syn/-s) to explicitly include SYN packets.

The main reason it can be a good idea to avoid SYN-packets is to avoid
being affected by SYN-flood attacks. When ePPing also includes
SYN-packets it becomes quite vulnerable to SYN-flood attacks, which
will quickly fill up its flow_state table, blocking actual useful
flows from being tracked. As ePPing will consider the connection
opened as soon as it sees the SYN-ACK (it will not wait for final
ACK), flow-state created from SYN-flood attacks will also stay around
in the flow-state table for a long time (5 minutes currently) as no
RST/FIN will be sent that can be used to close it.

The drawback from ignoring SYN-packets is that no RTTs will be
collected during the handshake phase, and all connections will be
considered opened due to "first observed packet".

A more refined approach could be to properly track the full TCP
handshake (SYN + SYN-ACK + ACK) instead of the more generic "open once
we see reply in reverse direction" used now. However, this adds a fair
bit of additional protocol-specific logic. Furthermore, to track the
full handshake we will still need to store some flow-state before the
handshake is completed, and thus such a solution would still be
vulnerable to SYN-flood attacks (although the incomplete flow states
could potentially be cleaned up faster).

Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
2022-11-04 13:47:08 +01:00

1150 lines
31 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
static const char *__doc__ =
"Passive Ping - monitor flow RTT based on header inspection";
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <linux/if_link.h>
#include <net/if.h> // For if_nametoindex
#include <arpa/inet.h> // For inet_ntoa and ntohs
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <getopt.h>
#include <stdbool.h>
#include <signal.h> // For detecting Ctrl-C
#include <sys/resource.h> // For setting rlmit
#include <time.h>
#include <pthread.h>
#include <xdp/libxdp.h>
#include "json_writer.h"
#include "pping.h" //common structs for user-space and BPF parts
#define PERF_BUFFER_PAGES 64 // Related to the perf-buffer size?
#define PERF_POLL_TIMEOUT_MS 100
#define MAX_PATH_LEN 1024
#define MON_TO_REAL_UPDATE_FREQ \
(1 * NS_PER_SECOND) // Update offset between CLOCK_MONOTONIC and CLOCK_REALTIME once per second
enum PPING_OUTPUT_FORMAT {
PPING_OUTPUT_STANDARD,
PPING_OUTPUT_JSON,
PPING_OUTPUT_PPVIZ
};
/*
* BPF implementation of pping using libbpf.
* Uses TC-BPF for egress and XDP for ingress.
* - On egrees, packets are parsed for an identifer,
* if found added to hashmap using flow+identifier as key,
* and current time as value.
* - On ingress, packets are parsed for reply identifer,
* if found looksup hashmap using reverse-flow+identifier as key,
* and calculates RTT as different between now and stored timestamp.
* - Calculated RTTs are pushed to userspace
* (together with the related flow) and printed out.
*/
// Structure to contain arguments for periodic_map_cleanup (for passing to pthread_create)
// Also keeps information about the thread in which the cleanup function runs
struct map_cleanup_args {
pthread_t tid;
struct bpf_link *tsclean_link;
struct bpf_link *flowclean_link;
__u64 cleanup_interval;
bool valid_thread;
};
// Store configuration values in struct to easily pass around
struct pping_config {
struct bpf_config bpf_config;
struct bpf_tc_opts tc_ingress_opts;
struct bpf_tc_opts tc_egress_opts;
struct map_cleanup_args clean_args;
char *object_path;
char *ingress_prog;
char *egress_prog;
char *cleanup_ts_prog;
char *cleanup_flow_prog;
char *packet_map;
char *flow_map;
char *event_map;
int ifindex;
struct xdp_program *xdp_prog;
int ingress_prog_id;
int egress_prog_id;
char ifname[IF_NAMESIZE];
enum PPING_OUTPUT_FORMAT output_format;
bool force;
bool created_tc_hook;
};
static volatile sig_atomic_t keep_running = 1;
static json_writer_t *json_ctx = NULL;
static void (*print_event_func)(const union pping_event *) = NULL;
static const struct option long_options[] = {
{ "help", no_argument, NULL, 'h' },
{ "interface", required_argument, NULL, 'i' }, // Name of interface to run on
{ "rate-limit", required_argument, NULL, 'r' }, // Sampling rate-limit in ms
{ "rtt-rate", required_argument, NULL, 'R' }, // Sampling rate in terms of flow-RTT (ex 1 sample per RTT-interval)
{ "rtt-type", required_argument, NULL, 't' }, // What type of RTT the RTT-rate should be applied to ("min" or "smoothed"), only relevant if rtt-rate is provided
{ "force", no_argument, NULL, 'f' }, // Overwrite any existing XDP program on interface, remove qdisc on cleanup
{ "cleanup-interval", required_argument, NULL, 'c' }, // Map cleaning interval in s, 0 to disable
{ "format", required_argument, NULL, 'F' }, // Which format to output in (standard/json/ppviz)
{ "ingress-hook", required_argument, NULL, 'I' }, // Use tc or XDP as ingress hook
{ "tcp", no_argument, NULL, 'T' }, // Calculate and report RTTs for TCP traffic (with TCP timestamps)
{ "icmp", no_argument, NULL, 'C' }, // Calculate and report RTTs for ICMP echo-reply traffic
{ "include-local", no_argument, NULL, 'l' }, // Also report "internal" RTTs
{ "include-SYN", no_argument, NULL, 's' }, // Include SYN-packets in tracking (may fill up flow state with half-open connections)
{ 0, 0, NULL, 0 }
};
/*
* Copied from Jesper Dangaaard Brouer's traffic-pacing-edt example
*/
static void print_usage(char *argv[])
{
int i;
printf("\nDOCUMENTATION:\n%s\n", __doc__);
printf("\n");
printf(" Usage: %s (options-see-below)\n", argv[0]);
printf(" Listing options:\n");
for (i = 0; long_options[i].name != 0; i++) {
printf(" --%-12s", long_options[i].name);
if (long_options[i].flag != NULL)
printf(" flag (internal value:%d)",
*long_options[i].flag);
else
printf(" short-option: -%c", long_options[i].val);
printf("\n");
}
printf("\n");
}
/*
* Simple convenience wrapper around libbpf_strerror for which you don't have
* to provide a buffer. Instead uses its own static buffer and returns a pointer
* to it.
*
* This of course comes with the tradeoff that it is no longer thread safe and
* later invocations overwrite previous results.
*/
static const char *get_libbpf_strerror(int err)
{
static char buf[200];
libbpf_strerror(err, buf, sizeof(buf));
return buf;
}
static int parse_bounded_double(double *res, const char *str, double low,
double high, const char *name)
{
char *endptr;
*res = strtod(str, &endptr);
if (strlen(str) != endptr - str) {
fprintf(stderr, "%s %s is not a valid number\n", name, str);
return -EINVAL;
}
if (*res < low || *res > high) {
fprintf(stderr, "%s must in range [%g, %g]\n", name, low, high);
return -EINVAL;
}
return 0;
}
static int parse_arguments(int argc, char *argv[], struct pping_config *config)
{
int err, opt;
double rate_limit_ms, cleanup_interval_s, rtt_rate;
config->ifindex = 0;
config->bpf_config.localfilt = true;
config->force = false;
config->bpf_config.track_tcp = false;
config->bpf_config.track_icmp = false;
config->bpf_config.skip_syn = true;
while ((opt = getopt_long(argc, argv, "hflTCsi:r:R:t:c:F:I:",
long_options, NULL)) != -1) {
switch (opt) {
case 'i':
if (strlen(optarg) > IF_NAMESIZE) {
fprintf(stderr, "interface name too long\n");
return -EINVAL;
}
strncpy(config->ifname, optarg, IF_NAMESIZE);
config->ifindex = if_nametoindex(config->ifname);
if (config->ifindex == 0) {
err = -errno;
fprintf(stderr,
"Could not get index of interface %s: %s\n",
config->ifname,
get_libbpf_strerror(err));
return err;
}
break;
case 'r':
err = parse_bounded_double(&rate_limit_ms, optarg, 0,
7 * S_PER_DAY * MS_PER_S,
"rate-limit");
if (err)
return -EINVAL;
config->bpf_config.rate_limit =
rate_limit_ms * NS_PER_MS;
break;
case 'R':
err = parse_bounded_double(&rtt_rate, optarg, 0, 10000,
"rtt-rate");
if (err)
return -EINVAL;
config->bpf_config.rtt_rate =
DOUBLE_TO_FIXPOINT(rtt_rate);
break;
case 't':
if (strcmp(optarg, "min") == 0) {
config->bpf_config.use_srtt = false;
} else if (strcmp(optarg, "smoothed") == 0) {
config->bpf_config.use_srtt = true;
} else {
fprintf(stderr,
"rtt-type must be \"min\" or \"smoothed\"\n");
return -EINVAL;
}
break;
case 'c':
err = parse_bounded_double(&cleanup_interval_s, optarg,
0, 7 * S_PER_DAY,
"cleanup-interval");
if (err)
return -EINVAL;
config->clean_args.cleanup_interval =
cleanup_interval_s * NS_PER_SECOND;
break;
case 'F':
if (strcmp(optarg, "standard") == 0) {
config->output_format = PPING_OUTPUT_STANDARD;
} else if (strcmp(optarg, "json") == 0) {
config->output_format = PPING_OUTPUT_JSON;
} else if (strcmp(optarg, "ppviz") == 0) {
config->output_format = PPING_OUTPUT_PPVIZ;
} else {
fprintf(stderr,
"format must be \"standard\", \"json\" or \"ppviz\"\n");
return -EINVAL;
}
break;
case 'I':
if (strcmp(optarg, "xdp") == 0) {
config->ingress_prog = "pping_xdp_ingress";
} else if (strcmp(optarg, "tc") == 0) {
config->ingress_prog = "pping_tc_ingress";
} else {
fprintf(stderr,
"ingress-hook must be \"xdp\" or \"tc\"\n");
return -EINVAL;
}
break;
case 'l':
config->bpf_config.localfilt = false;
break;
case 'f':
config->force = true;
break;
case 'T':
config->bpf_config.track_tcp = true;
break;
case 'C':
config->bpf_config.track_icmp = true;
break;
case 's':
config->bpf_config.skip_syn = false;
break;
case 'h':
printf("HELP:\n");
print_usage(argv);
exit(0);
default:
fprintf(stderr, "Unknown option %s\n", argv[optind]);
return -EINVAL;
}
}
if (config->ifindex == 0) {
fprintf(stderr,
"An interface (-i or --interface) must be provided\n");
return -EINVAL;
}
return 0;
}
const char *tracked_protocols_to_str(struct pping_config *config)
{
bool tcp = config->bpf_config.track_tcp;
bool icmp = config->bpf_config.track_icmp;
return tcp && icmp ? "TCP, ICMP" : tcp ? "TCP" : "ICMP";
}
const char *output_format_to_str(enum PPING_OUTPUT_FORMAT format)
{
switch (format) {
case PPING_OUTPUT_STANDARD:
return "standard";
case PPING_OUTPUT_JSON:
return "json";
case PPING_OUTPUT_PPVIZ:
return "ppviz";
default:
return "unkown format";
}
}
void abort_program(int sig)
{
keep_running = 0;
}
static int set_rlimit(long int lim)
{
struct rlimit rlim = {
.rlim_cur = lim,
.rlim_max = lim,
};
return !setrlimit(RLIMIT_MEMLOCK, &rlim) ? 0 : -errno;
}
static int init_rodata(struct bpf_object *obj, void *src, size_t size)
{
struct bpf_map *map = NULL;
bpf_object__for_each_map(map, obj) {
if (strstr(bpf_map__name(map), ".rodata"))
return bpf_map__set_initial_value(map, src, size);
}
// No .rodata map found
return -EINVAL;
}
/*
* Attempt to attach program in section sec of obj to ifindex.
* If sucessful, will return the positive program id of the attached.
* On failure, will return a negative error code.
*/
static int xdp_attach(struct bpf_object *obj, const char *prog_name,
int ifindex, struct xdp_program **xdp_prog)
{
struct xdp_program *prog;
int err;
DECLARE_LIBXDP_OPTS(xdp_program_opts, opts,
.prog_name = prog_name,
.obj = obj);
prog = xdp_program__create(&opts);
if (!prog)
return -errno;
err = xdp_program__attach(prog, ifindex, XDP_MODE_UNSPEC, 0);
if (err) {
xdp_program__close(prog);
return err;
}
*xdp_prog = prog;
return 0;
}
static int xdp_detach(struct xdp_program *prog, int ifindex)
{
int err;
err = xdp_program__detach(prog, ifindex, XDP_MODE_UNSPEC, 0);
xdp_program__close(prog);
return err;
}
/*
* Will attempt to attach program at section sec in obj to ifindex at
* attach_point.
* On success, will fill in the passed opts, optionally set new_hook depending
* if it created a new hook or not, and return the id of the attached program.
* On failure it will return a negative error code.
*/
static int tc_attach(struct bpf_object *obj, int ifindex,
enum bpf_tc_attach_point attach_point,
const char *prog_name, struct bpf_tc_opts *opts,
bool *new_hook)
{
int err;
int prog_fd;
bool created_hook = true;
DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
.attach_point = attach_point);
err = bpf_tc_hook_create(&hook);
if (err == -EEXIST)
created_hook = false;
else if (err)
return err;
prog_fd = bpf_program__fd(
bpf_object__find_program_by_name(obj, prog_name));
if (prog_fd < 0) {
err = prog_fd;
goto err_after_hook;
}
opts->prog_fd = prog_fd;
opts->prog_id = 0;
err = bpf_tc_attach(&hook, opts);
if (err)
goto err_after_hook;
if (new_hook)
*new_hook = created_hook;
return opts->prog_id;
err_after_hook:
/*
* Destroy hook if it created it.
* This is slightly racy, as some other program may still have been
* attached to the hook between its creation and this error cleanup.
*/
if (created_hook) {
hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
bpf_tc_hook_destroy(&hook);
}
return err;
}
static int tc_detach(int ifindex, enum bpf_tc_attach_point attach_point,
const struct bpf_tc_opts *opts, bool destroy_hook)
{
int err;
int hook_err = 0;
DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
.attach_point = attach_point);
DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_info, .handle = opts->handle,
.priority = opts->priority);
// Check we are removing the correct program
err = bpf_tc_query(&hook, &opts_info);
if (err)
return err;
if (opts->prog_id != opts_info.prog_id)
return -ENOENT;
// Attempt to detach program
opts_info.prog_fd = 0;
opts_info.prog_id = 0;
opts_info.flags = 0;
err = bpf_tc_detach(&hook, &opts_info);
/*
* Attempt to destroy hook regardsless if detach succeded.
* If the hook is destroyed sucessfully, program should
* also be detached.
*/
if (destroy_hook) {
hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
hook_err = bpf_tc_hook_destroy(&hook);
}
err = destroy_hook ? hook_err : err;
return err;
}
/*
* Attach program prog_name (of typer iter/bpf_map_elem) from obj to map_name
*/
static int iter_map_attach(struct bpf_object *obj, const char *prog_name,
const char *map_name, struct bpf_link **link)
{
struct bpf_program *prog;
struct bpf_link *linkptr;
union bpf_iter_link_info linfo = { 0 };
int map_fd, err;
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts,
.link_info = &linfo,
.link_info_len = sizeof(linfo));
map_fd = bpf_object__find_map_fd_by_name(obj, map_name);
if (map_fd < 0)
return map_fd;
linfo.map.map_fd = map_fd;
prog = bpf_object__find_program_by_name(obj, prog_name);
err = libbpf_get_error(prog);
if (err)
return err;
linkptr = bpf_program__attach_iter(prog, &iter_opts);
err = libbpf_get_error(linkptr);
if (err)
return err;
*link = linkptr;
return 0;
}
/*
* Execute the iter/bpf_map_elem program attached through link on map elements
*/
static int iter_map_execute(struct bpf_link *link)
{
int iter_fd, err;
char buf[64];
if (!link)
return -EINVAL;
iter_fd = bpf_iter_create(bpf_link__fd(link));
if (iter_fd < 0)
return iter_fd;
while ((err = read(iter_fd, &buf, sizeof(buf))) > 0)
;
close(iter_fd);
return err;
}
/*
* Returns time as nanoseconds in a single __u64.
* On failure, the value 0 is returned (and errno will be set).
*/
static __u64 get_time_ns(clockid_t clockid)
{
struct timespec t;
if (clock_gettime(clockid, &t) != 0)
return 0;
return (__u64)t.tv_sec * NS_PER_SECOND + (__u64)t.tv_nsec;
}
static void *periodic_map_cleanup(void *args)
{
struct map_cleanup_args *argp = args;
struct timespec interval;
interval.tv_sec = argp->cleanup_interval / NS_PER_SECOND;
interval.tv_nsec = argp->cleanup_interval % NS_PER_SECOND;
char buf[256];
int err;
while (keep_running) {
err = iter_map_execute(argp->tsclean_link);
if (err) {
// Running in separate thread so can't use get_libbpf_strerror
libbpf_strerror(err, buf, sizeof(buf));
fprintf(stderr,
"Error while cleaning timestamp map: %s\n",
buf);
}
err = iter_map_execute(argp->flowclean_link);
if (err) {
libbpf_strerror(err, buf, sizeof(buf));
fprintf(stderr, "Error while cleaning flow map: %s\n",
buf);
}
nanosleep(&interval, NULL);
}
pthread_exit(NULL);
}
static __u64 convert_monotonic_to_realtime(__u64 monotonic_time)
{
static __u64 offset = 0;
static __u64 offset_updated = 0;
__u64 now_mon = get_time_ns(CLOCK_MONOTONIC);
__u64 now_rt;
if (offset == 0 ||
(now_mon > offset_updated &&
now_mon - offset_updated > MON_TO_REAL_UPDATE_FREQ)) {
now_mon = get_time_ns(CLOCK_MONOTONIC);
now_rt = get_time_ns(CLOCK_REALTIME);
if (now_rt < now_mon)
return 0;
offset = now_rt - now_mon;
offset_updated = now_mon;
}
return monotonic_time + offset;
}
/*
* Wrapper around inet_ntop designed to handle the "bug" that mapped IPv4
* addresses are formated as IPv6 addresses for AF_INET6
*/
static int format_ip_address(char *buf, size_t size, int af,
const struct in6_addr *addr)
{
if (af == AF_INET)
return inet_ntop(af, &addr->s6_addr[12], buf, size) ? -errno :
0;
else if (af == AF_INET6)
return inet_ntop(af, addr, buf, size) ? -errno : 0;
return -EINVAL;
}
static const char *proto_to_str(__u16 proto)
{
static char buf[8];
switch (proto) {
case IPPROTO_TCP:
return "TCP";
case IPPROTO_ICMP:
return "ICMP";
case IPPROTO_ICMPV6:
return "ICMPv6";
default:
snprintf(buf, sizeof(buf), "%d", proto);
return buf;
}
}
static const char *flowevent_to_str(enum flow_event_type fe)
{
switch (fe) {
case FLOW_EVENT_NONE:
return "none";
case FLOW_EVENT_OPENING:
return "opening";
case FLOW_EVENT_CLOSING:
case FLOW_EVENT_CLOSING_BOTH:
return "closing";
default:
return "unknown";
}
}
static const char *eventreason_to_str(enum flow_event_reason er)
{
switch (er) {
case EVENT_REASON_NONE:
return "none";
case EVENT_REASON_SYN:
return "SYN";
case EVENT_REASON_SYN_ACK:
return "SYN-ACK";
case EVENT_REASON_FIRST_OBS_PCKT:
return "first observed packet";
case EVENT_REASON_FIN:
return "FIN";
case EVENT_REASON_RST:
return "RST";
case EVENT_REASON_FLOW_TIMEOUT:
return "flow timeout";
default:
return "unknown";
}
}
static const char *eventsource_to_str(enum flow_event_source es)
{
switch (es) {
case EVENT_SOURCE_PKT_SRC:
return "src";
case EVENT_SOURCE_PKT_DEST:
return "dest";
case EVENT_SOURCE_GC:
return "garbage collection";
default:
return "unknown";
}
}
static void print_flow_ppvizformat(FILE *stream,
const struct network_tuple *flow)
{
char saddr[INET6_ADDRSTRLEN];
char daddr[INET6_ADDRSTRLEN];
format_ip_address(saddr, sizeof(saddr), flow->ipv, &flow->saddr.ip);
format_ip_address(daddr, sizeof(daddr), flow->ipv, &flow->daddr.ip);
fprintf(stream, "%s:%d+%s:%d", saddr, ntohs(flow->saddr.port), daddr,
ntohs(flow->daddr.port));
}
static void print_ns_datetime(FILE *stream, __u64 monotonic_ns)
{
char timestr[9];
__u64 ts = convert_monotonic_to_realtime(monotonic_ns);
time_t ts_s = ts / NS_PER_SECOND;
strftime(timestr, sizeof(timestr), "%H:%M:%S", localtime(&ts_s));
fprintf(stream, "%s.%09llu", timestr, ts % NS_PER_SECOND);
}
static void print_event_standard(const union pping_event *e)
{
if (e->event_type == EVENT_TYPE_RTT) {
print_ns_datetime(stdout, e->rtt_event.timestamp);
printf(" %llu.%06llu ms %llu.%06llu ms %s ",
e->rtt_event.rtt / NS_PER_MS,
e->rtt_event.rtt % NS_PER_MS,
e->rtt_event.min_rtt / NS_PER_MS,
e->rtt_event.min_rtt % NS_PER_MS,
proto_to_str(e->rtt_event.flow.proto));
print_flow_ppvizformat(stdout, &e->rtt_event.flow);
printf("\n");
} else if (e->event_type == EVENT_TYPE_FLOW) {
print_ns_datetime(stdout, e->flow_event.timestamp);
printf(" %s ", proto_to_str(e->rtt_event.flow.proto));
print_flow_ppvizformat(stdout, &e->flow_event.flow);
printf(" %s due to %s from %s\n",
flowevent_to_str(e->flow_event.flow_event_type),
eventreason_to_str(e->flow_event.reason),
eventsource_to_str(e->flow_event.source));
}
}
static void print_event_ppviz(const union pping_event *e)
{
// ppviz format does not support flow events
if (e->event_type != EVENT_TYPE_RTT)
return;
const struct rtt_event *re = &e->rtt_event;
__u64 time = convert_monotonic_to_realtime(re->timestamp);
printf("%llu.%09llu %llu.%09llu %llu.%09llu ", time / NS_PER_SECOND,
time % NS_PER_SECOND, re->rtt / NS_PER_SECOND,
re->rtt % NS_PER_SECOND, re->min_rtt / NS_PER_SECOND,
re->min_rtt);
print_flow_ppvizformat(stdout, &re->flow);
printf("\n");
}
static void print_common_fields_json(json_writer_t *ctx,
const union pping_event *e)
{
const struct network_tuple *flow = &e->rtt_event.flow;
char saddr[INET6_ADDRSTRLEN];
char daddr[INET6_ADDRSTRLEN];
format_ip_address(saddr, sizeof(saddr), flow->ipv, &flow->saddr.ip);
format_ip_address(daddr, sizeof(daddr), flow->ipv, &flow->daddr.ip);
jsonw_u64_field(ctx, "timestamp",
convert_monotonic_to_realtime(e->rtt_event.timestamp));
jsonw_string_field(ctx, "src_ip", saddr);
jsonw_hu_field(ctx, "src_port", ntohs(flow->saddr.port));
jsonw_string_field(ctx, "dest_ip", daddr);
jsonw_hu_field(ctx, "dest_port", ntohs(flow->daddr.port));
jsonw_string_field(ctx, "protocol", proto_to_str(flow->proto));
}
static void print_rttevent_fields_json(json_writer_t *ctx,
const struct rtt_event *re)
{
jsonw_u64_field(ctx, "rtt", re->rtt);
jsonw_u64_field(ctx, "min_rtt", re->min_rtt);
jsonw_u64_field(ctx, "sent_packets", re->sent_pkts);
jsonw_u64_field(ctx, "sent_bytes", re->sent_bytes);
jsonw_u64_field(ctx, "rec_packets", re->rec_pkts);
jsonw_u64_field(ctx, "rec_bytes", re->rec_bytes);
jsonw_bool_field(ctx, "match_on_egress", re->match_on_egress);
}
static void print_flowevent_fields_json(json_writer_t *ctx,
const struct flow_event *fe)
{
jsonw_string_field(ctx, "flow_event",
flowevent_to_str(fe->flow_event_type));
jsonw_string_field(ctx, "reason", eventreason_to_str(fe->reason));
jsonw_string_field(ctx, "triggered_by", eventsource_to_str(fe->source));
}
static void print_event_json(const union pping_event *e)
{
if (e->event_type != EVENT_TYPE_RTT && e->event_type != EVENT_TYPE_FLOW)
return;
if (!json_ctx) {
json_ctx = jsonw_new(stdout);
jsonw_start_array(json_ctx);
}
jsonw_start_object(json_ctx);
print_common_fields_json(json_ctx, e);
if (e->event_type == EVENT_TYPE_RTT)
print_rttevent_fields_json(json_ctx, &e->rtt_event);
else // flow-event
print_flowevent_fields_json(json_ctx, &e->flow_event);
jsonw_end_object(json_ctx);
}
static void warn_map_full(const struct map_full_event *e)
{
print_ns_datetime(stderr, e->timestamp);
fprintf(stderr, " Warning: Unable to create %s entry for flow ",
e->map == PPING_MAP_FLOWSTATE ? "flow" : "timestamp");
print_flow_ppvizformat(stderr, &e->flow);
fprintf(stderr, "\n");
}
static void print_map_clean_info(const struct map_clean_event *e)
{
fprintf(stderr,
"%s: cycle: %u, entries: %u, time: %llu, timeout: %u, tot timeout: %llu, selfdel: %u, tot selfdel: %llu\n",
e->map == PPING_MAP_PACKETTS ? "packet_ts" : "flow_state",
e->clean_cycles, e->last_processed_entries, e->last_runtime,
e->last_timeout_del, e->tot_timeout_del, e->last_auto_del,
e->tot_auto_del);
}
static void handle_event(void *ctx, int cpu, void *data, __u32 data_size)
{
const union pping_event *e = data;
if (data_size < sizeof(e->event_type))
return;
switch (e->event_type) {
case EVENT_TYPE_MAP_FULL:
warn_map_full(&e->map_event);
break;
case EVENT_TYPE_MAP_CLEAN:
print_map_clean_info(&e->map_clean_event);
break;
case EVENT_TYPE_RTT:
case EVENT_TYPE_FLOW:
print_event_func(e);
break;
default:
fprintf(stderr, "Warning: Unknown event type %llu\n",
e->event_type);
};
}
static void handle_missed_events(void *ctx, int cpu, __u64 lost_cnt)
{
fprintf(stderr, "Lost %llu events on CPU %d\n", lost_cnt, cpu);
}
/*
* Sets only the necessary programs in the object file to autoload.
*
* Assumes all programs are set to autoload by default, so in practice
* deactivates autoloading for the program that does not need to be loaded.
*/
static int set_programs_to_load(struct bpf_object *obj,
struct pping_config *config)
{
struct bpf_program *prog;
char *unload_prog =
strcmp(config->ingress_prog, "pping_xdp_ingress") != 0 ?
"pping_xdp_ingress" :
"pping_tc_ingress";
prog = bpf_object__find_program_by_name(obj, unload_prog);
if (libbpf_get_error(prog))
return libbpf_get_error(prog);
return bpf_program__set_autoload(prog, false);
}
static int load_attach_bpfprogs(struct bpf_object **obj,
struct pping_config *config)
{
int err, detach_err;
// Open and load ELF file
*obj = bpf_object__open(config->object_path);
err = libbpf_get_error(*obj);
if (err) {
fprintf(stderr, "Failed opening object file %s: %s\n",
config->object_path, get_libbpf_strerror(err));
return err;
}
err = init_rodata(*obj, &config->bpf_config,
sizeof(config->bpf_config));
if (err) {
fprintf(stderr, "Failed pushing user-configration to %s: %s\n",
config->object_path, get_libbpf_strerror(err));
return err;
}
set_programs_to_load(*obj, config);
// Attach ingress prog
if (strcmp(config->ingress_prog, "pping_xdp_ingress") == 0) {
/* xdp_attach() loads 'obj' through libxdp */
err = xdp_attach(*obj, config->ingress_prog, config->ifindex,
&config->xdp_prog);
} else {
err = bpf_object__load(*obj);
if (err) {
fprintf(stderr, "Failed loading bpf programs in %s: %s\n",
config->object_path, get_libbpf_strerror(err));
return err;
}
err = tc_attach(*obj, config->ifindex, BPF_TC_INGRESS,
config->ingress_prog,
&config->tc_ingress_opts, NULL);
config->ingress_prog_id = err;
}
if (err < 0) {
fprintf(stderr,
"Failed attaching ingress BPF program on interface %s: %s\n",
config->ifname, get_libbpf_strerror(err));
return err;
}
// Attach egress prog
config->egress_prog_id =
tc_attach(*obj, config->ifindex, BPF_TC_EGRESS,
config->egress_prog, &config->tc_egress_opts,
&config->created_tc_hook);
if (config->egress_prog_id < 0) {
fprintf(stderr,
"Failed attaching egress BPF program on interface %s: %s\n",
config->ifname,
get_libbpf_strerror(config->egress_prog_id));
err = config->egress_prog_id;
goto egress_err;
}
return 0;
egress_err:
if (config->xdp_prog) {
detach_err = xdp_detach(config->xdp_prog, config->ifindex);
config->xdp_prog = NULL;
} else {
detach_err =
tc_detach(config->ifindex, BPF_TC_INGRESS,
&config->tc_ingress_opts, config->created_tc_hook);
}
if (detach_err)
fprintf(stderr, "Failed detaching ingress program from %s: %s\n",
config->ifname, get_libbpf_strerror(detach_err));
return err;
}
static int setup_periodical_map_cleaning(struct bpf_object *obj,
struct pping_config *config)
{
int err;
if (config->clean_args.valid_thread) {
fprintf(stderr,
"There already exists a thread for the map cleanup\n");
return -EINVAL;
}
if (!config->clean_args.cleanup_interval) {
fprintf(stderr, "Periodic map cleanup disabled\n");
return 0;
}
err = iter_map_attach(obj, config->cleanup_ts_prog, config->packet_map,
&config->clean_args.tsclean_link);
if (err) {
fprintf(stderr,
"Failed attaching cleanup program to timestamp map: %s\n",
get_libbpf_strerror(err));
return err;
}
err = iter_map_attach(obj, config->cleanup_flow_prog, config->flow_map,
&config->clean_args.flowclean_link);
if (err) {
fprintf(stderr,
"Failed attaching cleanup program to flow map: %s\n",
get_libbpf_strerror(err));
goto destroy_ts_link;
}
err = pthread_create(&config->clean_args.tid, NULL,
periodic_map_cleanup, &config->clean_args);
if (err) {
fprintf(stderr,
"Failed starting thread to perform periodic map cleanup: %s\n",
get_libbpf_strerror(err));
goto destroy_links;
}
config->clean_args.valid_thread = true;
return 0;
destroy_links:
bpf_link__destroy(config->clean_args.flowclean_link);
destroy_ts_link:
bpf_link__destroy(config->clean_args.tsclean_link);
return err;
}
int main(int argc, char *argv[])
{
int err = 0, detach_err;
struct bpf_object *obj = NULL;
struct perf_buffer *pb = NULL;
DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_ingress_opts);
DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_egress_opts);
struct pping_config config = {
.bpf_config = { .rate_limit = 100 * NS_PER_MS,
.rtt_rate = 0,
.use_srtt = false },
.clean_args = { .cleanup_interval = 1 * NS_PER_SECOND,
.valid_thread = false },
.object_path = "pping_kern.o",
.ingress_prog = "pping_xdp_ingress",
.egress_prog = "pping_tc_egress",
.cleanup_ts_prog = "tsmap_cleanup",
.cleanup_flow_prog = "flowmap_cleanup",
.packet_map = "packet_ts",
.flow_map = "flow_state",
.event_map = "events",
.tc_ingress_opts = tc_ingress_opts,
.tc_egress_opts = tc_egress_opts,
.output_format = PPING_OUTPUT_STANDARD,
};
// Detect if running as root
if (geteuid() != 0) {
printf("This program must be run as root.\n");
return EXIT_FAILURE;
}
// Increase rlimit
err = set_rlimit(RLIM_INFINITY);
if (err) {
fprintf(stderr, "Could not set rlimit to infinity: %s\n",
get_libbpf_strerror(err));
return EXIT_FAILURE;
}
err = parse_arguments(argc, argv, &config);
if (err) {
fprintf(stderr, "Failed parsing arguments: %s\n",
get_libbpf_strerror(err));
print_usage(argv);
return EXIT_FAILURE;
}
if (!config.bpf_config.track_tcp && !config.bpf_config.track_icmp)
config.bpf_config.track_tcp = true;
if (config.bpf_config.track_icmp &&
config.output_format == PPING_OUTPUT_PPVIZ)
fprintf(stderr,
"Warning: ppviz format mainly intended for TCP traffic, but may now include ICMP traffic as well\n");
switch (config.output_format) {
case PPING_OUTPUT_STANDARD:
print_event_func = print_event_standard;
break;
case PPING_OUTPUT_JSON:
print_event_func = print_event_json;
break;
case PPING_OUTPUT_PPVIZ:
print_event_func = print_event_ppviz;
break;
}
fprintf(stderr, "Starting ePPing in %s mode tracking %s on %s\n",
output_format_to_str(config.output_format),
tracked_protocols_to_str(&config), config.ifname);
// Allow program to perform cleanup on Ctrl-C
signal(SIGINT, abort_program);
signal(SIGTERM, abort_program);
err = load_attach_bpfprogs(&obj, &config);
if (err) {
fprintf(stderr,
"Failed loading and attaching BPF programs in %s\n",
config.object_path);
return EXIT_FAILURE;
}
// Set up perf buffer
pb = perf_buffer__new(bpf_object__find_map_fd_by_name(obj,
config.event_map),
PERF_BUFFER_PAGES, handle_event,
handle_missed_events, NULL, NULL);
err = libbpf_get_error(pb);
if (err) {
fprintf(stderr, "Failed to open perf buffer %s: %s\n",
config.event_map, get_libbpf_strerror(err));
goto cleanup_attached_progs;
}
err = setup_periodical_map_cleaning(obj, &config);
if (err) {
fprintf(stderr, "Failed setting up map cleaning: %s\n",
get_libbpf_strerror(err));
goto cleanup_perf_buffer;
}
// Main loop
while (keep_running) {
if ((err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS)) < 0) {
if (keep_running) // Only print polling error if it wasn't caused by program termination
fprintf(stderr,
"Error polling perf buffer: %s\n",
get_libbpf_strerror(-err));
break;
}
}
// Cleanup
if (config.output_format == PPING_OUTPUT_JSON && json_ctx) {
jsonw_end_array(json_ctx);
jsonw_destroy(&json_ctx);
}
if (config.clean_args.valid_thread) {
pthread_cancel(config.clean_args.tid);
pthread_join(config.clean_args.tid, NULL);
bpf_link__destroy(config.clean_args.tsclean_link);
bpf_link__destroy(config.clean_args.flowclean_link);
}
cleanup_perf_buffer:
perf_buffer__free(pb);
cleanup_attached_progs:
if (config.xdp_prog)
detach_err = xdp_detach(config.xdp_prog, config.ifindex);
else
detach_err = tc_detach(config.ifindex, BPF_TC_INGRESS,
&config.tc_ingress_opts, false);
if (detach_err)
fprintf(stderr,
"Failed removing ingress program from interface %s: %s\n",
config.ifname, get_libbpf_strerror(detach_err));
detach_err =
tc_detach(config.ifindex, BPF_TC_EGRESS, &config.tc_egress_opts,
config.force && config.created_tc_hook);
if (detach_err)
fprintf(stderr,
"Failed removing egress program from interface %s: %s\n",
config.ifname, get_libbpf_strerror(detach_err));
return (err != 0 && keep_running) || detach_err != 0;
}