diff --git a/AF_XDP-interaction/af_xdp_user.c b/AF_XDP-interaction/af_xdp_user.c index 20d6ce0..9762e52 100644 --- a/AF_XDP-interaction/af_xdp_user.c +++ b/AF_XDP-interaction/af_xdp_user.c @@ -26,9 +26,21 @@ #include #include #include +#include #include #include #include +#include + +#include + +#ifndef SO_PREFER_BUSY_POLL +#define SO_PREFER_BUSY_POLL 69 +#endif +#ifndef SO_BUSY_POLL_BUDGET +#define SO_BUSY_POLL_BUDGET 70 +#endif + #include /* provided by libbpf */ @@ -39,10 +51,13 @@ #include "lib_xsk_extend.h" #include "ethtool_utils.h" +#include "lib_checksum.h" #define NUM_FRAMES 4096 /* Frames per queue */ #define FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE /* 4096 */ +#define FRAME_SIZE_MASK (FRAME_SIZE - 1) #define RX_BATCH_SIZE 64 +#define FQ_REFILL_MAX (RX_BATCH_SIZE * 2) #define INVALID_UMEM_FRAME UINT64_MAX struct mem_frame_allocator { @@ -87,6 +102,16 @@ struct xsk_container { int num; /* Number of xsk_sockets configured */ }; +static void __exit_with_error(int error, const char *file, const char *func, + int line) +{ + fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func, + line, error, strerror(error)); + exit(EXIT_FAILURE); +} + +#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) + /** * BTF setup XDP-hints * ------------------- @@ -166,6 +191,22 @@ int init_btf_info_via_bpf_object(struct bpf_object *bpf_obj) return 0; } +void pr_addr_info(const char *msg, uint64_t pkt_addr, struct xsk_umem_info *umem) +{ + uint64_t pkt_nr = pkt_addr / FRAME_SIZE; /* Integer div round off */ + uint32_t offset = pkt_addr - (pkt_nr * FRAME_SIZE); /* what got rounded off */ + uint8_t *pkt_ptr = NULL; + + if (!debug) + return; + + if (umem) + pkt_ptr = xsk_umem__get_data(umem->buffer, pkt_addr); + + printf(" - Addr-info: %s pkt_nr:%lu offset:%u (addr:0x%lX) ptr:%p\n", + msg, pkt_nr, offset, pkt_addr, pkt_ptr); +} + #define NANOSEC_PER_SEC 1000000000 /* 10^9 */ static uint64_t gettime(void) { @@ -174,7 +215,7 @@ static uint64_t 
gettime(void) res = clock_gettime(CLOCK_MONOTONIC, &t); if (res < 0) { - fprintf(stderr, "Error with gettimeofday! (%i)\n", res); + fprintf(stderr, "Error with clock_gettime! (%i)\n", res); exit(EXIT_FAIL); } return (uint64_t) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; @@ -247,6 +288,15 @@ static const struct option_wrapper long_options[] = { {{"progsec", required_argument, NULL, 2 }, "Load program in
of the ELF file", "
"}, + {{"busy-poll", no_argument, NULL, 'B' }, + "Enable socket prefer NAPI busy-poll mode (remember adjust sysctl too)"}, + + {{"tx-dmac", required_argument, NULL, 'G' }, + "Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format", "aa:bb:cc:dd:ee:ff"}, + + {{"tx-smac", required_argument, NULL, 'H' }, + "Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format", "aa:bb:cc:dd:ee:ff"}, + {{0, 0, NULL, 0 }, NULL, false} }; @@ -271,6 +321,14 @@ static void mem_free_umem_frame(struct mem_frame_allocator *mem, uint64_t frame) { assert(mem->umem_frame_free < mem->umem_frame_max); + /* Remove any packet offset from the frame addr. The kernel RX process + * will add some headroom. Our userspace TX process can also choose to + * add headroom. Thus, frame addr can be returned to our mem allocator + * including this offset. + */ + // frame = (frame / FRAME_SIZE) * FRAME_SIZE; + frame = frame & ~FRAME_SIZE_MASK; + mem->umem_frame_addr[mem->umem_frame_free++] = frame; } @@ -295,12 +353,38 @@ static void mem_init_umem_frame_allocator(struct mem_frame_allocator *mem, mem->umem_frame_max = nr_frames; /* The umem_frame_addr is basically index into umem->buffer memory area */ - for (i = 0; i < nr_frames; i++) - mem->umem_frame_addr[i] = i * FRAME_SIZE; + for (i = 0; i < nr_frames; i++) { + uint64_t addr = i * FRAME_SIZE; + mem->umem_frame_addr[i] = addr; + } mem->umem_frame_free = nr_frames; } +static void apply_setsockopt(struct xsk_socket_info *xsk, bool opt_busy_poll, + int opt_batch_size) +{ + int sock_opt; + + if (!opt_busy_poll) + return; + + sock_opt = 1; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + exit_with_error(errno); + + sock_opt = 20; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + exit_with_error(errno); + + sock_opt = opt_batch_size; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET, + (void *)&sock_opt, 
sizeof(sock_opt)) < 0) + exit_with_error(errno); +} + static struct xsk_umem_info *configure_xsk_umem(void *buffer, uint64_t size, uint32_t frame_size, uint32_t nr_frames) { @@ -420,6 +504,9 @@ static struct xsk_socket_info *xsk_configure_socket(struct config *cfg, /* Due to XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD manually update map */ // xsk_socket__update_xskmap(xsk_info->xsk, xsks_map_fd); + + apply_setsockopt(xsk_info, cfg->opt_busy_poll, RX_BATCH_SIZE); + return xsk_info; error_exit: @@ -444,10 +531,13 @@ static void complete_tx(struct xsk_socket_info *xsk) &idx_cq); if (completed > 0) { - for (int i = 0; i < completed; i++) - mem_free_umem_frame(&xsk->umem->mem, - *xsk_ring_cons__comp_addr(&xsk->cq, - idx_cq++)); + for (int i = 0; i < completed; i++) { + uint64_t addr; + + addr = *xsk_ring_cons__comp_addr(&xsk->cq, idx_cq++); + mem_free_umem_frame(&xsk->umem->mem, addr); + //pr_addr_info(__func__, addr, xsk->umem); + } xsk_ring_cons__release(&xsk->cq, completed); xsk->outstanding_tx -= completed < xsk->outstanding_tx ? 
@@ -473,6 +563,116 @@ static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) *sum = ~csum16_add(csum16_sub(~(*sum), old), new); } +/** + * Packet fill helpers + */ +static uint8_t base_pkt_data[FRAME_SIZE]; + +/* Can be changed via cmdline options (-G|--tx-dmac) or (-H|--tx-smac) */ +static struct ether_addr default_tx_smac = {{ 0x24, 0x5e, 0xbe, 0x57, 0xf1, 0x64 }}; +static struct ether_addr default_tx_dmac = {{ 0xbc, 0xee, 0x7b, 0xda, 0xc2, 0x62 }}; + +#define MIN_PKT_SIZE 64 +static uint16_t opt_pkt_size = MIN_PKT_SIZE; + +#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ + sizeof(struct udphdr)) + +#define ETH_FCS_SIZE 4 +#define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE) +#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) +#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) +#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) + +static void gen_eth_hdr(struct config *cfg, struct ethhdr *eth_hdr) +{ + /* Ethernet header: + * Can be changed via cmdline options (-G|--tx-dmac) or (-H|--tx-smac) + */ + memcpy(eth_hdr->h_dest , &cfg->opt_tx_dmac, ETH_ALEN); + memcpy(eth_hdr->h_source, &cfg->opt_tx_smac, ETH_ALEN); + eth_hdr->h_proto = htons(ETH_P_IP); +} + +static bool get_ipv4_u32(char *ip_str, uint32_t *ip_addr) +{ + int res; + + res = inet_pton(AF_INET, ip_str, ip_addr); + if (res <= 0) { + if (res == 0) + fprintf(stderr, "ERROR: IP%s \"%s\" not in presentation format\n", + "v4", ip_str); + else + perror("inet_pton"); + return false; + } + return true; +} + +static char *opt_ip_str_src = "192.168.44.2"; +static char *opt_ip_str_dst = "192.168.44.3"; + +static void gen_ip_hdr(struct iphdr *ip_hdr) +{ + uint32_t saddr; + uint32_t daddr; + + get_ipv4_u32(opt_ip_str_src, &saddr); + get_ipv4_u32(opt_ip_str_dst, &daddr); + + /* IP header */ + ip_hdr->version = IPVERSION; + ip_hdr->ihl = 0x5; /* 20 byte header */ + ip_hdr->tos = 0x0; + ip_hdr->tot_len = htons(IP_PKT_SIZE); + ip_hdr->id = 0; + ip_hdr->frag_off = 0; 
+	ip_hdr->ttl = IPDEFTTL;
+	ip_hdr->protocol = IPPROTO_UDP;
+	ip_hdr->saddr = saddr;
+	ip_hdr->daddr = daddr;
+
+	/* IP header checksum */
+	ip_hdr->check = 0;
+	ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl);
+}
+
+static uint32_t opt_pkt_fill_pattern = 0x41424344;
+
+static void gen_udp_hdr(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
+{
+	/* UDP header */
+	udp_hdr->source = htons(0x1000);
+	udp_hdr->dest = htons(0x1000);
+	udp_hdr->len = htons(UDP_PKT_SIZE);
+
+	/* UDP data */
+	memset32_htonl((void*)udp_hdr + sizeof(struct udphdr),
+		       opt_pkt_fill_pattern,
+		       UDP_PKT_DATA_SIZE);
+
+	/* UDP header checksum */
+	udp_hdr->check = 0;
+	udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE,
+				  IPPROTO_UDP, (__u16 *)udp_hdr);
+}
+
+static void gen_base_pkt(struct config *cfg, uint8_t *pkt_ptr)
+{
+	struct ethhdr *eth_hdr = (struct ethhdr *)pkt_ptr;
+	struct iphdr *ip_hdr = (struct iphdr *)(pkt_ptr +
+						sizeof(struct ethhdr));
+	struct udphdr *udp_hdr = (struct udphdr *)(pkt_ptr +
+						   sizeof(struct ethhdr) +
+						   sizeof(struct iphdr));
+
+	gen_eth_hdr(cfg, eth_hdr);
+	gen_ip_hdr(ip_hdr);
+	gen_udp_hdr(udp_hdr, ip_hdr);
+}
+
+
 /**
  * BTF accessing XDP-hints
  * -----------------------
@@ -629,6 +829,125 @@ static void print_pkt_info(uint8_t *pkt, uint32_t len)
 	}
 }
 
+/* Allocate one umem frame, fill it with the base test packet and queue it
+ * on the TX ring of @xsk. The frame is handed back to the allocator when
+ * no TX descriptor can be reserved; nothing is sent if no frame is free.
+ */
+static void tx_pkt(struct config *cfg, struct xsk_socket_info *xsk)
+{
+	struct xsk_umem_info *umem = xsk->umem;
+	uint64_t pkt_addr = mem_alloc_umem_frame(&umem->mem);
+	uint8_t *pkt = NULL;
+	uint32_t offset = 0; // 256;
+	uint32_t tx_idx = 0;
+
+	/* Allocator is empty: there is no frame to build a packet in */
+	if (pkt_addr == INVALID_UMEM_FRAME)
+		return;
+
+	pkt_addr += offset;
+	pr_addr_info(__func__, pkt_addr, umem);
+
+	pkt = xsk_umem__get_data(umem->buffer, pkt_addr);
+	gen_base_pkt(cfg, pkt);
+
+	if (xsk_ring_prod__reserve(&xsk->tx, 1, &tx_idx) != 1) {
+		/* No more transmit slots, drop the packet. Must return here:
+		 * tx_idx is not a reserved slot and the frame is free again.
+		 */
+		mem_free_umem_frame(&umem->mem, pkt_addr);
+		return;
+	}
+
+	xsk_ring_prod__tx_desc(&xsk->tx, tx_idx)->addr = pkt_addr;
+	/* Same length invent_tx_pkts() uses, instead of a literal 64 */
+	xsk_ring_prod__tx_desc(&xsk->tx, tx_idx)->len = opt_pkt_size;
+	xsk_ring_prod__submit(&xsk->tx, 1);
+	xsk->outstanding_tx++;
+	//complete_tx(xsk);
+}
+
+/* Generate some fake packets (in umem area). Real system will deliver TX
+ * packets containing the needed control information.
+ *
+ * Fills up to @n descriptors in @pkts and returns how many were filled;
+ * the count is short when mem_alloc_umem_frame() has no frame left.
+ */
+static int invent_tx_pkts(struct config *cfg, struct xsk_umem_info *umem,
+			  const unsigned int n, struct xdp_desc pkts[n])
+{
+	uint32_t len = opt_pkt_size;
+	uint32_t offset = 256;
+	int i;
+
+	for (i = 0; i < n; i++) {
+		uint64_t pkt_addr = mem_alloc_umem_frame(&umem->mem);
+		struct xdp_desc desc;
+		uint8_t *pkt_data;
+
+		if (pkt_addr == INVALID_UMEM_FRAME)
+			return i;
+
+		pkt_addr += offset;
+		desc.addr = pkt_addr;
+		desc.len = len;
+		desc.options = 0;
+
+		/* Write into packet memory area */
+		pkt_data = xsk_umem__get_data(umem->buffer, pkt_addr);
+		gen_base_pkt(cfg, pkt_data);
+
+		pkts[i] = desc;
+	}
+	return i;
+}
+
+/* Queue the @nr prepared descriptors in @pkts on the TX ring of @xsk.
+ * Returns @nr on success. If the ring cannot take the whole batch, every
+ * frame in @pkts is returned to the allocator and 0 is returned.
+ */
+static int tx_batch_pkts(struct xsk_socket_info *xsk,
+			 const unsigned int nr, struct xdp_desc pkts[nr])
+{
+	struct xsk_umem_info *umem = xsk->umem;
+	uint32_t tx_res;
+	uint32_t tx_idx = 0;
+	int i;
+
+	tx_res = xsk_ring_prod__reserve(&xsk->tx, nr, &tx_idx);
+	if (tx_res != nr) {
+		/* No more transmit slots, drop all packets. Normally AF_XDP
+		 * code would try to run TX-completion CQ step to free up slots,
+		 * but we don't want to introduce variability due to RT
+		 * requirements. Other code make sure CQ is processed.
+ */ + for (i = 0; i < nr; i++) { + mem_free_umem_frame(&umem->mem, pkts[i].addr); + } + return 0; + } + + for (i = 0; i < nr ; i++) { + struct xdp_desc *tx_desc; + + tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, tx_idx + i); + *tx_desc = pkts[i]; + //xsk_ring_prod__tx_desc(&xsk->tx, tx_idx)->addr = pkt_addr; + //xsk_ring_prod__tx_desc(&xsk->tx, tx_idx)->len = 64; + xsk->outstanding_tx++; + } + xsk_ring_prod__submit(&xsk->tx, nr); + + // Kick Tx + // sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + complete_tx(xsk); + + // See if kicking Rx-side works + // recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); + + return nr; +} + static bool process_packet(struct xsk_socket_info *xsk, uint64_t addr, uint32_t len) { @@ -709,17 +1014,17 @@ void restock_receive_fill_queue(struct xsk_socket_info *xsk) uint32_t idx_fq = 0; int ret; + /* Limit refill size as it takes time */ int free_frames = mem_avail_umem_frames(&xsk->umem->mem); + int refill = (free_frames > FQ_REFILL_MAX) ? 
FQ_REFILL_MAX : free_frames; + __u64 start = gettime(); - /* Stuff the ring with as much frames as possible */ - stock_frames = xsk_prod_nb_free(&xsk->fq, - mem_avail_umem_frames(&xsk->umem->mem)); + stock_frames = xsk_prod_nb_free(&xsk->fq, refill); if (stock_frames > 0) { - ret = xsk_ring_prod__reserve(&xsk->fq, stock_frames, - &idx_fq); + ret = xsk_ring_prod__reserve(&xsk->fq, stock_frames, &idx_fq); /* This should not happen, but just in case */ if (ret != stock_frames) { @@ -735,7 +1040,7 @@ void restock_receive_fill_queue(struct xsk_socket_info *xsk) xsk_ring_prod__submit(&xsk->fq, stock_frames); } __u64 now = gettime(); - if (debug && (stock_frames || free_frames)) + if (debug && stock_frames > 1) printf("XXX stock_frame:%d free_frames:%d cost of xsk_prod_nb_free() %llu ns\n", stock_frames, free_frames, now - start); } @@ -745,6 +1050,9 @@ static void handle_receive_packets(struct xsk_socket_info *xsk) unsigned int rcvd, i; uint32_t idx_rx = 0; + // FIXME: Needed when in NAPI busy_poll mode? 
+ recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); + rcvd = xsk_ring_cons__peek(&xsk->rx, RX_BATCH_SIZE, &idx_rx); if (!rcvd) return; @@ -754,6 +1062,8 @@ static void handle_receive_packets(struct xsk_socket_info *xsk) uint64_t addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; uint32_t len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; + pr_addr_info(__func__, addr, xsk->umem); + if (!process_packet(xsk, addr, len)) mem_free_umem_frame(&xsk->umem->mem, addr); @@ -806,6 +1116,142 @@ static void rx_and_process(struct config *cfg, } } +static void rx_avail_packets(struct xsk_container *xsks) +{ + for (int i = 0; i < xsks->num; i++) { + struct xsk_socket_info *xsk_info = xsks->sockets[i]; + + handle_receive_packets(xsk_info); + } +} + +/* Default interval in usec */ +#define DEFAULT_INTERVAL 1000000 + +#define USEC_PER_SEC 1000000 +#define NSEC_PER_SEC 1000000000 + +static inline void tsnorm(struct timespec *ts) +{ + while (ts->tv_nsec >= NSEC_PER_SEC) { + ts->tv_nsec -= NSEC_PER_SEC; + ts->tv_sec++; + } +} + +static inline int64_t calcdiff(struct timespec t1, struct timespec t2) +{ + int64_t diff; + diff = USEC_PER_SEC * (long long)((int) t1.tv_sec - (int) t2.tv_sec); + diff += ((int) t1.tv_nsec - (int) t2.tv_nsec) / 1000; + return diff; +} + +struct wakeup_stat { + long min; + long max; + long act; + double avg; + unsigned long events; +}; + +/* Use-case: Accurate cyclic Tx and lazy RX-processing + * + * This processing loop is simulating a Time-Triggered schedule, where + * transmitting packets within a small time-window is the most + * important task. Picking up frames in RX-queue is less time + * critical, as the PCF synchronization packets will have been + * timestamped (rx_ktime) by XDP before they got enqueued. 
+ */
+static void tx_cyclic_and_rx_process(struct config *cfg,
+				     struct xsk_container *xsks)
+{
+	struct timespec now, next, interval;
+	struct wakeup_stat stat = { .min = DEFAULT_INTERVAL};
+	int batch_nr = 4;
+	struct xdp_desc tx_pkts[batch_nr];
+	int tx_nr;
+
+	int period = DEFAULT_INTERVAL; // TODO: Add to cfg
+	int timermode = TIMER_ABSTIME;
+	int clock = CLOCK_MONOTONIC;
+
+	// Choosing xsk id 0
+	struct xsk_socket_info *xsk = xsks->sockets[0];
+
+	/* Get packets for *first* iteration */
+	tx_nr = invent_tx_pkts(cfg, xsk->umem, batch_nr, tx_pkts);
+
+	interval.tv_sec = period / USEC_PER_SEC;
+	interval.tv_nsec = (period % USEC_PER_SEC) * 1000;
+
+	clock_gettime(clock, &now);
+
+	next = now;
+	next.tv_sec += interval.tv_sec;
+	next.tv_nsec += interval.tv_nsec;
+	tsnorm(&next);
+
+	while (!global_exit) {
+		int64_t diff;
+		int err, n;
+
+		/* Wait for next period */
+		err = clock_nanosleep(clock, timermode, &next, NULL);
+		/* Took the MODE_CLOCK_NANOSLEEP case from cyclictest */
+		if (err) {
+			if (err != EINTR)
+				fprintf(stderr, "clock_nanosleep failed."
+					" err:%d errno:%d\n", err, errno);
+			goto out;
+		}
+
+		/* Expecting to wake up at "next"; get system time "now" to check */
+		err = clock_gettime(clock, &now);
+		if (err) {
+			if (errno != EINTR) /* clock_gettime() fails with -1 and sets errno */
+				fprintf(stderr, "clock_gettime() failed."
+					" err:%d errno:%d\n", err, errno);
+			goto out;
+		}
+
+		/* Detect inaccuracy diff */
+		diff = calcdiff(now, next);
+		if (diff < stat.min)
+			stat.min = diff;
+		if (diff > stat.max)
+			stat.max = diff;
+		stat.avg += (double) diff;
+		stat.act = diff;
+
+		stat.events++;
+
+		/* Send batch of packets */
+		n = tx_batch_pkts(xsk, tx_nr, tx_pkts);
+
+		if (verbose >=1 )
+			printf("TX pkts:%d event:%lu"
+				" inaccurate(usec) wakeup min:%ld cur:%ld max:%ld\n",
+				n, stat.events, stat.min, stat.act, stat.max);
+
+		/* Calculate next time to wakeup */
+		next.tv_sec += interval.tv_sec;
+		next.tv_nsec += interval.tv_nsec;
+		tsnorm(&next);
+
+		/* Get packets for *next* iteration */
+		tx_nr = invent_tx_pkts(cfg, xsk->umem, batch_nr, tx_pkts);
+
+		/* Empty RX queues */
+		rx_avail_packets(xsks);
+	}
+out:
+	/* Free umem frames that were prepared but never handed to the TX ring */
+	for (int i = 0; i < tx_nr; i++) {
+		mem_free_umem_frame(&xsk->umem->mem, tx_pkts[i].addr);
+	}
+}
+
 static double calc_period(struct stats_record *r, struct stats_record *p)
 {
 	double period_ = 0;
@@ -926,6 +1372,8 @@ int main(int argc, char **argv)
 		.progsec = "xdp_sock",
 		.xsk_wakeup_mode = true, /* Default, change via --spin */
 		.xsk_if_queue = -1,
+		.opt_tx_dmac = default_tx_dmac,
+		.opt_tx_smac = default_tx_smac,
 	};
 	pthread_t stats_poll_thread;
 	struct xsk_umem_info *umem;
@@ -950,7 +1398,8 @@ int main(int argc, char **argv)
 	 * cost of copying over packet data to our preallocated AF_XDP umem
 	 * area.
*/ - cfg.xsk_bind_flags = XDP_COPY; + //cfg.xsk_bind_flags = XDP_COPY; + cfg.xsk_bind_flags = XDP_COPY | XDP_USE_NEED_WAKEUP; struct bpf_object *bpf_obj = NULL; struct bpf_map *map; @@ -1042,6 +1491,9 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } + /* Generate packets to TX */ + gen_base_pkt(&cfg, (uint8_t*)&base_pkt_data); + /* Open and configure the AF_XDP (xsk) socket(s) */ for (i = 0; i < xsks.num; i++) { struct xsk_socket_info *xski; @@ -1088,8 +1540,19 @@ int main(int argc, char **argv) cfg.sched_prio, cfg.sched_policy); } + /* Issue: At this point AF_XDP socket might not be ready e.g. for TX. + * It seems related with XDP attachment causing link down/up event for + * some drivers. Q: What is the right method/API that waits for link to + * be initilized correctly? + */ + //sleep(3); + // tx_pkt(&cfg, xsks.sockets[0]); + /* Receive and count packets than drop them */ - rx_and_process(&cfg, &xsks); + // rx_and_process(&cfg, &xsks); + + /* Send packets cyclic */ + tx_cyclic_and_rx_process(&cfg, &xsks); /* Cleanup */ for (i = 0; i < xsks.num; i++) diff --git a/AF_XDP-interaction/common_defines.h b/AF_XDP-interaction/common_defines.h index 089b10e..7638632 100644 --- a/AF_XDP-interaction/common_defines.h +++ b/AF_XDP-interaction/common_defines.h @@ -4,6 +4,7 @@ #include #include #include +#include /* struct ether_addr */ struct config { __u32 xdp_flags; @@ -26,6 +27,9 @@ struct config { /* Real-Time scheduler setting */ int sched_prio; int sched_policy; + bool opt_busy_poll; + struct ether_addr opt_tx_smac; + struct ether_addr opt_tx_dmac; }; /* Defined in common_params.o */ diff --git a/AF_XDP-interaction/common_params.c b/AF_XDP-interaction/common_params.c index e9c28c7..b93eef5 100644 --- a/AF_XDP-interaction/common_params.c +++ b/AF_XDP-interaction/common_params.c @@ -29,7 +29,7 @@ void _print_options(const struct option_wrapper *long_options, bool required) if (long_options[i].required != required) continue; - if (long_options[i].option.val > 64) 
/* ord('A') = 65 */ + if (long_options[i].option.val > 64) /* ord('A') = 65 = 0x41 */ printf(" -%c,", long_options[i].option.val); else printf(" "); @@ -96,7 +96,7 @@ void parse_cmdline_args(int argc, char **argv, } /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hd:r:L:R:ASNFUMQ:czqp:", + while ((opt = getopt_long(argc, argv, "hd:r:L:R:BASNFUMQ:G:H:czqp:", long_options, &longindex)) != -1) { switch (opt) { case 'd': @@ -129,6 +129,25 @@ void parse_cmdline_args(int argc, char **argv, goto error; } break; + case 'G': + if (!ether_aton_r(optarg, + (struct ether_addr *)&cfg->opt_tx_dmac)) { + fprintf(stderr, "Invalid dest MAC address:%s\n", + optarg); + goto error; + } + break; + case 'H': + if (!ether_aton_r(optarg, + (struct ether_addr *)&cfg->opt_tx_smac)) { + fprintf(stderr, "Invalid src MAC address:%s\n", + optarg); + goto error; + } + break; + case 'B': + cfg->opt_busy_poll = true; + break; case 'A': cfg->xdp_flags &= ~XDP_FLAGS_MODES; /* Clear flags */ break; diff --git a/AF_XDP-interaction/lib_checksum.h b/AF_XDP-interaction/lib_checksum.h new file mode 100644 index 0000000..379a746 --- /dev/null +++ b/AF_XDP-interaction/lib_checksum.h @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0 +// Code taken from kernel samples/bpf/xdpsock_user.c + +#ifndef __LIB_CHECKSUM_H +#define __LIB_CHECKSUM_H + +static void *memset32_htonl(void *dest, __u32 val, __u32 size) +{ + __u32 *ptr = (__u32 *)dest; + int i; + + val = htonl(val); + + for (i = 0; i < (size & (~0x3)); i += 4) + ptr[i >> 2] = val; + + for (; i < size; i++) + ((char *)dest)[i] = ((char *)&val)[i & 3]; + + return dest; +} + +/* + * This function code has been taken from + * Linux kernel lib/checksum.c + */ +static inline unsigned short from32to16(unsigned int x) +{ + /* add up 16-bit and 16-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. 
*/ + x = (x & 0xffff) + (x >> 16); + return x; +} + +/* + * This function code has been taken from + * Linux kernel lib/checksum.c + */ +static unsigned int do_csum(const unsigned char *buff, int len) +{ + unsigned int result = 0; + int odd; + + if (len <= 0) + goto out; + odd = 1 & (unsigned long)buff; + if (odd) { +#ifdef __LITTLE_ENDIAN + result += (*buff << 8); +#else + result = *buff; +#endif + len--; + buff++; + } + if (len >= 2) { + if (2 & (unsigned long)buff) { + result += *(unsigned short *)buff; + len -= 2; + buff += 2; + } + if (len >= 4) { + const unsigned char *end = buff + + ((unsigned int)len & ~3); + unsigned int carry = 0; + + do { + unsigned int w = *(unsigned int *)buff; + + buff += 4; + result += carry; + result += w; + carry = (w > result); + } while (buff < end); + result += carry; + result = (result & 0xffff) + (result >> 16); + } + if (len & 2) { + result += *(unsigned short *)buff; + buff += 2; + } + } + if (len & 1) +#ifdef __LITTLE_ENDIAN + result += *buff; +#else + result += (*buff << 8); +#endif + result = from32to16(result); + if (odd) + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); +out: + return result; +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. 
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	return (__sum16)~do_csum(iph, ihl * 4);
+}
+
+/*
+ * Fold a partial checksum
+ * This function code has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __sum16 csum_fold(__wsum csum)
+{
+	__u32 sum = (__u32)csum;
+
+	sum = (sum & 0xffff) + (sum >> 16);
+	sum = (sum & 0xffff) + (sum >> 16);
+	return (__sum16)~sum;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline __u32 from64to32(__u64 x)
+{
+	/* add up 32-bit and 32-bit for 32+c bit */
+	x = (x & 0xffffffff) + (x >> 32);
+	/* add up carry.. */
+	x = (x & 0xffffffff) + (x >> 32);
+	return (__u32)x;
+}
+
+/*
+ * Taken from Linux kernel lib/checksum.c; static inline so that this
+ * header can be included from several .c files without link clashes.
+ */
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+					__u32 len, __u8 proto, __wsum sum)
+{
+	unsigned long long s = (__u32)sum;
+
+	s += (__u32)saddr;
+	s += (__u32)daddr;
+#ifdef __BIG_ENDIAN__
+	s += proto + len;
+#else
+	s += (proto + len) << 8;
+#endif
+	return (__wsum)from64to32(s);
+}
+
+/*
+ * This function has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __sum16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+		  __u8 proto, __wsum sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+/* UDP checksum of the @len bytes at @udp_pkt plus the IPv4 pseudo-header. */
+static inline __u16 udp_csum(__u32 saddr, __u32 daddr, __u32 len,
+			     __u8 proto, __u16 *udp_pkt)
+{
+	__u32 csum = 0;
+	__u32 cnt = 0;
+
+	/* udp hdr and data, summed as whole 16-bit words */
+	for (; cnt + 1 < len; cnt += 2)
+		csum += udp_pkt[cnt >> 1];
+	if (len & 1) /* odd length: zero-pad last byte, never read past len */
+		csum += htons((__u16)(((const __u8 *)udp_pkt)[cnt] << 8));
+
+	return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
+}
+
+#endif /* __LIB_CHECKSUM_H */