mirror of
https://github.com/xdp-project/bpf-examples.git
synced 2024-05-06 15:54:53 +00:00
Merge pull request #35 from xdp-project/vestas03_AF_XDP_example
AF_XDP-interaction: Implement TX-cyclic sending
This commit is contained in:
@ -26,9 +26,21 @@
|
||||
#include <net/if.h>
|
||||
#include <linux/if_link.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <netinet/ether.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include <linux/icmpv6.h>
|
||||
#include <linux/udp.h>
|
||||
|
||||
#include <linux/socket.h>
|
||||
|
||||
#ifndef SO_PREFER_BUSY_POLL
|
||||
#define SO_PREFER_BUSY_POLL 69
|
||||
#endif
|
||||
#ifndef SO_BUSY_POLL_BUDGET
|
||||
#define SO_BUSY_POLL_BUDGET 70
|
||||
#endif
|
||||
|
||||
|
||||
#include <bpf/btf.h> /* provided by libbpf */
|
||||
|
||||
@ -39,10 +51,13 @@
|
||||
|
||||
#include "lib_xsk_extend.h"
|
||||
#include "ethtool_utils.h"
|
||||
#include "lib_checksum.h"
|
||||
|
||||
#define NUM_FRAMES 4096 /* Frames per queue */
|
||||
#define FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE /* 4096 */
|
||||
#define FRAME_SIZE_MASK (FRAME_SIZE - 1)
|
||||
#define RX_BATCH_SIZE 64
|
||||
#define FQ_REFILL_MAX (RX_BATCH_SIZE * 2)
|
||||
#define INVALID_UMEM_FRAME UINT64_MAX
|
||||
|
||||
struct mem_frame_allocator {
|
||||
@ -87,6 +102,16 @@ struct xsk_container {
|
||||
int num; /* Number of xsk_sockets configured */
|
||||
};
|
||||
|
||||
/**
 * Report a fatal error together with its call site and terminate.
 *
 * @error: error number; printed both numerically and via strerror()
 * @file:  source file name of the caller
 * @func:  function name of the caller
 * @line:  source line of the caller
 *
 * Never returns: the process exits with EXIT_FAILURE.
 */
static void __exit_with_error(int error, const char *file, const char *func,
			      int line)
{
	const char *reason = strerror(error);

	fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n",
		file, func, line, error, reason);
	exit(EXIT_FAILURE);
}

/* Wrapper that fills in the call-site information automatically */
#define exit_with_error(error) \
	__exit_with_error(error, __FILE__, __func__, __LINE__)
|
||||
|
||||
/**
|
||||
* BTF setup XDP-hints
|
||||
* -------------------
|
||||
@ -166,6 +191,22 @@ int init_btf_info_via_bpf_object(struct bpf_object *bpf_obj)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void pr_addr_info(const char *msg, uint64_t pkt_addr, struct xsk_umem_info *umem)
|
||||
{
|
||||
uint64_t pkt_nr = pkt_addr / FRAME_SIZE; /* Integer div round off */
|
||||
uint32_t offset = pkt_addr - (pkt_nr * FRAME_SIZE); /* what got rounded off */
|
||||
uint8_t *pkt_ptr = NULL;
|
||||
|
||||
if (!debug)
|
||||
return;
|
||||
|
||||
if (umem)
|
||||
pkt_ptr = xsk_umem__get_data(umem->buffer, pkt_addr);
|
||||
|
||||
printf(" - Addr-info: %s pkt_nr:%lu offset:%u (addr:0x%lX) ptr:%p\n",
|
||||
msg, pkt_nr, offset, pkt_addr, pkt_ptr);
|
||||
}
|
||||
|
||||
#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
|
||||
static uint64_t gettime(void)
|
||||
{
|
||||
@ -174,7 +215,7 @@ static uint64_t gettime(void)
|
||||
|
||||
res = clock_gettime(CLOCK_MONOTONIC, &t);
|
||||
if (res < 0) {
|
||||
fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
|
||||
fprintf(stderr, "Error with clock_gettime! (%i)\n", res);
|
||||
exit(EXIT_FAIL);
|
||||
}
|
||||
return (uint64_t) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
|
||||
@ -247,6 +288,15 @@ static const struct option_wrapper long_options[] = {
|
||||
{{"progsec", required_argument, NULL, 2 },
|
||||
"Load program in <section> of the ELF file", "<section>"},
|
||||
|
||||
{{"busy-poll", no_argument, NULL, 'B' },
|
||||
"Enable socket prefer NAPI busy-poll mode (remember adjust sysctl too)"},
|
||||
|
||||
{{"tx-dmac", required_argument, NULL, 'G' },
|
||||
"Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format", "aa:bb:cc:dd:ee:ff"},
|
||||
|
||||
{{"tx-smac", required_argument, NULL, 'H' },
|
||||
"Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format", "aa:bb:cc:dd:ee:ff"},
|
||||
|
||||
{{0, 0, NULL, 0 }, NULL, false}
|
||||
};
|
||||
|
||||
@ -271,6 +321,14 @@ static void mem_free_umem_frame(struct mem_frame_allocator *mem, uint64_t frame)
|
||||
{
|
||||
assert(mem->umem_frame_free < mem->umem_frame_max);
|
||||
|
||||
/* Remove any packet offset from the frame addr. The kernel RX process
|
||||
* will add some headroom. Our userspace TX process can also choose to
|
||||
* add headroom. Thus, frame addr can be returned to our mem allocator
|
||||
* including this offset.
|
||||
*/
|
||||
// frame = (frame / FRAME_SIZE) * FRAME_SIZE;
|
||||
frame = frame & ~FRAME_SIZE_MASK;
|
||||
|
||||
mem->umem_frame_addr[mem->umem_frame_free++] = frame;
|
||||
}
|
||||
|
||||
@ -295,12 +353,38 @@ static void mem_init_umem_frame_allocator(struct mem_frame_allocator *mem,
|
||||
mem->umem_frame_max = nr_frames;
|
||||
|
||||
/* The umem_frame_addr is basically index into umem->buffer memory area */
|
||||
for (i = 0; i < nr_frames; i++)
|
||||
mem->umem_frame_addr[i] = i * FRAME_SIZE;
|
||||
for (i = 0; i < nr_frames; i++) {
|
||||
uint64_t addr = i * FRAME_SIZE;
|
||||
mem->umem_frame_addr[i] = addr;
|
||||
}
|
||||
|
||||
mem->umem_frame_free = nr_frames;
|
||||
}
|
||||
|
||||
static void apply_setsockopt(struct xsk_socket_info *xsk, bool opt_busy_poll,
|
||||
int opt_batch_size)
|
||||
{
|
||||
int sock_opt;
|
||||
|
||||
if (!opt_busy_poll)
|
||||
return;
|
||||
|
||||
sock_opt = 1;
|
||||
if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL,
|
||||
(void *)&sock_opt, sizeof(sock_opt)) < 0)
|
||||
exit_with_error(errno);
|
||||
|
||||
sock_opt = 20;
|
||||
if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL,
|
||||
(void *)&sock_opt, sizeof(sock_opt)) < 0)
|
||||
exit_with_error(errno);
|
||||
|
||||
sock_opt = opt_batch_size;
|
||||
if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
|
||||
(void *)&sock_opt, sizeof(sock_opt)) < 0)
|
||||
exit_with_error(errno);
|
||||
}
|
||||
|
||||
static struct xsk_umem_info *configure_xsk_umem(void *buffer, uint64_t size,
|
||||
uint32_t frame_size, uint32_t nr_frames)
|
||||
{
|
||||
@ -420,6 +504,9 @@ static struct xsk_socket_info *xsk_configure_socket(struct config *cfg,
|
||||
|
||||
/* Due to XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD manually update map */
|
||||
// xsk_socket__update_xskmap(xsk_info->xsk, xsks_map_fd);
|
||||
|
||||
apply_setsockopt(xsk_info, cfg->opt_busy_poll, RX_BATCH_SIZE);
|
||||
|
||||
return xsk_info;
|
||||
|
||||
error_exit:
|
||||
@ -444,10 +531,13 @@ static void complete_tx(struct xsk_socket_info *xsk)
|
||||
&idx_cq);
|
||||
|
||||
if (completed > 0) {
|
||||
for (int i = 0; i < completed; i++)
|
||||
mem_free_umem_frame(&xsk->umem->mem,
|
||||
*xsk_ring_cons__comp_addr(&xsk->cq,
|
||||
idx_cq++));
|
||||
for (int i = 0; i < completed; i++) {
|
||||
uint64_t addr;
|
||||
|
||||
addr = *xsk_ring_cons__comp_addr(&xsk->cq, idx_cq++);
|
||||
mem_free_umem_frame(&xsk->umem->mem, addr);
|
||||
//pr_addr_info(__func__, addr, xsk->umem);
|
||||
}
|
||||
|
||||
xsk_ring_cons__release(&xsk->cq, completed);
|
||||
xsk->outstanding_tx -= completed < xsk->outstanding_tx ?
|
||||
@ -473,6 +563,116 @@ static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)
|
||||
*sum = ~csum16_add(csum16_sub(~(*sum), old), new);
|
||||
}
|
||||
|
||||
/**
|
||||
* Packet fill helpers
|
||||
*/
|
||||
static uint8_t base_pkt_data[FRAME_SIZE];
|
||||
|
||||
/* Can be changed via cmdline options (-G|--tx-dmac) or (-H|--tx-smac) */
|
||||
static struct ether_addr default_tx_smac = {{ 0x24, 0x5e, 0xbe, 0x57, 0xf1, 0x64 }};
|
||||
static struct ether_addr default_tx_dmac = {{ 0xbc, 0xee, 0x7b, 0xda, 0xc2, 0x62 }};
|
||||
|
||||
#define MIN_PKT_SIZE 64
|
||||
static uint16_t opt_pkt_size = MIN_PKT_SIZE;
|
||||
|
||||
#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
|
||||
sizeof(struct udphdr))
|
||||
|
||||
#define ETH_FCS_SIZE 4
|
||||
#define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE)
|
||||
#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
|
||||
#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
|
||||
#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
|
||||
|
||||
static void gen_eth_hdr(struct config *cfg, struct ethhdr *eth_hdr)
|
||||
{
|
||||
/* Ethernet header:
|
||||
* Can be changed via cmdline options (-G|--tx-dmac) or (-H|--tx-smac)
|
||||
*/
|
||||
memcpy(eth_hdr->h_dest , &cfg->opt_tx_dmac, ETH_ALEN);
|
||||
memcpy(eth_hdr->h_source, &cfg->opt_tx_smac, ETH_ALEN);
|
||||
eth_hdr->h_proto = htons(ETH_P_IP);
|
||||
}
|
||||
|
||||
/**
 * Parse a dotted-quad IPv4 string with inet_pton().
 *
 * @ip_str:  textual address
 * @ip_addr: receives the address exactly as inet_pton() stores it
 *
 * Returns true on success. On failure a diagnostic is written to stderr and
 * false is returned.
 */
static bool get_ipv4_u32(char *ip_str, uint32_t *ip_addr)
{
	int rc = inet_pton(AF_INET, ip_str, ip_addr);

	if (rc > 0)
		return true;

	if (rc < 0)
		perror("inet_pton");
	else
		fprintf(stderr, "ERROR: IP%s \"%s\" not in presentation format\n",
			"v4", ip_str);

	return false;
}
|
||||
|
||||
static char *opt_ip_str_src = "192.168.44.2";
|
||||
static char *opt_ip_str_dst = "192.168.44.3";
|
||||
|
||||
static void gen_ip_hdr(struct iphdr *ip_hdr)
|
||||
{
|
||||
uint32_t saddr;
|
||||
uint32_t daddr;
|
||||
|
||||
get_ipv4_u32(opt_ip_str_src, &saddr);
|
||||
get_ipv4_u32(opt_ip_str_dst, &daddr);
|
||||
|
||||
/* IP header */
|
||||
ip_hdr->version = IPVERSION;
|
||||
ip_hdr->ihl = 0x5; /* 20 byte header */
|
||||
ip_hdr->tos = 0x0;
|
||||
ip_hdr->tot_len = htons(IP_PKT_SIZE);
|
||||
ip_hdr->id = 0;
|
||||
ip_hdr->frag_off = 0;
|
||||
ip_hdr->ttl = IPDEFTTL;
|
||||
ip_hdr->protocol = IPPROTO_UDP;
|
||||
ip_hdr->saddr = saddr;
|
||||
ip_hdr->daddr = daddr;
|
||||
|
||||
/* IP header checksum */
|
||||
ip_hdr->check = 0;
|
||||
ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl);
|
||||
}
|
||||
|
||||
static uint32_t opt_pkt_fill_pattern = 0x41424344;
|
||||
|
||||
static void gen_udp_hdr(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
|
||||
{
|
||||
/* UDP header */
|
||||
udp_hdr->source = htons(0x1000);
|
||||
udp_hdr->dest = htons(0x1000);
|
||||
udp_hdr->len = htons(UDP_PKT_SIZE);
|
||||
|
||||
/* UDP data */
|
||||
memset32_htonl((void*)udp_hdr + sizeof(struct udphdr),
|
||||
opt_pkt_fill_pattern,
|
||||
UDP_PKT_DATA_SIZE);
|
||||
|
||||
/* UDP header checksum */
|
||||
udp_hdr->check = 0;
|
||||
udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE,
|
||||
IPPROTO_UDP, (__u16 *)udp_hdr);
|
||||
}
|
||||
|
||||
static void gen_base_pkt(struct config *cfg, uint8_t *pkt_ptr)
|
||||
{
|
||||
struct ethhdr *eth_hdr = (struct ethhdr *)pkt_ptr;
|
||||
struct iphdr *ip_hdr = (struct iphdr *)(pkt_ptr +
|
||||
sizeof(struct ethhdr));
|
||||
struct udphdr *udp_hdr = (struct udphdr *)(pkt_ptr +
|
||||
sizeof(struct ethhdr) +
|
||||
sizeof(struct iphdr));
|
||||
|
||||
gen_eth_hdr(cfg, eth_hdr);
|
||||
gen_ip_hdr(ip_hdr);
|
||||
gen_udp_hdr(udp_hdr, ip_hdr);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* BTF accessing XDP-hints
|
||||
* -----------------------
|
||||
@ -629,6 +829,111 @@ static void print_pkt_info(uint8_t *pkt, uint32_t len)
|
||||
}
|
||||
}
|
||||
|
||||
static void tx_pkt(struct config *cfg, struct xsk_socket_info *xsk)
|
||||
{
|
||||
struct xsk_umem_info *umem = xsk->umem;
|
||||
uint64_t pkt_addr = mem_alloc_umem_frame(&umem->mem);
|
||||
uint8_t *pkt = NULL;
|
||||
uint32_t offset = 0; // 256;
|
||||
|
||||
pkt_addr += offset;
|
||||
pr_addr_info(__func__, pkt_addr, umem);
|
||||
|
||||
pkt = xsk_umem__get_data(umem->buffer, pkt_addr);
|
||||
gen_base_pkt(cfg, pkt);
|
||||
|
||||
{
|
||||
uint32_t tx_idx = 0;
|
||||
int ret;
|
||||
|
||||
ret = xsk_ring_prod__reserve(&xsk->tx, 1, &tx_idx);
|
||||
if (ret != 1) {
|
||||
/* No more transmit slots, drop the packet */
|
||||
mem_free_umem_frame(&umem->mem, pkt_addr);
|
||||
}
|
||||
|
||||
xsk_ring_prod__tx_desc(&xsk->tx, tx_idx)->addr = pkt_addr;
|
||||
xsk_ring_prod__tx_desc(&xsk->tx, tx_idx)->len = 64;
|
||||
xsk_ring_prod__submit(&xsk->tx, 1);
|
||||
xsk->outstanding_tx++;
|
||||
}
|
||||
//complete_tx(xsk);
|
||||
}
|
||||
|
||||
/* Generate some fake packets (in umem area). Real system will deliver TX
|
||||
* packets containing the needed control information.
|
||||
*/
|
||||
static int invent_tx_pkts(struct config *cfg, struct xsk_umem_info *umem,
|
||||
const unsigned int n, struct xdp_desc pkts[n])
|
||||
{
|
||||
uint32_t len = opt_pkt_size;
|
||||
uint32_t offset = 256;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
uint64_t pkt_addr = mem_alloc_umem_frame(&umem->mem);
|
||||
struct xdp_desc desc;
|
||||
uint8_t *pkt_data;
|
||||
|
||||
if (pkt_addr == INVALID_UMEM_FRAME)
|
||||
return i;
|
||||
|
||||
pkt_addr += offset;
|
||||
desc.addr = pkt_addr;
|
||||
desc.len = len;
|
||||
desc.options = 0;
|
||||
|
||||
/* Write into packet memory area */
|
||||
pkt_data = xsk_umem__get_data(umem->buffer, pkt_addr);
|
||||
gen_base_pkt(cfg, pkt_data);
|
||||
|
||||
pkts[i] = desc;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
static int tx_batch_pkts(struct xsk_socket_info *xsk,
|
||||
const unsigned int nr, struct xdp_desc pkts[nr])
|
||||
{
|
||||
struct xsk_umem_info *umem = xsk->umem;
|
||||
uint32_t tx_res;
|
||||
uint32_t tx_idx = 0;
|
||||
int i;
|
||||
|
||||
tx_res = xsk_ring_prod__reserve(&xsk->tx, nr, &tx_idx);
|
||||
if (tx_res != nr) {
|
||||
/* No more transmit slots, drop all packets. Normally AF_XDP
|
||||
* code would try to run TX-completion CQ step to free up slots,
|
||||
* but we don't want to introduce variability due to RT
|
||||
* requirements. Other code make sure CQ is processed.
|
||||
*/
|
||||
for (i = 0; i < nr; i++) {
|
||||
mem_free_umem_frame(&umem->mem, pkts[i].addr);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < nr ; i++) {
|
||||
struct xdp_desc *tx_desc;
|
||||
|
||||
tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, tx_idx + i);
|
||||
*tx_desc = pkts[i];
|
||||
//xsk_ring_prod__tx_desc(&xsk->tx, tx_idx)->addr = pkt_addr;
|
||||
//xsk_ring_prod__tx_desc(&xsk->tx, tx_idx)->len = 64;
|
||||
xsk->outstanding_tx++;
|
||||
}
|
||||
xsk_ring_prod__submit(&xsk->tx, nr);
|
||||
|
||||
// Kick Tx
|
||||
// sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
|
||||
complete_tx(xsk);
|
||||
|
||||
// See if kicking Rx-side works
|
||||
// recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
|
||||
|
||||
return nr;
|
||||
}
|
||||
|
||||
static bool process_packet(struct xsk_socket_info *xsk,
|
||||
uint64_t addr, uint32_t len)
|
||||
{
|
||||
@ -709,17 +1014,17 @@ void restock_receive_fill_queue(struct xsk_socket_info *xsk)
|
||||
uint32_t idx_fq = 0;
|
||||
int ret;
|
||||
|
||||
/* Limit refill size as it takes time */
|
||||
int free_frames = mem_avail_umem_frames(&xsk->umem->mem);
|
||||
int refill = (free_frames > FQ_REFILL_MAX) ? FQ_REFILL_MAX : free_frames;
|
||||
|
||||
__u64 start = gettime();
|
||||
|
||||
/* Stuff the ring with as much frames as possible */
|
||||
stock_frames = xsk_prod_nb_free(&xsk->fq,
|
||||
mem_avail_umem_frames(&xsk->umem->mem));
|
||||
stock_frames = xsk_prod_nb_free(&xsk->fq, refill);
|
||||
|
||||
if (stock_frames > 0) {
|
||||
|
||||
ret = xsk_ring_prod__reserve(&xsk->fq, stock_frames,
|
||||
&idx_fq);
|
||||
ret = xsk_ring_prod__reserve(&xsk->fq, stock_frames, &idx_fq);
|
||||
|
||||
/* This should not happen, but just in case */
|
||||
if (ret != stock_frames) {
|
||||
@ -735,7 +1040,7 @@ void restock_receive_fill_queue(struct xsk_socket_info *xsk)
|
||||
xsk_ring_prod__submit(&xsk->fq, stock_frames);
|
||||
}
|
||||
__u64 now = gettime();
|
||||
if (debug && (stock_frames || free_frames))
|
||||
if (debug && stock_frames > 1)
|
||||
printf("XXX stock_frame:%d free_frames:%d cost of xsk_prod_nb_free() %llu ns\n",
|
||||
stock_frames, free_frames, now - start);
|
||||
}
|
||||
@ -745,6 +1050,9 @@ static void handle_receive_packets(struct xsk_socket_info *xsk)
|
||||
unsigned int rcvd, i;
|
||||
uint32_t idx_rx = 0;
|
||||
|
||||
// FIXME: Needed when in NAPI busy_poll mode?
|
||||
recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
|
||||
|
||||
rcvd = xsk_ring_cons__peek(&xsk->rx, RX_BATCH_SIZE, &idx_rx);
|
||||
if (!rcvd)
|
||||
return;
|
||||
@ -754,6 +1062,8 @@ static void handle_receive_packets(struct xsk_socket_info *xsk)
|
||||
uint64_t addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
|
||||
uint32_t len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
|
||||
|
||||
pr_addr_info(__func__, addr, xsk->umem);
|
||||
|
||||
if (!process_packet(xsk, addr, len))
|
||||
mem_free_umem_frame(&xsk->umem->mem, addr);
|
||||
|
||||
@ -806,6 +1116,142 @@ static void rx_and_process(struct config *cfg,
|
||||
}
|
||||
}
|
||||
|
||||
static void rx_avail_packets(struct xsk_container *xsks)
|
||||
{
|
||||
for (int i = 0; i < xsks->num; i++) {
|
||||
struct xsk_socket_info *xsk_info = xsks->sockets[i];
|
||||
|
||||
handle_receive_packets(xsk_info);
|
||||
}
|
||||
}
|
||||
|
||||
/* Default interval in usec */
#define DEFAULT_INTERVAL 1000000

#define USEC_PER_SEC 1000000
#define NSEC_PER_SEC 1000000000

/**
 * Normalise a timespec whose tv_nsec has grown to one second or more by
 * carrying whole seconds over into tv_sec.
 *
 * @ts: value to normalise in place
 */
static inline void tsnorm(struct timespec *ts)
{
	while (ts->tv_nsec >= NSEC_PER_SEC) {
		ts->tv_nsec -= NSEC_PER_SEC;
		ts->tv_sec++;
	}
}

/**
 * Difference t1 - t2 in microseconds.
 *
 * @t1: minuend
 * @t2: subtrahend
 *
 * The nanosecond part is divided by 1000 with truncation toward zero.
 * All arithmetic is done in 64 bit so that large tv_sec values are neither
 * truncated nor able to overflow the subtraction.
 */
static inline int64_t calcdiff(struct timespec t1, struct timespec t2)
{
	int64_t diff;

	diff = (int64_t)USEC_PER_SEC *
	       ((int64_t)t1.tv_sec - (int64_t)t2.tv_sec);
	diff += ((int64_t)t1.tv_nsec - (int64_t)t2.tv_nsec) / 1000;
	return diff;
}
|
||||
|
||||
/* Wakeup-accuracy bookkeeping as filled in by tx_cyclic_and_rx_process() */
struct wakeup_stat {
	long min, max, act;	/* smallest, largest and most recent diff */
	double avg;		/* diffs are added up here */
	unsigned long events;	/* number of wakeups recorded */
};
|
||||
|
||||
/* Use-case: Accurate cyclic Tx and lazy RX-processing
|
||||
*
|
||||
* This processing loop is simulating a Time-Triggered schedule, where
|
||||
* transmitting packets within a small time-window is the most
|
||||
* important task. Picking up frames in RX-queue is less time
|
||||
* critical, as the PCF synchronization packets will have been
|
||||
* timestamped (rx_ktime) by XDP before they got enqueued.
|
||||
*/
|
||||
static void tx_cyclic_and_rx_process(struct config *cfg,
|
||||
struct xsk_container *xsks)
|
||||
{
|
||||
struct timespec now, next, interval;
|
||||
struct wakeup_stat stat = { .min = DEFAULT_INTERVAL};
|
||||
int batch_nr = 4;
|
||||
struct xdp_desc tx_pkts[batch_nr];
|
||||
int tx_nr;
|
||||
|
||||
int period = DEFAULT_INTERVAL; // TODO: Add to cfg
|
||||
int timermode = TIMER_ABSTIME;
|
||||
int clock = CLOCK_MONOTONIC;
|
||||
|
||||
// Choosing xsk id 0
|
||||
struct xsk_socket_info *xsk = xsks->sockets[0];
|
||||
|
||||
/* Get packets for *first* iteration */
|
||||
tx_nr = invent_tx_pkts(cfg, xsk->umem, batch_nr, tx_pkts);
|
||||
|
||||
interval.tv_sec = period / USEC_PER_SEC;
|
||||
interval.tv_nsec = (period % USEC_PER_SEC) * 1000;
|
||||
|
||||
clock_gettime(clock, &now);
|
||||
|
||||
next = now;
|
||||
next.tv_sec += interval.tv_sec;
|
||||
next.tv_nsec += interval.tv_nsec;
|
||||
tsnorm(&next);
|
||||
|
||||
while (!global_exit) {
|
||||
int64_t diff;
|
||||
int err, n;
|
||||
|
||||
/* Wait for next period */
|
||||
err = clock_nanosleep(clock, timermode, &next, NULL);
|
||||
/* Took case MODE_CLOCK_NANOSLEEP from cyclictest */
|
||||
if (err) {
|
||||
if (err != EINTR)
|
||||
fprintf(stderr, "clock_nanosleep failed."
|
||||
" err:%d errno:%d\n", err, errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Expecting to wakeup at "next" get systime "now" to check */
|
||||
err = clock_gettime(clock, &now);
|
||||
if (err) {
|
||||
if (err != EINTR)
|
||||
fprintf(stderr, "clock_getttime() failed."
|
||||
" err:%d errno:%d\n", err, errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Detect inaccuracy diff */
|
||||
diff = calcdiff(now, next);
|
||||
if (diff < stat.min)
|
||||
stat.min = diff;
|
||||
if (diff > stat.max)
|
||||
stat.max = diff;
|
||||
stat.avg += (double) diff;
|
||||
stat.act = diff;
|
||||
|
||||
stat.events++;
|
||||
|
||||
/* Send batch of packets */
|
||||
n = tx_batch_pkts(xsk, tx_nr, tx_pkts);
|
||||
|
||||
if (verbose >=1 )
|
||||
printf("TX pkts:%d event:%lu"
|
||||
" inaccurate(usec) wakeup min:%ld cur:%ld max:%ld\n",
|
||||
n, stat.events, stat.min, stat.act, stat.max);
|
||||
|
||||
/* Calculate next time to wakeup */
|
||||
next.tv_sec += interval.tv_sec;
|
||||
next.tv_nsec += interval.tv_nsec;
|
||||
tsnorm(&next);
|
||||
|
||||
/* Get packets for *next* iteration */
|
||||
tx_nr = invent_tx_pkts(cfg, xsk->umem, batch_nr, tx_pkts);
|
||||
|
||||
/* Empty RX queues */
|
||||
rx_avail_packets(xsks);
|
||||
}
|
||||
out:
|
||||
/* Free umem frames */
|
||||
for (int i = 0; i < tx_nr; i++) {
|
||||
mem_free_umem_frame(&xsk->umem->mem, tx_pkts[i].addr);
|
||||
}
|
||||
}
|
||||
|
||||
static double calc_period(struct stats_record *r, struct stats_record *p)
|
||||
{
|
||||
double period_ = 0;
|
||||
@ -926,6 +1372,8 @@ int main(int argc, char **argv)
|
||||
.progsec = "xdp_sock",
|
||||
.xsk_wakeup_mode = true, /* Default, change via --spin */
|
||||
.xsk_if_queue = -1,
|
||||
.opt_tx_dmac = default_tx_dmac,
|
||||
.opt_tx_smac = default_tx_smac,
|
||||
};
|
||||
pthread_t stats_poll_thread;
|
||||
struct xsk_umem_info *umem;
|
||||
@ -950,7 +1398,8 @@ int main(int argc, char **argv)
|
||||
* cost of copying over packet data to our preallocated AF_XDP umem
|
||||
* area.
|
||||
*/
|
||||
cfg.xsk_bind_flags = XDP_COPY;
|
||||
//cfg.xsk_bind_flags = XDP_COPY;
|
||||
cfg.xsk_bind_flags = XDP_COPY | XDP_USE_NEED_WAKEUP;
|
||||
|
||||
struct bpf_object *bpf_obj = NULL;
|
||||
struct bpf_map *map;
|
||||
@ -1042,6 +1491,9 @@ int main(int argc, char **argv)
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* Generate packets to TX */
|
||||
gen_base_pkt(&cfg, (uint8_t*)&base_pkt_data);
|
||||
|
||||
/* Open and configure the AF_XDP (xsk) socket(s) */
|
||||
for (i = 0; i < xsks.num; i++) {
|
||||
struct xsk_socket_info *xski;
|
||||
@ -1088,8 +1540,19 @@ int main(int argc, char **argv)
|
||||
cfg.sched_prio, cfg.sched_policy);
|
||||
}
|
||||
|
||||
/* Issue: At this point AF_XDP socket might not be ready e.g. for TX.
|
||||
* It seems related with XDP attachment causing link down/up event for
|
||||
* some drivers. Q: What is the right method/API that waits for link to
|
||||
* be initialized correctly?
|
||||
*/
|
||||
//sleep(3);
|
||||
// tx_pkt(&cfg, xsks.sockets[0]);
|
||||
|
||||
/* Receive and count packets then drop them */
|
||||
rx_and_process(&cfg, &xsks);
|
||||
// rx_and_process(&cfg, &xsks);
|
||||
|
||||
/* Send packets cyclic */
|
||||
tx_cyclic_and_rx_process(&cfg, &xsks);
|
||||
|
||||
/* Cleanup */
|
||||
for (i = 0; i < xsks.num; i++)
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <net/if.h>
|
||||
#include <linux/types.h>
|
||||
#include <stdbool.h>
|
||||
#include <netinet/ether.h> /* struct ether_addr */
|
||||
|
||||
struct config {
|
||||
__u32 xdp_flags;
|
||||
@ -26,6 +27,9 @@ struct config {
|
||||
/* Real-Time scheduler setting */
|
||||
int sched_prio;
|
||||
int sched_policy;
|
||||
bool opt_busy_poll;
|
||||
struct ether_addr opt_tx_smac;
|
||||
struct ether_addr opt_tx_dmac;
|
||||
};
|
||||
|
||||
/* Defined in common_params.o */
|
||||
|
@ -29,7 +29,7 @@ void _print_options(const struct option_wrapper *long_options, bool required)
|
||||
if (long_options[i].required != required)
|
||||
continue;
|
||||
|
||||
if (long_options[i].option.val > 64) /* ord('A') = 65 */
|
||||
if (long_options[i].option.val > 64) /* ord('A') = 65 = 0x41 */
|
||||
printf(" -%c,", long_options[i].option.val);
|
||||
else
|
||||
printf(" ");
|
||||
@ -96,7 +96,7 @@ void parse_cmdline_args(int argc, char **argv,
|
||||
}
|
||||
|
||||
/* Parse commands line args */
|
||||
while ((opt = getopt_long(argc, argv, "hd:r:L:R:ASNFUMQ:czqp:",
|
||||
while ((opt = getopt_long(argc, argv, "hd:r:L:R:BASNFUMQ:G:H:czqp:",
|
||||
long_options, &longindex)) != -1) {
|
||||
switch (opt) {
|
||||
case 'd':
|
||||
@ -129,6 +129,25 @@ void parse_cmdline_args(int argc, char **argv,
|
||||
goto error;
|
||||
}
|
||||
break;
|
||||
case 'G':
|
||||
if (!ether_aton_r(optarg,
|
||||
(struct ether_addr *)&cfg->opt_tx_dmac)) {
|
||||
fprintf(stderr, "Invalid dest MAC address:%s\n",
|
||||
optarg);
|
||||
goto error;
|
||||
}
|
||||
break;
|
||||
case 'H':
|
||||
if (!ether_aton_r(optarg,
|
||||
(struct ether_addr *)&cfg->opt_tx_smac)) {
|
||||
fprintf(stderr, "Invalid src MAC address:%s\n",
|
||||
optarg);
|
||||
goto error;
|
||||
}
|
||||
break;
|
||||
case 'B':
|
||||
cfg->opt_busy_poll = true;
|
||||
break;
|
||||
case 'A':
|
||||
cfg->xdp_flags &= ~XDP_FLAGS_MODES; /* Clear flags */
|
||||
break;
|
||||
|
181
AF_XDP-interaction/lib_checksum.h
Normal file
181
AF_XDP-interaction/lib_checksum.h
Normal file
@ -0,0 +1,181 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
// Code taken from kernel samples/bpf/xdpsock_user.c
|
||||
|
||||
#ifndef __LIB_CHECKSUM_H
|
||||
#define __LIB_CHECKSUM_H
|
||||
|
||||
/**
 * Fill a buffer with a repeating 32-bit pattern in network byte order.
 *
 * @dest: buffer to fill; no alignment is required
 * @val:  pattern in host byte order
 * @size: number of bytes to write; a trailing partial word receives the
 *        leading bytes of the pattern
 *
 * Returns @dest.
 *
 * The buffer is written byte by byte so that no 32-bit store is ever made
 * through a pointer that may not be 4-byte aligned.
 */
static void *memset32_htonl(void *dest, __u32 val, __u32 size)
{
	unsigned char *out = dest;
	const unsigned char *pattern;
	__u32 i;

	val = htonl(val);
	pattern = (const unsigned char *)&val;

	for (i = 0; i < size; i++)
		out[i] = pattern[i & 3];

	return dest;
}
|
||||
|
||||
/*
 * Fold a 32-bit sum into 16 bits with end-around carry.
 *
 * This function code has been taken from
 * Linux kernel lib/checksum.c
 */
static inline unsigned short from32to16(unsigned int x)
{
	/* add up 16-bit and 16-bit for 16+c bit */
	x = (x & 0xffff) + (x >> 16);
	/* add up carry.. */
	x = (x & 0xffff) + (x >> 16);
	return x;
}

/*
 * One's-complement sum of @len bytes starting at @buff, folded to 16 bits.
 * The value is returned in the host's native 16-bit word layout and is not
 * inverted. A @len of zero or less yields 0.
 *
 * This function code has been taken from
 * Linux kernel lib/checksum.c
 *
 * Unlike the kernel version, the byte order is probed at run time: in
 * userspace the kernel's __LITTLE_ENDIAN macro is either defined on every
 * host (glibc <endian.h>) or not defined at all, so it cannot select the
 * right branch for the odd leading / trailing byte.
 */
static unsigned int do_csum(const unsigned char *buff, int len)
{
	const unsigned short endian_probe = 1;
	const int host_is_le = *(const unsigned char *)&endian_probe == 1;
	unsigned int result = 0;
	int odd;

	if (len <= 0)
		return result;

	/* Consume a leading byte so that the wide loads below are aligned */
	odd = 1 & (unsigned long)buff;
	if (odd) {
		if (host_is_le)
			result += (*buff << 8);
		else
			result = *buff;
		len--;
		buff++;
	}
	if (len >= 2) {
		if (2 & (unsigned long)buff) {
			result += *(unsigned short *)buff;
			len -= 2;
			buff += 2;
		}
		if (len >= 4) {
			const unsigned char *end = buff +
						   ((unsigned int)len & ~3);
			unsigned int carry = 0;

			do {
				unsigned int w = *(unsigned int *)buff;

				buff += 4;
				result += carry;
				result += w;
				/* wrap-around shows up as result < w */
				carry = (w > result);
			} while (buff < end);
			result += carry;
			result = (result & 0xffff) + (result >> 16);
		}
		/* the low two bits of len still describe the tail */
		if (len & 2) {
			result += *(unsigned short *)buff;
			buff += 2;
		}
	}
	if (len & 1) {
		if (host_is_le)
			result += *buff;
		else
			result += (*buff << 8);
	}
	result = from32to16(result);
	/* An odd start shifted every byte into the other lane; swap back */
	if (odd)
		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);

	return result;
}
|
||||
|
||||
/*
 * Checksum of an IP header: the inverted do_csum() over ihl 32-bit words.
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * This function code has been taken from
 * Linux kernel lib/checksum.c
 */
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
	const unsigned int hdr_bytes = ihl * 4;
	unsigned int folded = do_csum(iph, hdr_bytes);

	return (__sum16)~folded;
}
|
||||
|
||||
/*
 * Fold a partial 32-bit checksum down to 16 bits and invert it.
 *
 * This function code has been taken from
 * Linux kernel include/asm-generic/checksum.h
 */
static inline __sum16 csum_fold(__wsum csum)
{
	__u32 acc = (__u32)csum;
	int pass;

	/* Second pass absorbs the carry the first one may produce */
	for (pass = 0; pass < 2; pass++)
		acc = (acc >> 16) + (acc & 0xffff);

	return (__sum16)~acc;
}
|
||||
|
||||
/*
 * Fold a 64-bit sum into 32 bits with end-around carry.
 *
 * This function code has been taken from
 * Linux kernel lib/checksum.c
 */
static inline __u32 from64to32(__u64 x)
{
	int pass;

	/* pass 0: 32-bit + 32-bit gives 32+c bit, pass 1: add up the carry */
	for (pass = 0; pass < 2; pass++)
		x = (x >> 32) + (x & 0xffffffff);

	return (__u32)x;
}
|
||||
|
||||
/*
 * Add the TCP/UDP pseudo-header (addresses, protocol, length) to a partial
 * checksum without folding the result.
 *
 * @saddr, @daddr: IPv4 addresses as stored in the IP header
 * @len:           L4 length in host byte order
 * @proto:         IP protocol number
 * @sum:           partial checksum to extend
 *
 * This function code has been taken from
 * Linux kernel lib/checksum.c
 *
 * Declared 'static inline' because it is defined in a header: an external
 * definition here breaks the link as soon as two source files include it.
 */
static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
					__u32 len, __u8 proto, __wsum sum)
{
	unsigned long long s = (__u32)sum;

	s += (__u32)saddr;
	s += (__u32)daddr;
#if defined(__BIG_ENDIAN__) || \
	(defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
	 __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
	s += proto + len;
#else
	/* little-endian host: move the value into the other byte lane */
	s += (proto + len) << 8;
#endif
	return (__wsum)from64to32(s);
}
|
||||
|
||||
/*
 * Pseudo-header checksum, folded to 16 bits and inverted: the result of
 * csum_tcpudp_nofold() passed through csum_fold().
 *
 * This function has been taken from
 * Linux kernel include/asm-generic/checksum.h
 */
static inline __sum16
csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
		  __u8 proto, __wsum sum)
{
	__wsum with_pseudo_hdr = csum_tcpudp_nofold(saddr, daddr, len,
						    proto, sum);

	return csum_fold(with_pseudo_hdr);
}
|
||||
|
||||
/*
 * Compute the UDP checksum over the pseudo-header plus @len bytes of UDP
 * header and data.
 *
 * @saddr, @daddr: IPv4 addresses as stored in the IP header
 * @len:           UDP length (header + data) in bytes, host byte order
 * @proto:         IP protocol number for the pseudo-header
 * @udp_pkt:       start of the UDP header; its checksum field must be zero
 *
 * Returns the value to store in the UDP checksum field.
 *
 * When @len is odd the final byte is summed as if followed by a zero pad
 * byte, so nothing beyond the datagram is read.
 */
static inline __u16 udp_csum(__u32 saddr, __u32 daddr, __u32 len,
			     __u8 proto, __u16 *udp_pkt)
{
	__u32 csum = 0;
	__u32 cnt;

	/* udp hdr and data: all complete 16-bit words */
	for (cnt = 0; cnt + 1 < len; cnt += 2)
		csum += udp_pkt[cnt >> 1];

	if (len & 1) {
		__u16 tail = 0;

		/* place the byte where it sits in memory; the pad stays 0 */
		*(unsigned char *)&tail = ((const unsigned char *)udp_pkt)[len - 1];
		csum += tail;
	}

	return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
}
|
||||
|
||||
#endif /* __LIB_CHECKSUM_H */
|
Reference in New Issue
Block a user