xdp-project-bpf-examples/include/bpf/builtins.h
Toke Høiland-Jørgensen 5f5ea00a6c include: Import data loading helpers from Cilium
It's way too difficult to read packet data in XDP because LLVM will mostly
generate code that doesn't pass the verifier. Thankfully, Cilium has a nice
workaround for this in the form of hand-written BPF assembly to perform the
reads in a way that the verifier will understand. Let's import these
helpers so they can be used by the examples in this repository, along with
some of the other BPF helpers they rely on.

This commit imports these files wholesale from Cilium:
- include/bpf/builtins.h
- include/bpf/compiler.h
- include/bpf/errno.h

It also adds include/xdp/context_helpers.h, which contains only the
xdp_load_bytes() and xdp_store_bytes() helpers from Cilium's
include/bpf/ctx/xdp.h (as the other functions in that file are specific to
how the Cilium code is structured).

We also extend the maximum size supported by the efficient memcpy()
implementation in builtins.h to 280 bytes, and increase the mask applied to
packet data copies to 0x3ff.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
2021-06-01 16:11:33 +02:00
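
As a rough usage sketch of the imported helpers (not part of this commit;
the program below and the exact xdp_load_bytes() signature are assumptions
based on the description above), an XDP program could pull packet bytes
onto the stack like this:

/* Hypothetical sketch, not part of this commit: copy the Ethernet header
 * into an aligned stack buffer using the imported helpers.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#include <bpf/builtins.h>		/* __align_stack_8, memcpy() etc. */
#include <xdp/context_helpers.h>	/* xdp_load_bytes(), assumed API */

SEC("xdp")
int grab_eth_header(struct xdp_md *ctx)
{
	__u8 eth[14] __align_stack_8;

	/* Assumed signature: xdp_load_bytes(ctx, offset, dst, const len),
	 * returning 0 on success and a negative errno on failure.
	 */
	if (xdp_load_bytes(ctx, 0, eth, sizeof(eth)))
		return XDP_DROP;

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";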

/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2016-2020 Authors of Cilium */
#ifndef __BPF_BUILTINS__
#define __BPF_BUILTINS__
#include "compiler.h"
#ifndef __non_bpf_context
#ifndef lock_xadd
# define lock_xadd(P, V) ((void) __sync_fetch_and_add((P), (V)))
#endif
/* Unfortunately the verifier forces aligned stack access while other
* memory (map, pkt, etc.) does not have to be aligned. Mark objects on
* the /stack/ that are > 8 bytes in order to force-align such memcpy
* candidates when we really need them to be aligned. This is not needed
* for objects of size <= 8 bytes, and for objects > 8 bytes /only/ when
* 8 bytes is not the natural object alignment (e.g. __u8 foo[12]).
*/
#define __align_stack_8 __aligned(8)
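/* Illustrative note (not in the original Cilium header): a 12-byte
* on-stack object is not naturally 8-byte aligned, so it would be
* declared as
*
*   __u8 foo[12] __align_stack_8;
*
* before being handed to the memcpy()/memset() variants below.
*/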
/* Memory iterators used below. */
#define __it_bwd(x, op) (x -= sizeof(__u##op))
#define __it_fwd(x, op) (x += sizeof(__u##op))
/* Memory operators used below. */
#define __it_set(a, op) (*(__u##op *)__it_bwd(a, op)) = 0
#define __it_xor(a, b, r, op) r |= (*(__u##op *)__it_bwd(a, op)) ^ (*(__u##op *)__it_bwd(b, op))
#define __it_mob(a, b, op) (*(__u##op *)__it_bwd(a, op)) = (*(__u##op *)__it_bwd(b, op))
#define __it_mof(a, b, op) \
do { \
*(__u##op *)a = *(__u##op *)b; \
__it_fwd(a, op); __it_fwd(b, op); \
} while (0)
static __always_inline __maybe_unused void
__bpf_memset_builtin(void *d, __u8 c, __u64 len)
{
/* Everything with a non-zero or non-constant (currently unsupported)
* fill byte c gets handled here.
*/
__builtin_memset(d, c, len);
}
static __always_inline void __bpf_memzero(void *d, __u64 len)
{
#if __clang_major__ >= 10
if (!__builtin_constant_p(len))
__throw_build_bug();
d += len;
switch (len) {
case 96: __it_set(d, 64);
case 88: jmp_88: __it_set(d, 64);
case 80: jmp_80: __it_set(d, 64);
case 72: jmp_72: __it_set(d, 64);
case 64: jmp_64: __it_set(d, 64);
case 56: jmp_56: __it_set(d, 64);
case 48: jmp_48: __it_set(d, 64);
case 40: jmp_40: __it_set(d, 64);
case 32: jmp_32: __it_set(d, 64);
case 24: jmp_24: __it_set(d, 64);
case 16: jmp_16: __it_set(d, 64);
case 8: jmp_8: __it_set(d, 64);
break;
case 94: __it_set(d, 16); __it_set(d, 32); goto jmp_88;
case 86: __it_set(d, 16); __it_set(d, 32); goto jmp_80;
case 78: __it_set(d, 16); __it_set(d, 32); goto jmp_72;
case 70: __it_set(d, 16); __it_set(d, 32); goto jmp_64;
case 62: __it_set(d, 16); __it_set(d, 32); goto jmp_56;
case 54: __it_set(d, 16); __it_set(d, 32); goto jmp_48;
case 46: __it_set(d, 16); __it_set(d, 32); goto jmp_40;
case 38: __it_set(d, 16); __it_set(d, 32); goto jmp_32;
case 30: __it_set(d, 16); __it_set(d, 32); goto jmp_24;
case 22: __it_set(d, 16); __it_set(d, 32); goto jmp_16;
case 14: __it_set(d, 16); __it_set(d, 32); goto jmp_8;
case 6: __it_set(d, 16); __it_set(d, 32);
break;
case 92: __it_set(d, 32); goto jmp_88;
case 84: __it_set(d, 32); goto jmp_80;
case 76: __it_set(d, 32); goto jmp_72;
case 68: __it_set(d, 32); goto jmp_64;
case 60: __it_set(d, 32); goto jmp_56;
case 52: __it_set(d, 32); goto jmp_48;
case 44: __it_set(d, 32); goto jmp_40;
case 36: __it_set(d, 32); goto jmp_32;
case 28: __it_set(d, 32); goto jmp_24;
case 20: __it_set(d, 32); goto jmp_16;
case 12: __it_set(d, 32); goto jmp_8;
case 4: __it_set(d, 32);
break;
case 90: __it_set(d, 16); goto jmp_88;
case 82: __it_set(d, 16); goto jmp_80;
case 74: __it_set(d, 16); goto jmp_72;
case 66: __it_set(d, 16); goto jmp_64;
case 58: __it_set(d, 16); goto jmp_56;
case 50: __it_set(d, 16); goto jmp_48;
case 42: __it_set(d, 16); goto jmp_40;
case 34: __it_set(d, 16); goto jmp_32;
case 26: __it_set(d, 16); goto jmp_24;
case 18: __it_set(d, 16); goto jmp_16;
case 10: __it_set(d, 16); goto jmp_8;
case 2: __it_set(d, 16);
break;
case 1: __it_set(d, 8);
break;
default:
/* __builtin_memset() is crappy slow since it cannot
* make any assumptions about alignment & underlying
* efficient unaligned access on the target we're
* running on.
*/
__throw_build_bug();
}
#else
__bpf_memset_builtin(d, 0, len);
#endif
}
static __always_inline __maybe_unused void
__bpf_no_builtin_memset(void *d __maybe_unused, __u8 c __maybe_unused,
__u64 len __maybe_unused)
{
__throw_build_bug();
}
/* Redirect any direct use in our code to throw an error. */
#define __builtin_memset __bpf_no_builtin_memset
static __always_inline __nobuiltin("memset") void memset(void *d, int c,
__u64 len)
{
if (__builtin_constant_p(len) && __builtin_constant_p(c) && c == 0)
__bpf_memzero(d, len);
else
__bpf_memset_builtin(d, c, len);
}
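/* Illustrative note (not in the original Cilium header): with a constant
* length and a constant zero fill byte, memset() compiles down to the
* unrolled stores of __bpf_memzero(); anything else falls back to
* __bpf_memset_builtin(). E.g. for a hypothetical 24-byte struct foo f:
*
*   memset(&f, 0, sizeof(f));   ->  three 8-byte stores via __bpf_memzero()
*/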
static __always_inline __maybe_unused void
__bpf_memcpy_builtin(void *d, const void *s, __u64 len)
{
/* Explicit opt-in for __builtin_memcpy(). */
__builtin_memcpy(d, s, len);
}
static __always_inline void __bpf_memcpy(void *d, const void *s, __u64 len)
{
#if __clang_major__ >= 10
if (!__builtin_constant_p(len))
__throw_build_bug();
d += len;
s += len;
switch (len) {
case 280: __it_mob(d, s, 64);
case 272: jmp_272: __it_mob(d, s, 64);
case 264: jmp_264: __it_mob(d, s, 64);
case 256: jmp_256: __it_mob(d, s, 64);
case 248: jmp_248: __it_mob(d, s, 64);
case 240: jmp_240: __it_mob(d, s, 64);
case 232: jmp_232: __it_mob(d, s, 64);
case 224: jmp_224: __it_mob(d, s, 64);
case 216: jmp_216: __it_mob(d, s, 64);
case 208: jmp_208: __it_mob(d, s, 64);
case 200: jmp_200: __it_mob(d, s, 64);
case 192: jmp_192: __it_mob(d, s, 64);
case 184: jmp_184: __it_mob(d, s, 64);
case 176: jmp_176: __it_mob(d, s, 64);
case 168: jmp_168: __it_mob(d, s, 64);
case 160: jmp_160: __it_mob(d, s, 64);
case 152: jmp_152: __it_mob(d, s, 64);
case 144: jmp_144: __it_mob(d, s, 64);
case 136: jmp_136: __it_mob(d, s, 64);
case 128: jmp_128: __it_mob(d, s, 64);
case 120: jmp_120: __it_mob(d, s, 64);
case 112: jmp_112: __it_mob(d, s, 64);
case 104: jmp_104: __it_mob(d, s, 64);
case 96: jmp_96: __it_mob(d, s, 64);
case 88: jmp_88: __it_mob(d, s, 64);
case 80: jmp_80: __it_mob(d, s, 64);
case 72: jmp_72: __it_mob(d, s, 64);
case 64: jmp_64: __it_mob(d, s, 64);
case 56: jmp_56: __it_mob(d, s, 64);
case 48: jmp_48: __it_mob(d, s, 64);
case 40: jmp_40: __it_mob(d, s, 64);
case 32: jmp_32: __it_mob(d, s, 64);
case 24: jmp_24: __it_mob(d, s, 64);
case 16: jmp_16: __it_mob(d, s, 64);
case 8: jmp_8: __it_mob(d, s, 64);
break;
case 278: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_272;
case 270: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_264;
case 262: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_256;
case 254: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_248;
case 246: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_240;
case 238: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_232;
case 230: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_224;
case 222: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_216;
case 214: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_208;
case 206: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_200;
case 198: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_192;
case 190: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_184;
case 182: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_176;
case 174: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_168;
case 166: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_160;
case 158: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_152;
case 150: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_144;
case 142: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_136;
case 134: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_128;
case 126: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_120;
case 118: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_112;
case 110: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_104;
case 102: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_96;
case 94: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_88;
case 86: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_80;
case 78: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_72;
case 70: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_64;
case 62: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_56;
case 54: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_48;
case 46: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_40;
case 38: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_32;
case 30: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_24;
case 22: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_16;
case 14: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_8;
case 6: __it_mob(d, s, 16); __it_mob(d, s, 32);
break;
case 276: __it_mob(d, s, 32); goto jmp_272;
case 268: __it_mob(d, s, 32); goto jmp_264;
case 260: __it_mob(d, s, 32); goto jmp_256;
case 252: __it_mob(d, s, 32); goto jmp_248;
case 244: __it_mob(d, s, 32); goto jmp_240;
case 236: __it_mob(d, s, 32); goto jmp_232;
case 228: __it_mob(d, s, 32); goto jmp_224;
case 220: __it_mob(d, s, 32); goto jmp_216;
case 212: __it_mob(d, s, 32); goto jmp_208;
case 204: __it_mob(d, s, 32); goto jmp_200;
case 196: __it_mob(d, s, 32); goto jmp_192;
case 188: __it_mob(d, s, 32); goto jmp_184;
case 180: __it_mob(d, s, 32); goto jmp_176;
case 172: __it_mob(d, s, 32); goto jmp_168;
case 164: __it_mob(d, s, 32); goto jmp_160;
case 156: __it_mob(d, s, 32); goto jmp_152;
case 148: __it_mob(d, s, 32); goto jmp_144;
case 140: __it_mob(d, s, 32); goto jmp_136;
case 132: __it_mob(d, s, 32); goto jmp_128;
case 124: __it_mob(d, s, 32); goto jmp_120;
case 116: __it_mob(d, s, 32); goto jmp_112;
case 108: __it_mob(d, s, 32); goto jmp_104;
case 100: __it_mob(d, s, 32); goto jmp_96;
case 92: __it_mob(d, s, 32); goto jmp_88;
case 84: __it_mob(d, s, 32); goto jmp_80;
case 76: __it_mob(d, s, 32); goto jmp_72;
case 68: __it_mob(d, s, 32); goto jmp_64;
case 60: __it_mob(d, s, 32); goto jmp_56;
case 52: __it_mob(d, s, 32); goto jmp_48;
case 44: __it_mob(d, s, 32); goto jmp_40;
case 36: __it_mob(d, s, 32); goto jmp_32;
case 28: __it_mob(d, s, 32); goto jmp_24;
case 20: __it_mob(d, s, 32); goto jmp_16;
case 12: __it_mob(d, s, 32); goto jmp_8;
case 4: __it_mob(d, s, 32);
break;
case 274: __it_mob(d, s, 16); goto jmp_272;
case 266: __it_mob(d, s, 16); goto jmp_264;
case 258: __it_mob(d, s, 16); goto jmp_256;
case 250: __it_mob(d, s, 16); goto jmp_248;
case 242: __it_mob(d, s, 16); goto jmp_240;
case 234: __it_mob(d, s, 16); goto jmp_232;
case 226: __it_mob(d, s, 16); goto jmp_224;
case 218: __it_mob(d, s, 16); goto jmp_216;
case 210: __it_mob(d, s, 16); goto jmp_208;
case 202: __it_mob(d, s, 16); goto jmp_200;
case 194: __it_mob(d, s, 16); goto jmp_192;
case 186: __it_mob(d, s, 16); goto jmp_184;
case 178: __it_mob(d, s, 16); goto jmp_176;
case 170: __it_mob(d, s, 16); goto jmp_168;
case 162: __it_mob(d, s, 16); goto jmp_160;
case 154: __it_mob(d, s, 16); goto jmp_152;
case 146: __it_mob(d, s, 16); goto jmp_144;
case 138: __it_mob(d, s, 16); goto jmp_136;
case 130: __it_mob(d, s, 16); goto jmp_128;
case 122: __it_mob(d, s, 16); goto jmp_120;
case 114: __it_mob(d, s, 16); goto jmp_112;
case 106: __it_mob(d, s, 16); goto jmp_104;
case 98: __it_mob(d, s, 16); goto jmp_96;
case 90: __it_mob(d, s, 16); goto jmp_88;
case 82: __it_mob(d, s, 16); goto jmp_80;
case 74: __it_mob(d, s, 16); goto jmp_72;
case 66: __it_mob(d, s, 16); goto jmp_64;
case 58: __it_mob(d, s, 16); goto jmp_56;
case 50: __it_mob(d, s, 16); goto jmp_48;
case 42: __it_mob(d, s, 16); goto jmp_40;
case 34: __it_mob(d, s, 16); goto jmp_32;
case 26: __it_mob(d, s, 16); goto jmp_24;
case 18: __it_mob(d, s, 16); goto jmp_16;
case 10: __it_mob(d, s, 16); goto jmp_8;
case 2: __it_mob(d, s, 16);
break;
case 1: __it_mob(d, s, 8);
break;
default:
/* __builtin_memcpy() is crappy slow since it cannot
* make any assumptions about alignment & underlying
* efficient unaligned access on the target we're
* running on.
*/
__throw_build_bug();
}
#else
__bpf_memcpy_builtin(d, s, len);
#endif
}
static __always_inline __maybe_unused void
__bpf_no_builtin_memcpy(void *d __maybe_unused, const void *s __maybe_unused,
__u64 len __maybe_unused)
{
__throw_build_bug();
}
/* Redirect any direct use in our code to throw an error. */
#define __builtin_memcpy __bpf_no_builtin_memcpy
static __always_inline __nobuiltin("memcpy") void memcpy(void *d, const void *s,
__u64 len)
{
return __bpf_memcpy(d, s, len);
}
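/* Illustrative note (not in the original Cilium header): the length must
* be a compile-time constant or __throw_build_bug() fires. E.g. copying
* 32 bytes from packet data into an aligned stack buffer:
*
*   __u8 buf[32] __align_stack_8;
*   memcpy(buf, data, sizeof(buf));   ->  four 8-byte load/store pairs
*
* where data is a hypothetical, already bounds-checked packet pointer.
*/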
static __always_inline __maybe_unused __u64
__bpf_memcmp_builtin(const void *x, const void *y, __u64 len)
{
/* Explicit opt-in for __builtin_memcmp(). We use the bcmp builtin
* here for two reasons: i) we only need to know equal or non-equal
* similar to __bpf_memcmp(), and ii) if __bpf_memcmp() ends up
* selecting __bpf_memcmp_builtin(), clang generates a memcmp loop.
* That is, (*) -> __bpf_memcmp() -> __bpf_memcmp_builtin() ->
* __builtin_memcmp() -> memcmp() -> (*), meaning it will end up
* selecting our memcmp() from here. Remapping to __builtin_bcmp()
* breaks this loop and resolves both needs at once.
*/
return __builtin_bcmp(x, y, len);
}
static __always_inline __u64 __bpf_memcmp(const void *x, const void *y,
__u64 len)
{
#if __clang_major__ >= 10
__u64 r = 0;
if (!__builtin_constant_p(len))
__throw_build_bug();
x += len;
y += len;
switch (len) {
case 32: __it_xor(x, y, r, 64);
case 24: jmp_24: __it_xor(x, y, r, 64);
case 16: jmp_16: __it_xor(x, y, r, 64);
case 8: jmp_8: __it_xor(x, y, r, 64);
break;
case 30: __it_xor(x, y, r, 16); __it_xor(x, y, r, 32); goto jmp_24;
case 22: __it_xor(x, y, r, 16); __it_xor(x, y, r, 32); goto jmp_16;
case 14: __it_xor(x, y, r, 16); __it_xor(x, y, r, 32); goto jmp_8;
case 6: __it_xor(x, y, r, 16); __it_xor(x, y, r, 32);
break;
case 28: __it_xor(x, y, r, 32); goto jmp_24;
case 20: __it_xor(x, y, r, 32); goto jmp_16;
case 12: __it_xor(x, y, r, 32); goto jmp_8;
case 4: __it_xor(x, y, r, 32);
break;
case 26: __it_xor(x, y, r, 16); goto jmp_24;
case 18: __it_xor(x, y, r, 16); goto jmp_16;
case 10: __it_xor(x, y, r, 16); goto jmp_8;
case 2: __it_xor(x, y, r, 16);
break;
case 1: __it_xor(x, y, r, 8);
break;
default:
__throw_build_bug();
}
return r;
#else
return __bpf_memcmp_builtin(x, y, len);
#endif
}
static __always_inline __maybe_unused __u64
__bpf_no_builtin_memcmp(const void *x __maybe_unused,
const void *y __maybe_unused, __u64 len __maybe_unused)
{
__throw_build_bug();
return 0;
}
/* Redirect any direct use in our code to throw an error. */
#define __builtin_memcmp __bpf_no_builtin_memcmp
/* Modified for our needs in that we only return either zero (x and y
* are equal) or non-zero (x and y are non-equal).
*/
static __always_inline __nobuiltin("memcmp") __u64 memcmp(const void *x,
const void *y,
__u64 len)
{
return __bpf_memcmp(x, y, len);
}
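/* Illustrative note (not in the original Cilium header): only equal vs.
* non-equal is reported, so compare the result against zero. E.g. for two
* hypothetical __u8 mac_a[6], mac_b[6] with a constant length:
*
*   if (memcmp(mac_a, mac_b, 6) == 0)
*           ... addresses are equal ...
*/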
static __always_inline __maybe_unused void
__bpf_memmove_builtin(void *d, const void *s, __u64 len)
{
/* Explicit opt-in for __builtin_memmove(). */
__builtin_memmove(d, s, len);
}
static __always_inline void __bpf_memmove_bwd(void *d, const void *s, __u64 len)
{
/* Our internal memcpy implementation walks backwards by default. */
__bpf_memcpy(d, s, len);
}
static __always_inline void __bpf_memmove_fwd(void *d, const void *s, __u64 len)
{
#if __clang_major__ >= 10
if (!__builtin_constant_p(len))
__throw_build_bug();
switch (len) {
case 96: __it_mof(d, s, 64);
case 88: jmp_88: __it_mof(d, s, 64);
case 80: jmp_80: __it_mof(d, s, 64);
case 72: jmp_72: __it_mof(d, s, 64);
case 64: jmp_64: __it_mof(d, s, 64);
case 56: jmp_56: __it_mof(d, s, 64);
case 48: jmp_48: __it_mof(d, s, 64);
case 40: jmp_40: __it_mof(d, s, 64);
case 32: jmp_32: __it_mof(d, s, 64);
case 24: jmp_24: __it_mof(d, s, 64);
case 16: jmp_16: __it_mof(d, s, 64);
case 8: jmp_8: __it_mof(d, s, 64);
break;
case 94: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_88;
case 86: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_80;
case 78: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_72;
case 70: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_64;
case 62: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_56;
case 54: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_48;
case 46: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_40;
case 38: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_32;
case 30: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_24;
case 22: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_16;
case 14: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_8;
case 6: __it_mof(d, s, 16); __it_mof(d, s, 32);
break;
case 92: __it_mof(d, s, 32); goto jmp_88;
case 84: __it_mof(d, s, 32); goto jmp_80;
case 76: __it_mof(d, s, 32); goto jmp_72;
case 68: __it_mof(d, s, 32); goto jmp_64;
case 60: __it_mof(d, s, 32); goto jmp_56;
case 52: __it_mof(d, s, 32); goto jmp_48;
case 44: __it_mof(d, s, 32); goto jmp_40;
case 36: __it_mof(d, s, 32); goto jmp_32;
case 28: __it_mof(d, s, 32); goto jmp_24;
case 20: __it_mof(d, s, 32); goto jmp_16;
case 12: __it_mof(d, s, 32); goto jmp_8;
case 4: __it_mof(d, s, 32);
break;
case 90: __it_mof(d, s, 16); goto jmp_88;
case 82: __it_mof(d, s, 16); goto jmp_80;
case 74: __it_mof(d, s, 16); goto jmp_72;
case 66: __it_mof(d, s, 16); goto jmp_64;
case 58: __it_mof(d, s, 16); goto jmp_56;
case 50: __it_mof(d, s, 16); goto jmp_48;
case 42: __it_mof(d, s, 16); goto jmp_40;
case 34: __it_mof(d, s, 16); goto jmp_32;
case 26: __it_mof(d, s, 16); goto jmp_24;
case 18: __it_mof(d, s, 16); goto jmp_16;
case 10: __it_mof(d, s, 16); goto jmp_8;
case 2: __it_mof(d, s, 16);
break;
case 1: __it_mof(d, s, 8);
break;
default:
/* __builtin_memmove() is crappy slow since it cannot
* make any assumptions about alignment & underlying
* efficient unaligned access on the target we're
* running on.
*/
__throw_build_bug();
}
#else
__bpf_memmove_builtin(d, s, len);
#endif
}
static __always_inline __maybe_unused void
__bpf_no_builtin_memmove(void *d __maybe_unused, const void *s __maybe_unused,
__u64 len __maybe_unused)
{
__throw_build_bug();
}
/* Redirect any direct use in our code to throw an error. */
#define __builtin_memmove __bpf_no_builtin_memmove
static __always_inline void __bpf_memmove(void *d, const void *s, __u64 len)
{
/* Note, the forward walking memmove() might not work with on-stack data
* since we'll end up walking the memory unaligned even when __align_stack_8
* is set. Should not matter much since we'll use memmove() mostly or only
* on pkt data.
*
* Example with d, s, len = 12 bytes:
* * __bpf_memmove_fwd() emits: mov_32 d[0],s[0]; mov_64 d[4],s[4]
* * __bpf_memmove_bwd() emits: mov_32 d[8],s[8]; mov_64 d[0],s[0]
*/
if (d <= s)
return __bpf_memmove_fwd(d, s, len);
else
return __bpf_memmove_bwd(d, s, len);
}
static __always_inline __nobuiltin("memmove") void memmove(void *d,
const void *s,
__u64 len)
{
return __bpf_memmove(d, s, len);
}
#endif /* __non_bpf_context */
#endif /* __BPF_BUILTINS__ */