summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-12-18 10:51:06 -0500
committerDavid S. Miller <davem@davemloft.net>2017-12-18 10:51:06 -0500
commit59436c9ee18d7faad0cd1875c9d8322668f98611 (patch)
tree64543535fdefc11589a24aa9c3e2bab1bd98f894
parentc30abd5e40dd863f88e26be09b6ce949145a630a (diff)
parent46df3d209db080395a98fc0875bd05e45e8f44e0 (diff)
downloadop-kernel-dev-59436c9ee18d7faad0cd1875c9d8322668f98611.zip
op-kernel-dev-59436c9ee18d7faad0cd1875c9d8322668f98611.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2017-12-18 The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) Allow arbitrary function calls from one BPF function to another BPF function. As of today when writing BPF programs, __always_inline had to be used in the BPF C programs for all functions, unnecessarily causing LLVM to inflate code size. Handle this more naturally with support for BPF to BPF calls such that this __always_inline restriction can be overcome. As a result, it allows for better optimized code and finally enables to introduce core BPF libraries in the future that can be reused out of different projects. x86 and arm64 JIT support was added as well, from Alexei. 2) Add infrastructure for tagging functions as error injectable and allow for BPF to return arbitrary error values when BPF is attached via kprobes on those. This way of injecting errors generically eases testing and debugging without having to recompile or restart the kernel. Tags for opting-in for this facility are added with BPF_ALLOW_ERROR_INJECTION(), from Josef. 3) For BPF offload via nfp JIT, add support for bpf_xdp_adjust_head() helper call for XDP programs. First part of this work adds handling of BPF capabilities included in the firmware, and the later patches add support to the nfp verifier part and JIT as well as some small optimizations, from Jakub. 4) The bpftool now also gets support for basic cgroup BPF operations such as attaching, detaching and listing current BPF programs. As a requirement for the attach part, bpftool can now also load object files through 'bpftool prog load'. This reuses libbpf which we have in the kernel tree as well. bpftool-cgroup man page is added along with it, from Roman. 5) Back then commit e87c6bc3852b ("bpf: permit multiple bpf attachments for a single perf event") added support for attaching multiple BPF programs to a single perf event. Given they are configured through perf's ioctl() interface, the interface has been extended with a PERF_EVENT_IOC_QUERY_BPF command in this work in order to return an array of one or multiple BPF prog ids that are currently attached, from Yonghong. 6) Various minor fixes and cleanups to the bpftool's Makefile as well as a new 'uninstall' and 'doc-uninstall' target for removing bpftool itself or prior installed documentation related to it, from Quentin. 7) Add CONFIG_CGROUP_BPF=y to the BPF kernel selftest config file which is required for the test_dev_cgroup test case to run, from Naresh. 8) Fix reporting of XDP prog_flags for nfp driver, from Jakub. 9) Fix libbpf's exit code from the Makefile when libelf was not found in the system, also from Jakub. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/arm/net/bpf_jit_32.c2
-rw-r--r--arch/arm64/net/bpf_jit_comp.c70
-rw-r--r--arch/mips/net/ebpf_jit.c2
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c2
-rw-r--r--arch/s390/net/bpf_jit_comp.c2
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c2
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/kprobes.h4
-rw-r--r--arch/x86/include/asm/ptrace.h5
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c14
-rw-r--r--arch/x86/net/bpf_jit_comp.c49
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/fw.h54
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c107
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.c115
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.h30
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/offload.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/verifier.c70
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_asm.h2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h1
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c11
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/free-space-cache.c2
-rw-r--r--include/asm-generic/vmlinux.lds.h10
-rw-r--r--include/linux/bpf.h18
-rw-r--r--include/linux/bpf_verifier.h45
-rw-r--r--include/linux/filter.h16
-rw-r--r--include/linux/kprobes.h1
-rw-r--r--include/linux/module.h5
-rw-r--r--include/linux/trace_events.h7
-rw-r--r--include/uapi/linux/bpf.h13
-rw-r--r--include/uapi/linux/perf_event.h22
-rw-r--r--kernel/bpf/core.c128
-rw-r--r--kernel/bpf/disasm.c8
-rw-r--r--kernel/bpf/syscall.c3
-rw-r--r--kernel/bpf/verifier.c1122
-rw-r--r--kernel/events/core.c10
-rw-r--r--kernel/kprobes.c163
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/trace/Kconfig11
-rw-r--r--kernel/trace/bpf_trace.c58
-rw-r--r--kernel/trace/trace_kprobe.c64
-rw-r--r--kernel/trace/trace_probe.h12
-rw-r--r--samples/bpf/Makefile4
-rwxr-xr-xsamples/bpf/test_override_return.sh15
-rw-r--r--samples/bpf/tracex7_kern.c16
-rw-r--r--samples/bpf/tracex7_user.c28
-rw-r--r--tools/bpf/bpftool/Documentation/Makefile30
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst118
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst12
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst8
-rw-r--r--tools/bpf/bpftool/Makefile61
-rw-r--r--tools/bpf/bpftool/cgroup.c307
-rw-r--r--tools/bpf/bpftool/common.c71
-rw-r--r--tools/bpf/bpftool/main.c3
-rw-r--r--tools/bpf/bpftool/main.h2
-rw-r--r--tools/bpf/bpftool/prog.c29
-rw-r--r--tools/include/uapi/linux/bpf.h13
-rw-r--r--tools/include/uapi/linux/perf_event.h22
-rw-r--r--tools/lib/bpf/Makefile4
-rw-r--r--tools/lib/bpf/bpf.h2
-rw-r--r--tools/lib/bpf/libbpf.c199
-rw-r--r--tools/scripts/Makefile.include1
-rw-r--r--tools/testing/selftests/bpf/Makefile12
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h3
-rw-r--r--tools/testing/selftests/bpf/config1
-rw-r--r--tools/testing/selftests/bpf/test_l4lb_noinline.c473
-rw-r--r--tools/testing/selftests/bpf/test_progs.c228
-rw-r--r--tools/testing/selftests/bpf/test_tracepoint.c26
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c1624
-rw-r--r--tools/testing/selftests/bpf/test_xdp_noinline.c833
73 files changed, 6071 insertions, 352 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 400b9e1..d3f4aaf 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -196,6 +196,9 @@ config HAVE_OPTPROBES
config HAVE_KPROBES_ON_FTRACE
bool
+config HAVE_KPROBE_OVERRIDE
+ bool
+
config HAVE_NMI
bool
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index c199990..4425189 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1824,7 +1824,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
/* If BPF JIT was not enabled then we must fall back to
* the interpreter.
*/
- if (!bpf_jit_enable)
+ if (!prog->jit_requested)
return orig_prog;
/* If constant blinding was enabled and we failed during blinding
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index ba38d40..396490c 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -99,6 +99,20 @@ static inline void emit_a64_mov_i64(const int reg, const u64 val,
}
}
+static inline void emit_addr_mov_i64(const int reg, const u64 val,
+ struct jit_ctx *ctx)
+{
+ u64 tmp = val;
+ int shift = 0;
+
+ emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
+ for (;shift < 48;) {
+ tmp >>= 16;
+ shift += 16;
+ emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
+ }
+}
+
static inline void emit_a64_mov_i(const int is64, const int reg,
const s32 val, struct jit_ctx *ctx)
{
@@ -603,7 +617,10 @@ emit_cond_jmp:
const u8 r0 = bpf2a64[BPF_REG_0];
const u64 func = (u64)__bpf_call_base + imm;
- emit_a64_mov_i64(tmp, func, ctx);
+ if (ctx->prog->is_func)
+ emit_addr_mov_i64(tmp, func, ctx);
+ else
+ emit_a64_mov_i64(tmp, func, ctx);
emit(A64_BLR(tmp), ctx);
emit(A64_MOV(1, r0, A64_R(0)), ctx);
break;
@@ -835,16 +852,24 @@ static inline void bpf_flush_icache(void *start, void *end)
flush_icache_range((unsigned long)start, (unsigned long)end);
}
+struct arm64_jit_data {
+ struct bpf_binary_header *header;
+ u8 *image;
+ struct jit_ctx ctx;
+};
+
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
struct bpf_prog *tmp, *orig_prog = prog;
struct bpf_binary_header *header;
+ struct arm64_jit_data *jit_data;
bool tmp_blinded = false;
+ bool extra_pass = false;
struct jit_ctx ctx;
int image_size;
u8 *image_ptr;
- if (!bpf_jit_enable)
+ if (!prog->jit_requested)
return orig_prog;
tmp = bpf_jit_blind_constants(prog);
@@ -858,13 +883,29 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog = tmp;
}
+ jit_data = prog->aux->jit_data;
+ if (!jit_data) {
+ jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+ if (!jit_data) {
+ prog = orig_prog;
+ goto out;
+ }
+ prog->aux->jit_data = jit_data;
+ }
+ if (jit_data->ctx.offset) {
+ ctx = jit_data->ctx;
+ image_ptr = jit_data->image;
+ header = jit_data->header;
+ extra_pass = true;
+ goto skip_init_ctx;
+ }
memset(&ctx, 0, sizeof(ctx));
ctx.prog = prog;
ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
if (ctx.offset == NULL) {
prog = orig_prog;
- goto out;
+ goto out_off;
}
/* 1. Initial fake pass to compute ctx->idx. */
@@ -895,6 +936,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
/* 2. Now, the actual pass. */
ctx.image = (__le32 *)image_ptr;
+skip_init_ctx:
ctx.idx = 0;
build_prologue(&ctx);
@@ -920,13 +962,31 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bpf_flush_icache(header, ctx.image + ctx.idx);
- bpf_jit_binary_lock_ro(header);
+ if (!prog->is_func || extra_pass) {
+ if (extra_pass && ctx.idx != jit_data->ctx.idx) {
+ pr_err_once("multi-func JIT bug %d != %d\n",
+ ctx.idx, jit_data->ctx.idx);
+ bpf_jit_binary_free(header);
+ prog->bpf_func = NULL;
+ prog->jited = 0;
+ goto out_off;
+ }
+ bpf_jit_binary_lock_ro(header);
+ } else {
+ jit_data->ctx = ctx;
+ jit_data->image = image_ptr;
+ jit_data->header = header;
+ }
prog->bpf_func = (void *)ctx.image;
prog->jited = 1;
prog->jited_len = image_size;
+ if (!prog->is_func || extra_pass) {
out_off:
- kfree(ctx.offset);
+ kfree(ctx.offset);
+ kfree(jit_data);
+ prog->aux->jit_data = NULL;
+ }
out:
if (tmp_blinded)
bpf_jit_prog_release_other(prog, prog == orig_prog ?
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 962b025..97069a1 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -1869,7 +1869,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
unsigned int image_size;
u8 *image_ptr;
- if (!bpf_jit_enable || !cpu_has_mips64r2)
+ if (!prog->jit_requested || !cpu_has_mips64r2)
return prog;
tmp = bpf_jit_blind_constants(prog);
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 46d74e8..d5a5bc4 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -993,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_prog *tmp_fp;
bool bpf_blinded = false;
- if (!bpf_jit_enable)
+ if (!fp->jit_requested)
return org_fp;
tmp_fp = bpf_jit_blind_constants(org_fp);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e81c168..f4baa8c 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1300,7 +1300,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_jit jit;
int pass;
- if (!bpf_jit_enable)
+ if (!fp->jit_requested)
return orig_fp;
tmp = bpf_jit_blind_constants(fp);
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 5765e7e..a2f1b5e 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1517,7 +1517,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
u8 *image_ptr;
int pass;
- if (!bpf_jit_enable)
+ if (!prog->jit_requested)
return orig_prog;
tmp = bpf_jit_blind_constants(prog);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8eed3f9..04d66e6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -154,6 +154,7 @@ config X86
select HAVE_KERNEL_XZ
select HAVE_KPROBES
select HAVE_KPROBES_ON_FTRACE
+ select HAVE_KPROBE_OVERRIDE
select HAVE_KRETPROBES
select HAVE_KVM
select HAVE_LIVEPATCH if X86_64
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 9f2e310..36abb23 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -67,6 +67,10 @@ extern const int kretprobe_blacklist_size;
void arch_remove_kprobe(struct kprobe *p);
asmlinkage void kretprobe_trampoline(void);
+#ifdef CONFIG_KPROBES_ON_FTRACE
+extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs);
+#endif
+
/* Architecture specific copy of original instruction*/
struct arch_specific_insn {
/* copy of the original instruction */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 14131dd..6de1fd3 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -109,6 +109,11 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
return regs->ax;
}
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+ regs->ax = rc;
+}
+
/*
* user_mode(regs) determines whether a register set came from user
* mode. On x86_32, this is true if V8086 mode was enabled OR if the
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 8dc0161..1ea748d 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -97,3 +97,17 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p)
p->ainsn.boostable = false;
return 0;
}
+
+asmlinkage void override_func(void);
+asm(
+ ".type override_func, @function\n"
+ "override_func:\n"
+ " ret\n"
+ ".size override_func, .-override_func\n"
+);
+
+void arch_ftrace_kprobe_override_function(struct pt_regs *regs)
+{
+ regs->ip = (unsigned long)&override_func;
+}
+NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 0554e8a..87f214f 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1109,19 +1109,29 @@ common_load:
return proglen;
}
+struct x64_jit_data {
+ struct bpf_binary_header *header;
+ int *addrs;
+ u8 *image;
+ int proglen;
+ struct jit_context ctx;
+};
+
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
struct bpf_binary_header *header = NULL;
struct bpf_prog *tmp, *orig_prog = prog;
+ struct x64_jit_data *jit_data;
int proglen, oldproglen = 0;
struct jit_context ctx = {};
bool tmp_blinded = false;
+ bool extra_pass = false;
u8 *image = NULL;
int *addrs;
int pass;
int i;
- if (!bpf_jit_enable)
+ if (!prog->jit_requested)
return orig_prog;
tmp = bpf_jit_blind_constants(prog);
@@ -1135,10 +1145,28 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog = tmp;
}
+ jit_data = prog->aux->jit_data;
+ if (!jit_data) {
+ jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+ if (!jit_data) {
+ prog = orig_prog;
+ goto out;
+ }
+ prog->aux->jit_data = jit_data;
+ }
+ addrs = jit_data->addrs;
+ if (addrs) {
+ ctx = jit_data->ctx;
+ oldproglen = jit_data->proglen;
+ image = jit_data->image;
+ header = jit_data->header;
+ extra_pass = true;
+ goto skip_init_addrs;
+ }
addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
if (!addrs) {
prog = orig_prog;
- goto out;
+ goto out_addrs;
}
/* Before first pass, make a rough estimation of addrs[]
@@ -1149,6 +1177,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
addrs[i] = proglen;
}
ctx.cleanup_addr = proglen;
+skip_init_addrs:
/* JITed image shrinks with every pass and the loop iterates
* until the image stops shrinking. Very large bpf programs
@@ -1189,7 +1218,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
if (image) {
bpf_flush_icache(header, image + proglen);
- bpf_jit_binary_lock_ro(header);
+ if (!prog->is_func || extra_pass) {
+ bpf_jit_binary_lock_ro(header);
+ } else {
+ jit_data->addrs = addrs;
+ jit_data->ctx = ctx;
+ jit_data->proglen = proglen;
+ jit_data->image = image;
+ jit_data->header = header;
+ }
prog->bpf_func = (void *)image;
prog->jited = 1;
prog->jited_len = proglen;
@@ -1197,8 +1234,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog = orig_prog;
}
+ if (!prog->is_func || extra_pass) {
out_addrs:
- kfree(addrs);
+ kfree(addrs);
+ kfree(jit_data);
+ prog->aux->jit_data = NULL;
+ }
out:
if (tmp_blinded)
bpf_jit_prog_release_other(prog, prog == orig_prog ?
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
new file mode 100644
index 0000000..7206aa1
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef NFP_BPF_FW_H
+#define NFP_BPF_FW_H 1
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+
+enum bpf_cap_tlv_type {
+ NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2,
+};
+
+struct nfp_bpf_cap_tlv_adjust_head {
+ __le32 flags;
+ __le32 off_min;
+ __le32 off_max;
+ __le32 guaranteed_sub;
+ __le32 guaranteed_add;
+};
+
+#define NFP_BPF_ADJUST_HEAD_NO_META BIT(0)
+
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 3419ad4..0de59f0 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -33,6 +33,7 @@
#define pr_fmt(fmt) "NFP net bpf: " fmt
+#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/filter.h>
@@ -87,6 +88,18 @@ static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
return nfp_prog->start_off + nfp_prog->prog_len;
}
+static bool
+nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
+{
+ /* If there is a recorded error we may have dropped instructions;
+ * that doesn't have to be due to translator bug, and the translation
+ * will fail anyway, so just return OK.
+ */
+ if (nfp_prog->error)
+ return true;
+ return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off);
+}
+
static unsigned int
nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset)
{
@@ -1196,6 +1209,86 @@ static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
SHF_SC_R_ROT, 16);
}
+static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
+ struct nfp_bpf_cap_adjust_head *adjust_head;
+ u32 ret_einval, end;
+
+ adjust_head = &nfp_prog->bpf->adjust_head;
+
+ /* Optimized version - 5 vs 14 cycles */
+ if (nfp_prog->adjust_head_location != UINT_MAX) {
+ if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n))
+ return -EINVAL;
+
+ emit_alu(nfp_prog, pptr_reg(nfp_prog),
+ reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog));
+ emit_alu(nfp_prog, plen_reg(nfp_prog),
+ plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
+ emit_alu(nfp_prog, pv_len(nfp_prog),
+ pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
+
+ wrp_immed(nfp_prog, reg_both(0), 0);
+ wrp_immed(nfp_prog, reg_both(1), 0);
+
+ /* TODO: when adjust head is guaranteed to succeed we can
+ * also eliminate the following if (r0 == 0) branch.
+ */
+
+ return 0;
+ }
+
+ ret_einval = nfp_prog_current_offset(nfp_prog) + 14;
+ end = ret_einval + 2;
+
+ /* We need to use a temp because offset is just a part of the pkt ptr */
+ emit_alu(nfp_prog, tmp,
+ reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog));
+
+ /* Validate result will fit within FW datapath constraints */
+ emit_alu(nfp_prog, reg_none(),
+ tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min));
+ emit_br(nfp_prog, BR_BLO, ret_einval, 0);
+ emit_alu(nfp_prog, reg_none(),
+ reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp);
+ emit_br(nfp_prog, BR_BLO, ret_einval, 0);
+
+ /* Validate the length is at least ETH_HLEN */
+ emit_alu(nfp_prog, tmp_len,
+ plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
+ emit_alu(nfp_prog, reg_none(),
+ tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN));
+ emit_br(nfp_prog, BR_BMI, ret_einval, 0);
+
+ /* Load the ret code */
+ wrp_immed(nfp_prog, reg_both(0), 0);
+ wrp_immed(nfp_prog, reg_both(1), 0);
+
+ /* Modify the packet metadata */
+ emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0);
+
+ /* Skip over the -EINVAL ret code (defer 2) */
+ emit_br_def(nfp_prog, end, 2);
+
+ emit_alu(nfp_prog, plen_reg(nfp_prog),
+ plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
+ emit_alu(nfp_prog, pv_len(nfp_prog),
+ pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
+
+ /* return -EINVAL target */
+ if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
+ return -EINVAL;
+
+ wrp_immed(nfp_prog, reg_both(0), -22);
+ wrp_immed(nfp_prog, reg_both(1), ~0);
+
+ if (!nfp_prog_confirm_current_offset(nfp_prog, end))
+ return -EINVAL;
+
+ return 0;
+}
+
/* --- Callbacks --- */
static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
@@ -1930,6 +2023,17 @@ static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
}
+static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ switch (meta->insn.imm) {
+ case BPF_FUNC_xdp_adjust_head:
+ return adjust_head(nfp_prog, meta);
+ default:
+ WARN_ONCE(1, "verifier allowed unsupported function\n");
+ return -EOPNOTSUPP;
+ }
+}
+
static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT);
@@ -2002,6 +2106,7 @@ static const instr_cb_t instr_cb[256] = {
[BPF_JMP | BPF_JLE | BPF_X] = jle_reg,
[BPF_JMP | BPF_JSET | BPF_X] = jset_reg,
[BPF_JMP | BPF_JNE | BPF_X] = jne_reg,
+ [BPF_JMP | BPF_CALL] = call,
[BPF_JMP | BPF_EXIT] = goto_out,
};
@@ -2026,6 +2131,8 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
list_for_each_entry(meta, &nfp_prog->insns, l) {
if (meta->skip)
continue;
+ if (meta->insn.code == (BPF_JMP | BPF_CALL))
+ continue;
if (BPF_CLASS(meta->insn.code) != BPF_JMP)
continue;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 54bfd78..4f6553f 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -34,10 +34,12 @@
#include <net/pkt_cls.h>
#include "../nfpcore/nfp_cpp.h"
+#include "../nfpcore/nfp_nffw.h"
#include "../nfp_app.h"
#include "../nfp_main.h"
#include "../nfp_net.h"
#include "../nfp_port.h"
+#include "fw.h"
#include "main.h"
static bool nfp_net_ebpf_capable(struct nfp_net *nn)
@@ -155,10 +157,123 @@ static bool nfp_bpf_tc_busy(struct nfp_app *app, struct nfp_net *nn)
return nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF;
}
+static int
+nfp_bpf_parse_cap_adjust_head(struct nfp_app_bpf *bpf, void __iomem *value,
+ u32 length)
+{
+ struct nfp_bpf_cap_tlv_adjust_head __iomem *cap = value;
+ struct nfp_cpp *cpp = bpf->app->pf->cpp;
+
+ if (length < sizeof(*cap)) {
+ nfp_err(cpp, "truncated adjust_head TLV: %d\n", length);
+ return -EINVAL;
+ }
+
+ bpf->adjust_head.flags = readl(&cap->flags);
+ bpf->adjust_head.off_min = readl(&cap->off_min);
+ bpf->adjust_head.off_max = readl(&cap->off_max);
+ bpf->adjust_head.guaranteed_sub = readl(&cap->guaranteed_sub);
+ bpf->adjust_head.guaranteed_add = readl(&cap->guaranteed_add);
+
+ if (bpf->adjust_head.off_min > bpf->adjust_head.off_max) {
+ nfp_err(cpp, "invalid adjust_head TLV: min > max\n");
+ return -EINVAL;
+ }
+ if (!FIELD_FIT(UR_REG_IMM_MAX, bpf->adjust_head.off_min) ||
+ !FIELD_FIT(UR_REG_IMM_MAX, bpf->adjust_head.off_max)) {
+ nfp_warn(cpp, "disabling adjust_head - driver expects min/max to fit in as immediates\n");
+ memset(&bpf->adjust_head, 0, sizeof(bpf->adjust_head));
+ return 0;
+ }
+
+ return 0;
+}
+
+static int nfp_bpf_parse_capabilities(struct nfp_app *app)
+{
+ struct nfp_cpp *cpp = app->pf->cpp;
+ struct nfp_cpp_area *area;
+ u8 __iomem *mem, *start;
+
+ mem = nfp_rtsym_map(app->pf->rtbl, "_abi_bpf_capabilities", "bpf.cap",
+ 8, &area);
+ if (IS_ERR(mem))
+ return PTR_ERR(mem) == -ENOENT ? 0 : PTR_ERR(mem);
+
+ start = mem;
+ while (mem - start + 8 < nfp_cpp_area_size(area)) {
+ u8 __iomem *value;
+ u32 type, length;
+
+ type = readl(mem);
+ length = readl(mem + 4);
+ value = mem + 8;
+
+ mem += 8 + length;
+ if (mem - start > nfp_cpp_area_size(area))
+ goto err_release_free;
+
+ switch (type) {
+ case NFP_BPF_CAP_TYPE_ADJUST_HEAD:
+ if (nfp_bpf_parse_cap_adjust_head(app->priv, value,
+ length))
+ goto err_release_free;
+ break;
+ default:
+ nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
+ break;
+ }
+ }
+ if (mem - start != nfp_cpp_area_size(area)) {
+ nfp_err(cpp, "BPF capabilities left after parsing, parsed:%zd total length:%zu\n",
+ mem - start, nfp_cpp_area_size(area));
+ goto err_release_free;
+ }
+
+ nfp_cpp_area_release_free(area);
+
+ return 0;
+
+err_release_free:
+ nfp_err(cpp, "invalid BPF capabilities at offset:%zd\n", mem - start);
+ nfp_cpp_area_release_free(area);
+ return -EINVAL;
+}
+
+static int nfp_bpf_init(struct nfp_app *app)
+{
+ struct nfp_app_bpf *bpf;
+ int err;
+
+ bpf = kzalloc(sizeof(*bpf), GFP_KERNEL);
+ if (!bpf)
+ return -ENOMEM;
+ bpf->app = app;
+ app->priv = bpf;
+
+ err = nfp_bpf_parse_capabilities(app);
+ if (err)
+ goto err_free_bpf;
+
+ return 0;
+
+err_free_bpf:
+ kfree(bpf);
+ return err;
+}
+
+static void nfp_bpf_clean(struct nfp_app *app)
+{
+ kfree(app->priv);
+}
+
const struct nfp_app_type app_bpf = {
.id = NFP_APP_BPF_NIC,
.name = "ebpf",
+ .init = nfp_bpf_init,
+ .clean = nfp_bpf_clean,
+
.extra_cap = nfp_bpf_extra_cap,
.vnic_alloc = nfp_app_nic_vnic_alloc,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 5884291..f49669b 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -78,6 +78,29 @@ enum pkt_vec {
#define NFP_BPF_ABI_FLAGS reg_imm(0)
#define NFP_BPF_ABI_FLAG_MARK 1
+/**
+ * struct nfp_app_bpf - bpf app priv structure
+ * @app: backpointer to the app
+ *
+ * @adjust_head: adjust head capability
+ * @flags: extra flags for adjust head
+ * @off_min: minimal packet offset within buffer required
+ * @off_max: maximum packet offset within buffer required
+ * @guaranteed_sub: amount of negative adjustment guaranteed possible
+ * @guaranteed_add: amount of positive adjustment guaranteed possible
+ */
+struct nfp_app_bpf {
+ struct nfp_app *app;
+
+ struct nfp_bpf_cap_adjust_head {
+ u32 flags;
+ int off_min;
+ int off_max;
+ int guaranteed_sub;
+ int guaranteed_add;
+ } adjust_head;
+};
+
struct nfp_prog;
struct nfp_insn_meta;
typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
@@ -97,6 +120,7 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
* @ptr: pointer type for memory operations
* @ldst_gather_len: memcpy length gathered from load/store sequence
* @paired_st: the paired store insn at the head of the sequence
+ * @arg2: arg2 for call instructions
* @ptr_not_const: pointer is not always constant
* @jmp_dst: destination info for jump instructions
* @off: index of first generated machine instruction (in nfp_prog.prog)
@@ -116,6 +140,7 @@ struct nfp_insn_meta {
bool ptr_not_const;
};
struct nfp_insn_meta *jmp_dst;
+ struct bpf_reg_state arg2;
};
unsigned int off;
unsigned short n;
@@ -160,6 +185,7 @@ static inline bool is_mbpf_store(const struct nfp_insn_meta *meta)
/**
* struct nfp_prog - nfp BPF program
+ * @bpf: backpointer to the bpf app priv structure
* @prog: machine code
* @prog_len: number of valid instructions in @prog array
* @__prog_alloc_len: alloc size of @prog array
@@ -173,9 +199,12 @@ static inline bool is_mbpf_store(const struct nfp_insn_meta *meta)
* @n_translated: number of successfully translated instructions (for errors)
* @error: error code if something went wrong
* @stack_depth: max stack depth from the verifier
+ * @adjust_head_location: if program has single adjust head call - the insn no.
* @insns: list of BPF instruction wrappers (struct nfp_insn_meta)
*/
struct nfp_prog {
+ struct nfp_app_bpf *bpf;
+
u64 *prog;
unsigned int prog_len;
unsigned int __prog_alloc_len;
@@ -194,6 +223,7 @@ struct nfp_prog {
int error;
unsigned int stack_depth;
+ unsigned int adjust_head_location;
struct list_head insns;
};
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 377976c..fa2905e 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -48,6 +48,7 @@
#include <net/tc_act/tc_mirred.h>
#include "main.h"
+#include "../nfp_app.h"
#include "../nfp_net_ctrl.h"
#include "../nfp_net.h"
@@ -115,6 +116,7 @@ int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
INIT_LIST_HEAD(&nfp_prog->insns);
nfp_prog->type = prog->type;
+ nfp_prog->bpf = app->priv;
ret = nfp_prog_prepare(nfp_prog, prog->insnsi, prog->len);
if (ret)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index d2bf29c..9c26084 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -38,6 +38,7 @@
#include <linux/kernel.h>
#include <linux/pkt_cls.h>
+#include "fw.h"
#include "main.h"
struct nfp_insn_meta *
@@ -68,6 +69,73 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
return meta;
}
+static void
+nfp_record_adjust_head(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
+ struct nfp_insn_meta *meta,
+ const struct bpf_reg_state *reg2)
+{
+ unsigned int location = UINT_MAX;
+ int imm;
+
+ /* Datapath usually can give us guarantees on how much adjust head
+ * can be done without the need for any checks. Optimize the simple
+ * case where there is only one adjust head by a constant.
+ */
+ if (reg2->type != SCALAR_VALUE || !tnum_is_const(reg2->var_off))
+ goto exit_set_location;
+ imm = reg2->var_off.value;
+ /* Translator will skip all checks, we need to guarantee min pkt len */
+ if (imm > ETH_ZLEN - ETH_HLEN)
+ goto exit_set_location;
+ if (imm > (int)bpf->adjust_head.guaranteed_add ||
+ imm < -bpf->adjust_head.guaranteed_sub)
+ goto exit_set_location;
+
+ if (nfp_prog->adjust_head_location) {
+ /* Only one call per program allowed */
+ if (nfp_prog->adjust_head_location != meta->n)
+ goto exit_set_location;
+
+ if (meta->arg2.var_off.value != imm)
+ goto exit_set_location;
+ }
+
+ location = meta->n;
+exit_set_location:
+ nfp_prog->adjust_head_location = location;
+}
+
+static int
+nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
+ struct nfp_insn_meta *meta)
+{
+ const struct bpf_reg_state *reg2 = cur_regs(env) + BPF_REG_2;
+ struct nfp_app_bpf *bpf = nfp_prog->bpf;
+ u32 func_id = meta->insn.imm;
+
+ switch (func_id) {
+ case BPF_FUNC_xdp_adjust_head:
+ if (!bpf->adjust_head.off_max) {
+ pr_warn("adjust_head not supported by FW\n");
+ return -EOPNOTSUPP;
+ }
+ if (!(bpf->adjust_head.flags & NFP_BPF_ADJUST_HEAD_NO_META)) {
+ pr_warn("adjust_head: FW requires shifting metadata, not supported by the driver\n");
+ return -EOPNOTSUPP;
+ }
+
+ nfp_record_adjust_head(bpf, nfp_prog, meta, reg2);
+ break;
+ default:
+ pr_warn("unsupported function id: %d\n", func_id);
+ return -EOPNOTSUPP;
+ }
+
+ meta->arg2 = *reg2;
+
+ return 0;
+}
+
static int
nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
struct bpf_verifier_env *env)
@@ -177,6 +245,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
return -EINVAL;
}
+ if (meta->insn.code == (BPF_JMP | BPF_CALL))
+ return nfp_bpf_check_call(nfp_prog, env, meta);
if (meta->insn.code == (BPF_JMP | BPF_EXIT))
return nfp_bpf_check_exit(nfp_prog, env);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index 3387e69..a24daea 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -77,6 +77,7 @@
enum br_mask {
BR_BEQ = 0x00,
BR_BNE = 0x01,
+ BR_BMI = 0x02,
BR_BHS = 0x04,
BR_BLO = 0x05,
BR_BGE = 0x08,
@@ -175,6 +176,7 @@ enum alu_op {
ALU_OP_NONE = 0x00,
ALU_OP_ADD = 0x01,
ALU_OP_NOT = 0x04,
+ ALU_OP_ADD_2B = 0x05,
ALU_OP_AND = 0x08,
ALU_OP_SUB_C = 0x0d,
ALU_OP_ADD_C = 0x11,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index ad3e9f6..0add487 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3392,7 +3392,7 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
if (nn->dp.bpf_offload_xdp)
xdp->prog_attached = XDP_ATTACHED_HW;
xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0;
- xdp->flags = nn->xdp_prog ? nn->xdp_flags : 0;
+ xdp->prog_flags = nn->xdp_prog ? nn->xdp_flags : 0;
return 0;
case BPF_OFFLOAD_VERIFIER_PREP:
return nfp_app_bpf_verifier_prep(nn->app, nn, xdp);
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
index 5798adc..c8f2c06 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
@@ -242,6 +242,7 @@ int nfp_cpp_area_read(struct nfp_cpp_area *area, unsigned long offset,
void *buffer, size_t length);
int nfp_cpp_area_write(struct nfp_cpp_area *area, unsigned long offset,
const void *buffer, size_t length);
+size_t nfp_cpp_area_size(struct nfp_cpp_area *area);
const char *nfp_cpp_area_name(struct nfp_cpp_area *cpp_area);
void *nfp_cpp_area_priv(struct nfp_cpp_area *cpp_area);
struct nfp_cpp *nfp_cpp_area_cpp(struct nfp_cpp_area *cpp_area);
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
index 3fcb522..2826247 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
@@ -568,6 +568,17 @@ int nfp_cpp_area_write(struct nfp_cpp_area *area,
}
/**
+ * nfp_cpp_area_size() - return size of a CPP area
+ * @cpp_area: CPP area handle
+ *
+ * Return: Size of the area
+ */
+size_t nfp_cpp_area_size(struct nfp_cpp_area *cpp_area)
+{
+ return cpp_area->size;
+}
+
+/**
* nfp_cpp_area_name() - return name of a CPP area
* @cpp_area: CPP area handle
*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a8ecccf..5da18eb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -30,6 +30,7 @@
#include <linux/ratelimit.h>
#include <linux/uuid.h>
#include <linux/semaphore.h>
+#include <linux/bpf.h>
#include <asm/unaligned.h>
#include "ctree.h"
#include "disk-io.h"
@@ -3123,6 +3124,7 @@ recovery_tree_root:
goto fail_block_groups;
goto retry_root_backup;
}
+BPF_ALLOW_ERROR_INJECTION(open_ctree);
static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 4426d1c..fb13828 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -22,6 +22,7 @@
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
+#include <linux/bpf.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
@@ -332,6 +333,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
return 0;
}
+BPF_ALLOW_ERROR_INJECTION(io_ctl_init);
static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
{
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index ee8b707..a2e8582 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -136,6 +136,15 @@
#define KPROBE_BLACKLIST()
#endif
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+#define ERROR_INJECT_LIST() . = ALIGN(8); \
+ VMLINUX_SYMBOL(__start_kprobe_error_inject_list) = .; \
+ KEEP(*(_kprobe_error_inject_list)) \
+ VMLINUX_SYMBOL(__stop_kprobe_error_inject_list) = .;
+#else
+#define ERROR_INJECT_LIST()
+#endif
+
#ifdef CONFIG_EVENT_TRACING
#define FTRACE_EVENTS() . = ALIGN(8); \
VMLINUX_SYMBOL(__start_ftrace_events) = .; \
@@ -564,6 +573,7 @@
FTRACE_EVENTS() \
TRACE_SYSCALLS() \
KPROBE_BLACKLIST() \
+ ERROR_INJECT_LIST() \
MEM_DISCARD(init.rodata) \
CLK_OF_TABLES() \
RESERVEDMEM_OF_TABLES() \
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e55e425..da54ef6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -200,6 +200,9 @@ struct bpf_prog_aux {
u32 max_ctx_offset;
u32 stack_depth;
u32 id;
+ u32 func_cnt;
+ struct bpf_prog **func;
+ void *jit_data; /* JIT specific data. arch dependent */
struct latch_tree_node ksym_tnode;
struct list_head ksym_lnode;
const struct bpf_prog_ops *ops;
@@ -285,6 +288,9 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
struct bpf_prog *old_prog);
+int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
+ __u32 __user *prog_ids, u32 request_cnt,
+ __u32 __user *prog_cnt);
int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
struct bpf_prog *exclude_prog,
struct bpf_prog *include_prog,
@@ -399,6 +405,7 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
/* verify correctness of eBPF program */
int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
+void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
/* Map specifics */
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
@@ -576,4 +583,15 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
void bpf_user_rnd_init_once(void);
u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+#define BPF_ALLOW_ERROR_INJECTION(fname) \
+static unsigned long __used \
+ __attribute__((__section__("_kprobe_error_inject_list"))) \
+ _eil_addr_##fname = (unsigned long)fname;
+#else
+#define BPF_ALLOW_ERROR_INJECTION(fname)
+#endif
+#endif
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c561b98..aaac589 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -76,6 +76,14 @@ struct bpf_reg_state {
s64 smax_value; /* maximum possible (s64)value */
u64 umin_value; /* minimum possible (u64)value */
u64 umax_value; /* maximum possible (u64)value */
+ /* Inside the callee two registers can be both PTR_TO_STACK like
+ * R1=fp-8 and R2=fp-8, but one of them points to this function stack
+ * while another to the caller's stack. To differentiate them 'frameno'
+ * is used which is an index in bpf_verifier_state->frame[] array
+ * pointing to bpf_func_state.
+ * This field must be second to last, for states_equal() reasons.
+ */
+ u32 frameno;
/* This field must be last, for states_equal() reasons. */
enum bpf_reg_liveness live;
};
@@ -83,7 +91,8 @@ struct bpf_reg_state {
enum bpf_stack_slot_type {
STACK_INVALID, /* nothing was stored in this stack slot */
STACK_SPILL, /* register spilled into stack */
- STACK_MISC /* BPF program wrote some data into this slot */
+ STACK_MISC, /* BPF program wrote some data into this slot */
+ STACK_ZERO, /* BPF program wrote constant zero */
};
#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
@@ -96,13 +105,34 @@ struct bpf_stack_state {
/* state of the program:
* type of all registers and stack info
*/
-struct bpf_verifier_state {
+struct bpf_func_state {
struct bpf_reg_state regs[MAX_BPF_REG];
struct bpf_verifier_state *parent;
+ /* index of call instruction that called into this func */
+ int callsite;
+ /* stack frame number of this function state from pov of
+ * enclosing bpf_verifier_state.
+ * 0 = main function, 1 = first callee.
+ */
+ u32 frameno;
+ /* subprog number == index within subprog_stack_depth
+ * zero == main subprog
+ */
+ u32 subprogno;
+
+ /* should be second to last. See copy_func_state() */
int allocated_stack;
struct bpf_stack_state *stack;
};
+#define MAX_CALL_FRAMES 8
+struct bpf_verifier_state {
+ /* call stack tracking */
+ struct bpf_func_state *frame[MAX_CALL_FRAMES];
+ struct bpf_verifier_state *parent;
+ u32 curframe;
+};
+
/* linked list of verifier states used to prune search */
struct bpf_verifier_state_list {
struct bpf_verifier_state state;
@@ -113,6 +143,7 @@ struct bpf_insn_aux_data {
union {
enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
+ s32 call_imm; /* saved imm field of call insn */
};
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
bool seen; /* this insn was processed by the verifier */
@@ -141,6 +172,8 @@ struct bpf_ext_analyzer_ops {
int insn_idx, int prev_insn_idx);
};
+#define BPF_MAX_SUBPROGS 256
+
/* single container for all structs
* one verifier_env per bpf_check() call
*/
@@ -159,13 +192,17 @@ struct bpf_verifier_env {
bool allow_ptr_leaks;
bool seen_direct_write;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
-
struct bpf_verifer_log log;
+ u32 subprog_starts[BPF_MAX_SUBPROGS];
+ u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1];
+ u32 subprog_cnt;
};
static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
{
- return env->cur_state->regs;
+ struct bpf_verifier_state *cur = env->cur_state;
+
+ return cur->frame[cur->curframe]->regs;
}
#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0062302..e872b4e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -58,6 +58,9 @@ struct bpf_prog_aux;
/* unused opcode to mark special call to bpf_tail_call() helper */
#define BPF_TAIL_CALL 0xf0
+/* unused opcode to mark call to interpreter with arguments */
+#define BPF_CALL_ARGS 0xe0
+
/* As per nm, we expose JITed images as text (code) section for
* kallsyms. That way, tools like perf can find it to match
* addresses.
@@ -455,10 +458,14 @@ struct bpf_binary_header {
struct bpf_prog {
u16 pages; /* Number of allocated pages */
u16 jited:1, /* Is our filter JIT'ed? */
+ jit_requested:1,/* archs need to JIT the prog */
locked:1, /* Program image locked? */
gpl_compatible:1, /* Is filter GPL compatible? */
cb_access:1, /* Is control block accessed? */
- dst_needed:1; /* Do we need dst entry? */
+ dst_needed:1, /* Do we need dst entry? */
+ blinded:1, /* Was blinded */
+ is_func:1, /* program is a bpf function */
+ kprobe_override:1; /* Do we override a kprobe? */
enum bpf_prog_type type; /* Type of BPF program */
u32 len; /* Number of filter blocks */
u32 jited_len; /* Size of jited insns in bytes */
@@ -709,6 +716,9 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp);
void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+#define __bpf_call_base_args \
+ ((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \
+ __bpf_call_base)
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
void bpf_jit_compile(struct bpf_prog *prog);
@@ -797,7 +807,7 @@ static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
return fp->jited && bpf_jit_is_ebpf();
}
-static inline bool bpf_jit_blinding_enabled(void)
+static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog)
{
/* These are the prerequisites, should someone ever have the
* idea to call blinding outside of them, we make sure to
@@ -805,7 +815,7 @@ static inline bool bpf_jit_blinding_enabled(void)
*/
if (!bpf_jit_is_ebpf())
return false;
- if (!bpf_jit_enable)
+ if (!prog->jit_requested)
return false;
if (!bpf_jit_harden)
return false;
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 9440a2f..963fd36 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -271,6 +271,7 @@ extern bool arch_kprobe_on_func_entry(unsigned long offset);
extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
extern bool within_kprobe_blacklist(unsigned long addr);
+extern bool within_kprobe_error_injection_list(unsigned long addr);
struct kprobe_insn_cache {
struct mutex mutex;
diff --git a/include/linux/module.h b/include/linux/module.h
index c69b49a..548fa09 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -475,6 +475,11 @@ struct module {
ctor_fn_t *ctors;
unsigned int num_ctors;
#endif
+
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+ unsigned int num_kprobe_ei_funcs;
+ unsigned long *kprobe_ei_funcs;
+#endif
} ____cacheline_aligned __randomize_layout;
#ifndef MODULE_ARCH_INIT
#define MODULE_ARCH_INIT {}
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index af44e7c..8a1442c 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -467,6 +467,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
void perf_event_detach_bpf_prog(struct perf_event *event);
+int perf_event_query_prog_array(struct perf_event *event, void __user *info);
#else
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
@@ -481,6 +482,11 @@ perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog)
static inline void perf_event_detach_bpf_prog(struct perf_event *event) { }
+static inline int
+perf_event_query_prog_array(struct perf_event *event, void __user *info)
+{
+ return -EOPNOTSUPP;
+}
#endif
enum {
@@ -528,6 +534,7 @@ do { \
struct perf_event;
DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
+DECLARE_PER_CPU(int, bpf_kprobe_override);
extern int perf_trace_init(struct perf_event *event);
extern void perf_trace_destroy(struct perf_event *event);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 80d62e8..d01f1cb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -197,8 +197,14 @@ enum bpf_attach_type {
*/
#define BPF_F_STRICT_ALIGNMENT (1U << 0)
+/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
#define BPF_PSEUDO_MAP_FD 1
+/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
+ * offset to another bpf function
+ */
+#define BPF_PSEUDO_CALL 1
+
/* flags for BPF_MAP_UPDATE_ELEM command */
#define BPF_ANY 0 /* create new element or update existing */
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
@@ -677,6 +683,10 @@ union bpf_attr {
* @buf: buf to fill
* @buf_size: size of the buf
* Return : 0 on success or negative error code
+ *
+ * int bpf_override_return(pt_regs, rc)
+ * @pt_regs: pointer to struct pt_regs
+ * @rc: the return value to set
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -736,7 +746,8 @@ union bpf_attr {
FN(xdp_adjust_meta), \
FN(perf_event_read_value), \
FN(perf_prog_read_value), \
- FN(getsockopt),
+ FN(getsockopt), \
+ FN(override_return),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b9a4953..7695336 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -418,6 +418,27 @@ struct perf_event_attr {
__u16 __reserved_2; /* align to __u64 */
};
+/*
+ * Structure used by below PERF_EVENT_IOC_QUERY_BPF command
+ * to query bpf programs attached to the same perf tracepoint
+ * as the given perf event.
+ */
+struct perf_event_query_bpf {
+ /*
+ * The below ids array length
+ */
+ __u32 ids_len;
+ /*
+ * Set by the kernel to indicate the number of
+ * available programs
+ */
+ __u32 prog_cnt;
+ /*
+ * User provided buffer to store program ids
+ */
+ __u32 ids[0];
+};
+
#define perf_flags(attr) (*(&(attr)->read_format + 1))
/*
@@ -433,6 +454,7 @@ struct perf_event_attr {
#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
+#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *)
enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 86b50aa..768e0a0 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -94,6 +94,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
fp->pages = size / PAGE_SIZE;
fp->aux = aux;
fp->aux->prog = fp;
+ fp->jit_requested = ebpf_jit_enabled();
INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode);
@@ -217,30 +218,40 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
return 0;
}
-static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn)
-{
- return BPF_CLASS(insn->code) == BPF_JMP &&
- /* Call and Exit are both special jumps with no
- * target inside the BPF instruction image.
- */
- BPF_OP(insn->code) != BPF_CALL &&
- BPF_OP(insn->code) != BPF_EXIT;
-}
-
static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta)
{
struct bpf_insn *insn = prog->insnsi;
u32 i, insn_cnt = prog->len;
+ bool pseudo_call;
+ u8 code;
+ int off;
for (i = 0; i < insn_cnt; i++, insn++) {
- if (!bpf_is_jmp_and_has_target(insn))
+ code = insn->code;
+ if (BPF_CLASS(code) != BPF_JMP)
+ continue;
+ if (BPF_OP(code) == BPF_EXIT)
continue;
+ if (BPF_OP(code) == BPF_CALL) {
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ pseudo_call = true;
+ else
+ continue;
+ } else {
+ pseudo_call = false;
+ }
+ off = pseudo_call ? insn->imm : insn->off;
/* Adjust offset of jmps if we cross boundaries. */
- if (i < pos && i + insn->off + 1 > pos)
- insn->off += delta;
- else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
- insn->off -= delta;
+ if (i < pos && i + off + 1 > pos)
+ off += delta;
+ else if (i > pos + delta && i + off + 1 <= pos + delta)
+ off -= delta;
+
+ if (pseudo_call)
+ insn->imm = off;
+ else
+ insn->off = off;
}
}
@@ -711,7 +722,7 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
struct bpf_insn *insn;
int i, rewritten;
- if (!bpf_jit_blinding_enabled())
+ if (!bpf_jit_blinding_enabled(prog) || prog->blinded)
return prog;
clone = bpf_prog_clone_create(prog, GFP_USER);
@@ -753,6 +764,7 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
i += insn_delta;
}
+ clone->blinded = 1;
return clone;
}
#endif /* CONFIG_BPF_JIT */
@@ -774,8 +786,7 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
*
* Decode and execute eBPF instructions.
*/
-static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
- u64 *stack)
+static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
{
u64 tmp;
static const void *jumptable[256] = {
@@ -835,6 +846,7 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
[BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
/* Call instruction */
[BPF_JMP | BPF_CALL] = &&JMP_CALL,
+ [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
/* Jumps */
[BPF_JMP | BPF_JA] = &&JMP_JA,
@@ -1025,6 +1037,13 @@ select_insn:
BPF_R4, BPF_R5);
CONT;
+ JMP_CALL_ARGS:
+ BPF_R0 = (__bpf_call_base_args + insn->imm)(BPF_R1, BPF_R2,
+ BPF_R3, BPF_R4,
+ BPF_R5,
+ insn + insn->off + 1);
+ CONT;
+
JMP_TAIL_CALL: {
struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
struct bpf_array *array = container_of(map, struct bpf_array, map);
@@ -1297,6 +1316,23 @@ static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn
return ___bpf_prog_run(regs, insn, stack); \
}
+#define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size
+#define DEFINE_BPF_PROG_RUN_ARGS(stack_size) \
+static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
+ const struct bpf_insn *insn) \
+{ \
+ u64 stack[stack_size / sizeof(u64)]; \
+ u64 regs[MAX_BPF_REG]; \
+\
+ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
+ BPF_R1 = r1; \
+ BPF_R2 = r2; \
+ BPF_R3 = r3; \
+ BPF_R4 = r4; \
+ BPF_R5 = r5; \
+ return ___bpf_prog_run(regs, insn, stack); \
+}
+
#define EVAL1(FN, X) FN(X)
#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
@@ -1308,6 +1344,10 @@ EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192);
EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384);
EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512);
+EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 32, 64, 96, 128, 160, 192);
+EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 224, 256, 288, 320, 352, 384);
+EVAL4(DEFINE_BPF_PROG_RUN_ARGS, 416, 448, 480, 512);
+
#define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size),
static unsigned int (*interpreters[])(const void *ctx,
@@ -1316,10 +1356,31 @@ EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
};
+#undef PROG_NAME_LIST
+#define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size),
+static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5,
+ const struct bpf_insn *insn) = {
+EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
+EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
+EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
+};
+#undef PROG_NAME_LIST
+
+void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
+{
+ stack_depth = max_t(u32, stack_depth, 1);
+ insn->off = (s16) insn->imm;
+ insn->imm = interpreters_args[(round_up(stack_depth, 32) / 32) - 1] -
+ __bpf_call_base_args;
+ insn->code = BPF_JMP | BPF_CALL_ARGS;
+}
bool bpf_prog_array_compatible(struct bpf_array *array,
const struct bpf_prog *fp)
{
+ if (fp->kprobe_override)
+ return false;
+
if (!array->owner_prog_type) {
/* There's no owner yet where we could check for
* compatibility.
@@ -1462,6 +1523,8 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
rcu_read_lock();
prog = rcu_dereference(progs)->progs;
for (; *prog; prog++) {
+ if (*prog == &dummy_bpf_prog.prog)
+ continue;
id = (*prog)->aux->id;
if (copy_to_user(prog_ids + i, &id, sizeof(id))) {
rcu_read_unlock();
@@ -1545,14 +1608,41 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
return 0;
}
+int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
+ __u32 __user *prog_ids, u32 request_cnt,
+ __u32 __user *prog_cnt)
+{
+ u32 cnt = 0;
+
+ if (array)
+ cnt = bpf_prog_array_length(array);
+
+ if (copy_to_user(prog_cnt, &cnt, sizeof(cnt)))
+ return -EFAULT;
+
+ /* return early if user requested only program count or nothing to copy */
+ if (!request_cnt || !cnt)
+ return 0;
+
+ return bpf_prog_array_copy_to_user(array, prog_ids, request_cnt);
+}
+
static void bpf_prog_free_deferred(struct work_struct *work)
{
struct bpf_prog_aux *aux;
+ int i;
aux = container_of(work, struct bpf_prog_aux, work);
if (bpf_prog_is_dev_bound(aux))
bpf_prog_offload_destroy(aux->prog);
- bpf_jit_free(aux->prog);
+ for (i = 0; i < aux->func_cnt; i++)
+ bpf_jit_free(aux->func[i]);
+ if (aux->func_cnt) {
+ kfree(aux->func);
+ bpf_prog_unlock_free(aux->prog);
+ } else {
+ bpf_jit_free(aux->prog);
+ }
}
/* Free internal BPF program */
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index e682850..883f88f 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -189,8 +189,12 @@ void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env,
u8 opcode = BPF_OP(insn->code);
if (opcode == BPF_CALL) {
- verbose(env, "(%02x) call %s#%d\n", insn->code,
- func_id_name(insn->imm), insn->imm);
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ verbose(env, "(%02x) call pc%+d\n", insn->code,
+ insn->imm);
+ else
+ verbose(env, "(%02x) call %s#%d\n", insn->code,
+ func_id_name(insn->imm), insn->imm);
} else if (insn->code == (BPF_JMP | BPF_JA)) {
verbose(env, "(%02x) goto pc%+d\n",
insn->code, insn->off);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2c4cfea..e2e1c78 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1194,7 +1194,8 @@ static int bpf_prog_load(union bpf_attr *attr)
goto free_used_maps;
/* eBPF program is ready to be JITed */
- prog = bpf_prog_select_runtime(prog, &err);
+ if (!prog->bpf_func)
+ prog = bpf_prog_select_runtime(prog, &err);
if (err < 0)
goto free_used_maps;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7afa92e..48b2901 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20,6 +20,8 @@
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
+#include <linux/bsearch.h>
+#include <linux/sort.h>
#include "disasm.h"
@@ -227,13 +229,23 @@ static void print_liveness(struct bpf_verifier_env *env,
verbose(env, "w");
}
+static struct bpf_func_state *func(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg)
+{
+ struct bpf_verifier_state *cur = env->cur_state;
+
+ return cur->frame[reg->frameno];
+}
+
static void print_verifier_state(struct bpf_verifier_env *env,
- struct bpf_verifier_state *state)
+ const struct bpf_func_state *state)
{
- struct bpf_reg_state *reg;
+ const struct bpf_reg_state *reg;
enum bpf_reg_type t;
int i;
+ if (state->frameno)
+ verbose(env, " frame%d:", state->frameno);
for (i = 0; i < MAX_BPF_REG; i++) {
reg = &state->regs[i];
t = reg->type;
@@ -246,6 +258,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
tnum_is_const(reg->var_off)) {
/* reg->off should be 0 for SCALAR_VALUE */
verbose(env, "%lld", reg->var_off.value + reg->off);
+ if (t == PTR_TO_STACK)
+ verbose(env, ",call_%d", func(env, reg)->callsite);
} else {
verbose(env, "(id=%d", reg->id);
if (t != SCALAR_VALUE)
@@ -297,12 +311,14 @@ static void print_verifier_state(struct bpf_verifier_env *env,
verbose(env, "=%s",
reg_type_str[state->stack[i].spilled_ptr.type]);
}
+ if (state->stack[i].slot_type[0] == STACK_ZERO)
+ verbose(env, " fp%d=0", (-i - 1) * BPF_REG_SIZE);
}
verbose(env, "\n");
}
-static int copy_stack_state(struct bpf_verifier_state *dst,
- const struct bpf_verifier_state *src)
+static int copy_stack_state(struct bpf_func_state *dst,
+ const struct bpf_func_state *src)
{
if (!src->stack)
return 0;
@@ -318,13 +334,13 @@ static int copy_stack_state(struct bpf_verifier_state *dst,
/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
* make it consume minimal amount of memory. check_stack_write() access from
- * the program calls into realloc_verifier_state() to grow the stack size.
+ * the program calls into realloc_func_state() to grow the stack size.
* Note there is a non-zero 'parent' pointer inside bpf_verifier_state
* which this function copies over. It points to previous bpf_verifier_state
* which is never reallocated
*/
-static int realloc_verifier_state(struct bpf_verifier_state *state, int size,
- bool copy_old)
+static int realloc_func_state(struct bpf_func_state *state, int size,
+ bool copy_old)
{
u32 old_size = state->allocated_stack;
struct bpf_stack_state *new_stack;
@@ -357,10 +373,21 @@ static int realloc_verifier_state(struct bpf_verifier_state *state, int size,
return 0;
}
+static void free_func_state(struct bpf_func_state *state)
+{
+ kfree(state->stack);
+ kfree(state);
+}
+
static void free_verifier_state(struct bpf_verifier_state *state,
bool free_self)
{
- kfree(state->stack);
+ int i;
+
+ for (i = 0; i <= state->curframe; i++) {
+ free_func_state(state->frame[i]);
+ state->frame[i] = NULL;
+ }
if (free_self)
kfree(state);
}
@@ -368,18 +395,46 @@ static void free_verifier_state(struct bpf_verifier_state *state,
/* copy verifier state from src to dst growing dst stack space
* when necessary to accommodate larger src stack
*/
-static int copy_verifier_state(struct bpf_verifier_state *dst,
- const struct bpf_verifier_state *src)
+static int copy_func_state(struct bpf_func_state *dst,
+ const struct bpf_func_state *src)
{
int err;
- err = realloc_verifier_state(dst, src->allocated_stack, false);
+ err = realloc_func_state(dst, src->allocated_stack, false);
if (err)
return err;
- memcpy(dst, src, offsetof(struct bpf_verifier_state, allocated_stack));
+ memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack));
return copy_stack_state(dst, src);
}
+static int copy_verifier_state(struct bpf_verifier_state *dst_state,
+ const struct bpf_verifier_state *src)
+{
+ struct bpf_func_state *dst;
+ int i, err;
+
+ /* if dst has more stack frames then src frame, free them */
+ for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
+ free_func_state(dst_state->frame[i]);
+ dst_state->frame[i] = NULL;
+ }
+ dst_state->curframe = src->curframe;
+ dst_state->parent = src->parent;
+ for (i = 0; i <= src->curframe; i++) {
+ dst = dst_state->frame[i];
+ if (!dst) {
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst)
+ return -ENOMEM;
+ dst_state->frame[i] = dst;
+ }
+ err = copy_func_state(dst, src->frame[i]);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
int *insn_idx)
{
@@ -441,6 +496,10 @@ err:
static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
+#define CALLEE_SAVED_REGS 5
+static const int callee_saved[CALLEE_SAVED_REGS] = {
+ BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9
+};
static void __mark_reg_not_init(struct bpf_reg_state *reg);
@@ -465,6 +524,13 @@ static void __mark_reg_known_zero(struct bpf_reg_state *reg)
__mark_reg_known(reg, 0);
}
+static void __mark_reg_const_zero(struct bpf_reg_state *reg)
+{
+ __mark_reg_known(reg, 0);
+ reg->off = 0;
+ reg->type = SCALAR_VALUE;
+}
+
static void mark_reg_known_zero(struct bpf_verifier_env *env,
struct bpf_reg_state *regs, u32 regno)
{
@@ -576,6 +642,7 @@ static void __mark_reg_unknown(struct bpf_reg_state *reg)
reg->id = 0;
reg->off = 0;
reg->var_off = tnum_unknown;
+ reg->frameno = 0;
__mark_reg_unbounded(reg);
}
@@ -612,8 +679,9 @@ static void mark_reg_not_init(struct bpf_verifier_env *env,
}
static void init_reg_state(struct bpf_verifier_env *env,
- struct bpf_reg_state *regs)
+ struct bpf_func_state *state)
{
+ struct bpf_reg_state *regs = state->regs;
int i;
for (i = 0; i < MAX_BPF_REG; i++) {
@@ -624,41 +692,217 @@ static void init_reg_state(struct bpf_verifier_env *env,
/* frame pointer */
regs[BPF_REG_FP].type = PTR_TO_STACK;
mark_reg_known_zero(env, regs, BPF_REG_FP);
+ regs[BPF_REG_FP].frameno = state->frameno;
/* 1st arg to a function */
regs[BPF_REG_1].type = PTR_TO_CTX;
mark_reg_known_zero(env, regs, BPF_REG_1);
}
+#define BPF_MAIN_FUNC (-1)
+static void init_func_state(struct bpf_verifier_env *env,
+ struct bpf_func_state *state,
+ int callsite, int frameno, int subprogno)
+{
+ state->callsite = callsite;
+ state->frameno = frameno;
+ state->subprogno = subprogno;
+ init_reg_state(env, state);
+}
+
enum reg_arg_type {
SRC_OP, /* register is used as source operand */
DST_OP, /* register is used as destination operand */
DST_OP_NO_MARK /* same as above, check only, don't mark */
};
-static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno)
+static int cmp_subprogs(const void *a, const void *b)
+{
+ return *(int *)a - *(int *)b;
+}
+
+static int find_subprog(struct bpf_verifier_env *env, int off)
{
- struct bpf_verifier_state *parent = state->parent;
+ u32 *p;
+
+ p = bsearch(&off, env->subprog_starts, env->subprog_cnt,
+ sizeof(env->subprog_starts[0]), cmp_subprogs);
+ if (!p)
+ return -ENOENT;
+ return p - env->subprog_starts;
+
+}
+
+static int add_subprog(struct bpf_verifier_env *env, int off)
+{
+ int insn_cnt = env->prog->len;
+ int ret;
+
+ if (off >= insn_cnt || off < 0) {
+ verbose(env, "call to invalid destination\n");
+ return -EINVAL;
+ }
+ ret = find_subprog(env, off);
+ if (ret >= 0)
+ return 0;
+ if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
+ verbose(env, "too many subprograms\n");
+ return -E2BIG;
+ }
+ env->subprog_starts[env->subprog_cnt++] = off;
+ sort(env->subprog_starts, env->subprog_cnt,
+ sizeof(env->subprog_starts[0]), cmp_subprogs, NULL);
+ return 0;
+}
+
+static int check_subprogs(struct bpf_verifier_env *env)
+{
+ int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
+ struct bpf_insn *insn = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+
+ /* determine subprog starts. The end is one before the next starts */
+ for (i = 0; i < insn_cnt; i++) {
+ if (insn[i].code != (BPF_JMP | BPF_CALL))
+ continue;
+ if (insn[i].src_reg != BPF_PSEUDO_CALL)
+ continue;
+ if (!env->allow_ptr_leaks) {
+ verbose(env, "function calls to other bpf functions are allowed for root only\n");
+ return -EPERM;
+ }
+ if (bpf_prog_is_dev_bound(env->prog->aux)) {
+ verbose(env, "funcation calls in offloaded programs are not supported yet\n");
+ return -EINVAL;
+ }
+ ret = add_subprog(env, i + insn[i].imm + 1);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (env->log.level > 1)
+ for (i = 0; i < env->subprog_cnt; i++)
+ verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]);
+
+ /* now check that all jumps are within the same subprog */
+ subprog_start = 0;
+ if (env->subprog_cnt == cur_subprog)
+ subprog_end = insn_cnt;
+ else
+ subprog_end = env->subprog_starts[cur_subprog++];
+ for (i = 0; i < insn_cnt; i++) {
+ u8 code = insn[i].code;
+
+ if (BPF_CLASS(code) != BPF_JMP)
+ goto next;
+ if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
+ goto next;
+ off = i + insn[i].off + 1;
+ if (off < subprog_start || off >= subprog_end) {
+ verbose(env, "jump out of range from insn %d to %d\n", i, off);
+ return -EINVAL;
+ }
+next:
+ if (i == subprog_end - 1) {
+ /* to avoid fall-through from one subprog into another
+ * the last insn of the subprog should be either exit
+ * or unconditional jump back
+ */
+ if (code != (BPF_JMP | BPF_EXIT) &&
+ code != (BPF_JMP | BPF_JA)) {
+ verbose(env, "last insn is not an exit or jmp\n");
+ return -EINVAL;
+ }
+ subprog_start = subprog_end;
+ if (env->subprog_cnt == cur_subprog)
+ subprog_end = insn_cnt;
+ else
+ subprog_end = env->subprog_starts[cur_subprog++];
+ }
+ }
+ return 0;
+}
+
+struct bpf_verifier_state *skip_callee(struct bpf_verifier_env *env,
+ const struct bpf_verifier_state *state,
+ struct bpf_verifier_state *parent,
+ u32 regno)
+{
+ struct bpf_verifier_state *tmp = NULL;
+
+ /* 'parent' could be a state of caller and
+ * 'state' could be a state of callee. In such case
+ * parent->curframe < state->curframe
+ * and it's ok for r1 - r5 registers
+ *
+ * 'parent' could be a callee's state after it bpf_exit-ed.
+ * In such case parent->curframe > state->curframe
+ * and it's ok for r0 only
+ */
+ if (parent->curframe == state->curframe ||
+ (parent->curframe < state->curframe &&
+ regno >= BPF_REG_1 && regno <= BPF_REG_5) ||
+ (parent->curframe > state->curframe &&
+ regno == BPF_REG_0))
+ return parent;
+
+ if (parent->curframe > state->curframe &&
+ regno >= BPF_REG_6) {
+ /* for callee saved regs we have to skip the whole chain
+ * of states that belong to callee and mark as LIVE_READ
+ * the registers before the call
+ */
+ tmp = parent;
+ while (tmp && tmp->curframe != state->curframe) {
+ tmp = tmp->parent;
+ }
+ if (!tmp)
+ goto bug;
+ parent = tmp;
+ } else {
+ goto bug;
+ }
+ return parent;
+bug:
+ verbose(env, "verifier bug regno %d tmp %p\n", regno, tmp);
+ verbose(env, "regno %d parent frame %d current frame %d\n",
+ regno, parent->curframe, state->curframe);
+ return 0;
+}
+
+static int mark_reg_read(struct bpf_verifier_env *env,
+ const struct bpf_verifier_state *state,
+ struct bpf_verifier_state *parent,
+ u32 regno)
+{
+ bool writes = parent == state->parent; /* Observe write marks */
if (regno == BPF_REG_FP)
/* We don't need to worry about FP liveness because it's read-only */
- return;
+ return 0;
while (parent) {
/* if read wasn't screened by an earlier write ... */
- if (state->regs[regno].live & REG_LIVE_WRITTEN)
+ if (writes && state->frame[state->curframe]->regs[regno].live & REG_LIVE_WRITTEN)
break;
+ parent = skip_callee(env, state, parent, regno);
+ if (!parent)
+ return -EFAULT;
/* ... then we depend on parent's value */
- parent->regs[regno].live |= REG_LIVE_READ;
+ parent->frame[parent->curframe]->regs[regno].live |= REG_LIVE_READ;
state = parent;
parent = state->parent;
+ writes = true;
}
+ return 0;
}
static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
enum reg_arg_type t)
{
- struct bpf_reg_state *regs = env->cur_state->regs;
+ struct bpf_verifier_state *vstate = env->cur_state;
+ struct bpf_func_state *state = vstate->frame[vstate->curframe];
+ struct bpf_reg_state *regs = state->regs;
if (regno >= MAX_BPF_REG) {
verbose(env, "R%d is invalid\n", regno);
@@ -671,7 +915,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
verbose(env, "R%d !read_ok\n", regno);
return -EACCES;
}
- mark_reg_read(env->cur_state, regno);
+ return mark_reg_read(env, vstate, vstate->parent, regno);
} else {
/* check whether register used as dest operand can be written to */
if (regno == BPF_REG_FP) {
@@ -702,17 +946,25 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
}
}
+/* Does this register contain a constant zero? */
+static bool register_is_null(struct bpf_reg_state *reg)
+{
+ return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
+}
+
/* check_stack_read/write functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
static int check_stack_write(struct bpf_verifier_env *env,
- struct bpf_verifier_state *state, int off,
- int size, int value_regno)
+ struct bpf_func_state *state, /* func where register points to */
+ int off, int size, int value_regno)
{
+ struct bpf_func_state *cur; /* state of the current function */
int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
+ enum bpf_reg_type type;
- err = realloc_verifier_state(state, round_up(slot + 1, BPF_REG_SIZE),
- true);
+ err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
+ true);
if (err)
return err;
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@ -725,8 +977,9 @@ static int check_stack_write(struct bpf_verifier_env *env,
return -EACCES;
}
+ cur = env->cur_state->frame[env->cur_state->curframe];
if (value_regno >= 0 &&
- is_spillable_regtype(state->regs[value_regno].type)) {
+ is_spillable_regtype((type = cur->regs[value_regno].type))) {
/* register containing pointer is being spilled into stack */
if (size != BPF_REG_SIZE) {
@@ -734,51 +987,116 @@ static int check_stack_write(struct bpf_verifier_env *env,
return -EACCES;
}
+ if (state != cur && type == PTR_TO_STACK) {
+ verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
+ return -EINVAL;
+ }
+
/* save register state */
- state->stack[spi].spilled_ptr = state->regs[value_regno];
+ state->stack[spi].spilled_ptr = cur->regs[value_regno];
state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
for (i = 0; i < BPF_REG_SIZE; i++)
state->stack[spi].slot_type[i] = STACK_SPILL;
} else {
+ u8 type = STACK_MISC;
+
/* regular write of data into stack */
state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
+ /* only mark the slot as written if all 8 bytes were written
+ * otherwise read propagation may incorrectly stop too soon
+ * when stack slots are partially written.
+ * This heuristic means that read propagation will be
+ * conservative, since it will add reg_live_read marks
+ * to stack slots all the way to first state when programs
+ * writes+reads less than 8 bytes
+ */
+ if (size == BPF_REG_SIZE)
+ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+
+ /* when we zero initialize stack slots mark them as such */
+ if (value_regno >= 0 &&
+ register_is_null(&cur->regs[value_regno]))
+ type = STACK_ZERO;
+
for (i = 0; i < size; i++)
state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
- STACK_MISC;
+ type;
}
return 0;
}
-static void mark_stack_slot_read(const struct bpf_verifier_state *state, int slot)
+/* registers of every function are unique and mark_reg_read() propagates
+ * the liveness in the following cases:
+ * - from callee into caller for R1 - R5 that were used as arguments
+ * - from caller into callee for R0 that used as result of the call
+ * - from caller to the same caller skipping states of the callee for R6 - R9,
+ * since R6 - R9 are callee saved by implicit function prologue and
+ * caller's R6 != callee's R6, so when we propagate liveness up to
+ * parent states we need to skip callee states for R6 - R9.
+ *
+ * stack slot marking is different, since stacks of caller and callee are
+ * accessible in both (since caller can pass a pointer to caller's stack to
+ * callee which can pass it to another function), hence mark_stack_slot_read()
+ * has to propagate the stack liveness to all parent states at given frame number.
+ * Consider code:
+ * f1() {
+ * ptr = fp - 8;
+ * *ptr = ctx;
+ * call f2 {
+ * .. = *ptr;
+ * }
+ * .. = *ptr;
+ * }
+ * First *ptr is reading from f1's stack and mark_stack_slot_read() has
+ * to mark liveness at the f1's frame and not f2's frame.
+ * Second *ptr is also reading from f1's stack and mark_stack_slot_read() has
+ * to propagate liveness to f2 states at f1's frame level and further into
+ * f1 states at f1's frame level until write into that stack slot
+ */
+static void mark_stack_slot_read(struct bpf_verifier_env *env,
+ const struct bpf_verifier_state *state,
+ struct bpf_verifier_state *parent,
+ int slot, int frameno)
{
- struct bpf_verifier_state *parent = state->parent;
+ bool writes = parent == state->parent; /* Observe write marks */
while (parent) {
+ if (parent->frame[frameno]->allocated_stack <= slot * BPF_REG_SIZE)
+ /* since LIVE_WRITTEN mark is only done for full 8-byte
+ * write the read marks are conservative and parent
+ * state may not even have the stack allocated. In such case
+ * end the propagation, since the loop reached beginning
+ * of the function
+ */
+ break;
/* if read wasn't screened by an earlier write ... */
- if (state->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
+ if (writes && state->frame[frameno]->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
break;
/* ... then we depend on parent's value */
- parent->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
+ parent->frame[frameno]->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
state = parent;
parent = state->parent;
+ writes = true;
}
}
static int check_stack_read(struct bpf_verifier_env *env,
- struct bpf_verifier_state *state, int off, int size,
- int value_regno)
+ struct bpf_func_state *reg_state /* func where register points to */,
+ int off, int size, int value_regno)
{
+ struct bpf_verifier_state *vstate = env->cur_state;
+ struct bpf_func_state *state = vstate->frame[vstate->curframe];
int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
u8 *stype;
- if (state->allocated_stack <= slot) {
+ if (reg_state->allocated_stack <= slot) {
verbose(env, "invalid read from stack off %d+0 size %d\n",
off, size);
return -EACCES;
}
- stype = state->stack[spi].slot_type;
+ stype = reg_state->stack[spi].slot_type;
if (stype[0] == STACK_SPILL) {
if (size != BPF_REG_SIZE) {
@@ -794,26 +1112,44 @@ static int check_stack_read(struct bpf_verifier_env *env,
if (value_regno >= 0) {
/* restore register state from stack */
- state->regs[value_regno] = state->stack[spi].spilled_ptr;
+ state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
/* mark reg as written since spilled pointer state likely
* has its liveness marks cleared by is_state_visited()
* which resets stack/reg liveness for state transitions
*/
state->regs[value_regno].live |= REG_LIVE_WRITTEN;
- mark_stack_slot_read(state, spi);
}
+ mark_stack_slot_read(env, vstate, vstate->parent, spi,
+ reg_state->frameno);
return 0;
} else {
+ int zeros = 0;
+
for (i = 0; i < size; i++) {
- if (stype[(slot - i) % BPF_REG_SIZE] != STACK_MISC) {
- verbose(env, "invalid read from stack off %d+%d size %d\n",
- off, i, size);
- return -EACCES;
+ if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
+ continue;
+ if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
+ zeros++;
+ continue;
}
+ verbose(env, "invalid read from stack off %d+%d size %d\n",
+ off, i, size);
+ return -EACCES;
+ }
+ mark_stack_slot_read(env, vstate, vstate->parent, spi,
+ reg_state->frameno);
+ if (value_regno >= 0) {
+ if (zeros == size) {
+ /* any size read into register is zero extended,
+ * so the whole register == const_zero
+ */
+ __mark_reg_const_zero(&state->regs[value_regno]);
+ } else {
+ /* have read misc data from the stack */
+ mark_reg_unknown(env, state->regs, value_regno);
+ }
+ state->regs[value_regno].live |= REG_LIVE_WRITTEN;
}
- if (value_regno >= 0)
- /* have read misc data from the stack */
- mark_reg_unknown(env, state->regs, value_regno);
return 0;
}
}
@@ -838,7 +1174,8 @@ static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
static int check_map_access(struct bpf_verifier_env *env, u32 regno,
int off, int size, bool zero_size_allowed)
{
- struct bpf_verifier_state *state = env->cur_state;
+ struct bpf_verifier_state *vstate = env->cur_state;
+ struct bpf_func_state *state = vstate->frame[vstate->curframe];
struct bpf_reg_state *reg = &state->regs[regno];
int err;
@@ -1088,6 +1425,54 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
strict);
}
+static int update_stack_depth(struct bpf_verifier_env *env,
+ const struct bpf_func_state *func,
+ int off)
+{
+ u16 stack = env->subprog_stack_depth[func->subprogno], total = 0;
+ struct bpf_verifier_state *cur = env->cur_state;
+ int i;
+
+ if (stack >= -off)
+ return 0;
+
+ /* update known max for given subprogram */
+ env->subprog_stack_depth[func->subprogno] = -off;
+
+ /* compute the total for current call chain */
+ for (i = 0; i <= cur->curframe; i++) {
+ u32 depth = env->subprog_stack_depth[cur->frame[i]->subprogno];
+
+ /* round up to 32-bytes, since this is granularity
+ * of interpreter stack sizes
+ */
+ depth = round_up(depth, 32);
+ total += depth;
+ }
+
+ if (total > MAX_BPF_STACK) {
+ verbose(env, "combined stack size of %d calls is %d. Too large\n",
+ cur->curframe, total);
+ return -EACCES;
+ }
+ return 0;
+}
+
+static int get_callee_stack_depth(struct bpf_verifier_env *env,
+ const struct bpf_insn *insn, int idx)
+{
+ int start = idx + insn->imm + 1, subprog;
+
+ subprog = find_subprog(env, start);
+ if (subprog < 0) {
+ WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
+ start);
+ return -EFAULT;
+ }
+ subprog++;
+ return env->subprog_stack_depth[subprog];
+}
+
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
@@ -1098,9 +1483,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
int bpf_size, enum bpf_access_type t,
int value_regno)
{
- struct bpf_verifier_state *state = env->cur_state;
struct bpf_reg_state *regs = cur_regs(env);
struct bpf_reg_state *reg = regs + regno;
+ struct bpf_func_state *state;
int size, err = 0;
size = bpf_size_to_bytes(bpf_size);
@@ -1189,8 +1574,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return -EACCES;
}
- if (env->prog->aux->stack_depth < -off)
- env->prog->aux->stack_depth = -off;
+ state = func(env, reg);
+ err = update_stack_depth(env, state, off);
+ if (err)
+ return err;
if (t == BPF_WRITE)
err = check_stack_write(env, state, off, size,
@@ -1264,12 +1651,6 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
BPF_SIZE(insn->code), BPF_WRITE, -1);
}
-/* Does this register contain a constant zero? */
-static bool register_is_null(struct bpf_reg_state *reg)
-{
- return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
-}
-
/* when register 'regno' is passed into function that will read 'access_size'
* bytes from that pointer, make sure that it's within stack boundary
* and all elements of stack are initialized.
@@ -1281,7 +1662,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *reg = cur_regs(env) + regno;
- struct bpf_verifier_state *state = env->cur_state;
+ struct bpf_func_state *state = func(env, reg);
int off, i, slot, spi;
if (reg->type != PTR_TO_STACK) {
@@ -1312,9 +1693,6 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
return -EACCES;
}
- if (env->prog->aux->stack_depth < -off)
- env->prog->aux->stack_depth = -off;
-
if (meta && meta->raw_mode) {
meta->access_size = access_size;
meta->regno = regno;
@@ -1322,17 +1700,32 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
}
for (i = 0; i < access_size; i++) {
+ u8 *stype;
+
slot = -(off + i) - 1;
spi = slot / BPF_REG_SIZE;
- if (state->allocated_stack <= slot ||
- state->stack[spi].slot_type[slot % BPF_REG_SIZE] !=
- STACK_MISC) {
- verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
- off, i, access_size);
- return -EACCES;
+ if (state->allocated_stack <= slot)
+ goto err;
+ stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
+ if (*stype == STACK_MISC)
+ goto mark;
+ if (*stype == STACK_ZERO) {
+ /* helper can write anything into the stack */
+ *stype = STACK_MISC;
+ goto mark;
}
+err:
+ verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
+ off, i, access_size);
+ return -EACCES;
+mark:
+ /* reading any byte out of 8-byte 'spill_slot' will cause
+ * the whole slot to be marked as 'read'
+ */
+ mark_stack_slot_read(env, env->cur_state, env->cur_state->parent,
+ spi, state->frameno);
}
- return 0;
+ return update_stack_depth(env, state, off);
}
static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
@@ -1585,6 +1978,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_tail_call:
if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
goto error;
+ if (env->subprog_cnt) {
+ verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
+ return -EINVAL;
+ }
break;
case BPF_FUNC_perf_event_read:
case BPF_FUNC_perf_event_output:
@@ -1646,9 +2043,9 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
* are now invalid, so turn them into unknown SCALAR_VALUE.
*/
-static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
+static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
+ struct bpf_func_state *state)
{
- struct bpf_verifier_state *state = env->cur_state;
struct bpf_reg_state *regs = state->regs, *reg;
int i;
@@ -1665,7 +2062,121 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
}
}
-static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
+static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
+{
+ struct bpf_verifier_state *vstate = env->cur_state;
+ int i;
+
+ for (i = 0; i <= vstate->curframe; i++)
+ __clear_all_pkt_pointers(env, vstate->frame[i]);
+}
+
+static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ int *insn_idx)
+{
+ struct bpf_verifier_state *state = env->cur_state;
+ struct bpf_func_state *caller, *callee;
+ int i, subprog, target_insn;
+
+ if (state->curframe >= MAX_CALL_FRAMES) {
+ verbose(env, "the call stack of %d frames is too deep\n",
+ state->curframe);
+ return -E2BIG;
+ }
+
+ target_insn = *insn_idx + insn->imm;
+ subprog = find_subprog(env, target_insn + 1);
+ if (subprog < 0) {
+ verbose(env, "verifier bug. No program starts at insn %d\n",
+ target_insn + 1);
+ return -EFAULT;
+ }
+
+ caller = state->frame[state->curframe];
+ if (state->frame[state->curframe + 1]) {
+ verbose(env, "verifier bug. Frame %d already allocated\n",
+ state->curframe + 1);
+ return -EFAULT;
+ }
+
+ callee = kzalloc(sizeof(*callee), GFP_KERNEL);
+ if (!callee)
+ return -ENOMEM;
+ state->frame[state->curframe + 1] = callee;
+
+ /* callee cannot access r0, r6 - r9 for reading and has to write
+ * into its own stack before reading from it.
+ * callee can read/write into caller's stack
+ */
+ init_func_state(env, callee,
+ /* remember the callsite, it will be used by bpf_exit */
+ *insn_idx /* callsite */,
+ state->curframe + 1 /* frameno within this callchain */,
+ subprog + 1 /* subprog number within this prog */);
+
+ /* copy r1 - r5 args that callee can access */
+ for (i = BPF_REG_1; i <= BPF_REG_5; i++)
+ callee->regs[i] = caller->regs[i];
+
+ /* after the call regsiters r0 - r5 were scratched */
+ for (i = 0; i < CALLER_SAVED_REGS; i++) {
+ mark_reg_not_init(env, caller->regs, caller_saved[i]);
+ check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
+ }
+
+ /* only increment it after check_reg_arg() finished */
+ state->curframe++;
+
+ /* and go analyze first insn of the callee */
+ *insn_idx = target_insn;
+
+ if (env->log.level) {
+ verbose(env, "caller:\n");
+ print_verifier_state(env, caller);
+ verbose(env, "callee:\n");
+ print_verifier_state(env, callee);
+ }
+ return 0;
+}
+
+static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
+{
+ struct bpf_verifier_state *state = env->cur_state;
+ struct bpf_func_state *caller, *callee;
+ struct bpf_reg_state *r0;
+
+ callee = state->frame[state->curframe];
+ r0 = &callee->regs[BPF_REG_0];
+ if (r0->type == PTR_TO_STACK) {
+ /* technically it's ok to return caller's stack pointer
+ * (or caller's caller's pointer) back to the caller,
+ * since these pointers are valid. Only current stack
+ * pointer will be invalid as soon as function exits,
+ * but let's be conservative
+ */
+ verbose(env, "cannot return stack pointer to the caller\n");
+ return -EINVAL;
+ }
+
+ state->curframe--;
+ caller = state->frame[state->curframe];
+ /* return to the caller whatever r0 had in the callee */
+ caller->regs[BPF_REG_0] = *r0;
+
+ *insn_idx = callee->callsite + 1;
+ if (env->log.level) {
+ verbose(env, "returning from callee:\n");
+ print_verifier_state(env, callee);
+ verbose(env, "to caller at %d:\n", *insn_idx);
+ print_verifier_state(env, caller);
+ }
+ /* clear everything in the callee */
+ free_func_state(callee);
+ state->frame[state->curframe + 1] = NULL;
+ return 0;
+}
+
+static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
{
const struct bpf_func_proto *fn = NULL;
struct bpf_reg_state *regs;
@@ -1825,7 +2336,9 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
const struct bpf_reg_state *ptr_reg,
const struct bpf_reg_state *off_reg)
{
- struct bpf_reg_state *regs = cur_regs(env), *dst_reg;
+ struct bpf_verifier_state *vstate = env->cur_state;
+ struct bpf_func_state *state = vstate->frame[vstate->curframe];
+ struct bpf_reg_state *regs = state->regs, *dst_reg;
bool known = tnum_is_const(off_reg->var_off);
s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
@@ -1837,13 +2350,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst_reg = &regs[dst];
if (WARN_ON_ONCE(known && (smin_val != smax_val))) {
- print_verifier_state(env, env->cur_state);
+ print_verifier_state(env, state);
verbose(env,
"verifier internal error: known but bad sbounds\n");
return -EINVAL;
}
if (WARN_ON_ONCE(known && (umin_val != umax_val))) {
- print_verifier_state(env, env->cur_state);
+ print_verifier_state(env, state);
verbose(env,
"verifier internal error: known but bad ubounds\n");
return -EINVAL;
@@ -2245,7 +2758,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
struct bpf_insn *insn)
{
- struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
+ struct bpf_verifier_state *vstate = env->cur_state;
+ struct bpf_func_state *state = vstate->frame[vstate->curframe];
+ struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
u8 opcode = BPF_OP(insn->code);
int rc;
@@ -2319,12 +2834,12 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
/* Got here implies adding two SCALAR_VALUEs */
if (WARN_ON_ONCE(ptr_reg)) {
- print_verifier_state(env, env->cur_state);
+ print_verifier_state(env, state);
verbose(env, "verifier internal error: unexpected ptr_reg\n");
return -EINVAL;
}
if (WARN_ON(!src_reg)) {
- print_verifier_state(env, env->cur_state);
+ print_verifier_state(env, state);
verbose(env, "verifier internal error: no src_reg\n");
return -EINVAL;
}
@@ -2478,14 +2993,15 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
-static void find_good_pkt_pointers(struct bpf_verifier_state *state,
+static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
struct bpf_reg_state *dst_reg,
enum bpf_reg_type type,
bool range_right_open)
{
+ struct bpf_func_state *state = vstate->frame[vstate->curframe];
struct bpf_reg_state *regs = state->regs, *reg;
u16 new_range;
- int i;
+ int i, j;
if (dst_reg->off < 0 ||
(dst_reg->off == 0 && range_right_open))
@@ -2555,12 +3071,15 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
/* keep the maximum range already checked */
regs[i].range = max(regs[i].range, new_range);
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- if (state->stack[i].slot_type[0] != STACK_SPILL)
- continue;
- reg = &state->stack[i].spilled_ptr;
- if (reg->type == type && reg->id == dst_reg->id)
- reg->range = max(reg->range, new_range);
+ for (j = 0; j <= vstate->curframe; j++) {
+ state = vstate->frame[j];
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ if (state->stack[i].slot_type[0] != STACK_SPILL)
+ continue;
+ reg = &state->stack[i].spilled_ptr;
+ if (reg->type == type && reg->id == dst_reg->id)
+ reg->range = max(reg->range, new_range);
+ }
}
}
@@ -2798,20 +3317,24 @@ static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
/* The logic is similar to find_good_pkt_pointers(), both could eventually
* be folded together at some point.
*/
-static void mark_map_regs(struct bpf_verifier_state *state, u32 regno,
+static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno,
bool is_null)
{
+ struct bpf_func_state *state = vstate->frame[vstate->curframe];
struct bpf_reg_state *regs = state->regs;
u32 id = regs[regno].id;
- int i;
+ int i, j;
for (i = 0; i < MAX_BPF_REG; i++)
mark_map_reg(regs, i, id, is_null);
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- if (state->stack[i].slot_type[0] != STACK_SPILL)
- continue;
- mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
+ for (j = 0; j <= vstate->curframe; j++) {
+ state = vstate->frame[j];
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ if (state->stack[i].slot_type[0] != STACK_SPILL)
+ continue;
+ mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
+ }
}
}
@@ -2911,8 +3434,10 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_insn *insn, int *insn_idx)
{
- struct bpf_verifier_state *other_branch, *this_branch = env->cur_state;
- struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
+ struct bpf_verifier_state *this_branch = env->cur_state;
+ struct bpf_verifier_state *other_branch;
+ struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
+ struct bpf_reg_state *dst_reg, *other_branch_regs;
u8 opcode = BPF_OP(insn->code);
int err;
@@ -2975,6 +3500,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
if (!other_branch)
return -EFAULT;
+ other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
/* detect if we are comparing against a constant value so we can adjust
* our min/max values for our dst register.
@@ -2987,22 +3513,22 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
if (dst_reg->type == SCALAR_VALUE &&
regs[insn->src_reg].type == SCALAR_VALUE) {
if (tnum_is_const(regs[insn->src_reg].var_off))
- reg_set_min_max(&other_branch->regs[insn->dst_reg],
+ reg_set_min_max(&other_branch_regs[insn->dst_reg],
dst_reg, regs[insn->src_reg].var_off.value,
opcode);
else if (tnum_is_const(dst_reg->var_off))
- reg_set_min_max_inv(&other_branch->regs[insn->src_reg],
+ reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
&regs[insn->src_reg],
dst_reg->var_off.value, opcode);
else if (opcode == BPF_JEQ || opcode == BPF_JNE)
/* Comparing for equality, we can combine knowledge */
- reg_combine_min_max(&other_branch->regs[insn->src_reg],
- &other_branch->regs[insn->dst_reg],
+ reg_combine_min_max(&other_branch_regs[insn->src_reg],
+ &other_branch_regs[insn->dst_reg],
&regs[insn->src_reg],
&regs[insn->dst_reg], opcode);
}
} else if (dst_reg->type == SCALAR_VALUE) {
- reg_set_min_max(&other_branch->regs[insn->dst_reg],
+ reg_set_min_max(&other_branch_regs[insn->dst_reg],
dst_reg, insn->imm, opcode);
}
@@ -3023,7 +3549,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
return -EACCES;
}
if (env->log.level)
- print_verifier_state(env, this_branch);
+ print_verifier_state(env, this_branch->frame[this_branch->curframe]);
return 0;
}
@@ -3108,6 +3634,18 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EINVAL;
}
+ if (env->subprog_cnt) {
+ /* when program has LD_ABS insn JITs and interpreter assume
+ * that r1 == ctx == skb which is not the case for callees
+ * that can have arbitrary arguments. It's problematic
+ * for main prog as well since JITs would need to analyze
+ * all functions in order to make proper register save/restore
+ * decisions in the main prog. Hence disallow LD_ABS with calls
+ */
+ verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
+ return -EINVAL;
+ }
+
if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
BPF_SIZE(insn->code) == BPF_DW ||
(mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
@@ -3284,6 +3822,10 @@ static int check_cfg(struct bpf_verifier_env *env)
int ret = 0;
int i, t;
+ ret = check_subprogs(env);
+ if (ret < 0)
+ return ret;
+
insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
if (!insn_state)
return -ENOMEM;
@@ -3316,6 +3858,14 @@ peek_stack:
goto err_free;
if (t + 1 < insn_cnt)
env->explored_states[t + 1] = STATE_LIST_MARK;
+ if (insns[t].src_reg == BPF_PSEUDO_CALL) {
+ env->explored_states[t] = STATE_LIST_MARK;
+ ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
+ if (ret == 1)
+ goto peek_stack;
+ else if (ret < 0)
+ goto err_free;
+ }
} else if (opcode == BPF_JA) {
if (BPF_SRC(insns[t].code) != BPF_K) {
ret = -EINVAL;
@@ -3434,11 +3984,21 @@ static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
struct idpair *idmap)
{
+ bool equal;
+
if (!(rold->live & REG_LIVE_READ))
/* explored state didn't use this */
return true;
- if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, live)) == 0)
+ equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, frameno)) == 0;
+
+ if (rold->type == PTR_TO_STACK)
+ /* two stack pointers are equal only if they're pointing to
+ * the same stack frame, since fp-8 in foo != fp-8 in bar
+ */
+ return equal && rold->frameno == rcur->frameno;
+
+ if (equal)
return true;
if (rold->type == NOT_INIT)
@@ -3511,7 +4071,6 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
tnum_in(rold->var_off, rcur->var_off);
case PTR_TO_CTX:
case CONST_PTR_TO_MAP:
- case PTR_TO_STACK:
case PTR_TO_PACKET_END:
/* Only valid matches are exact, which memcmp() above
* would have accepted
@@ -3526,8 +4085,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
return false;
}
-static bool stacksafe(struct bpf_verifier_state *old,
- struct bpf_verifier_state *cur,
+static bool stacksafe(struct bpf_func_state *old,
+ struct bpf_func_state *cur,
struct idpair *idmap)
{
int i, spi;
@@ -3545,8 +4104,19 @@ static bool stacksafe(struct bpf_verifier_state *old,
for (i = 0; i < old->allocated_stack; i++) {
spi = i / BPF_REG_SIZE;
+ if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ))
+ /* explored state didn't use this */
+ return true;
+
if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
continue;
+ /* if old state was safe with misc data in the stack
+ * it will be safe with zero-initialized stack.
+ * The opposite is not true
+ */
+ if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
+ cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
+ continue;
if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
cur->stack[spi].slot_type[i % BPF_REG_SIZE])
/* Ex: old explored (safe) state has STACK_SPILL in
@@ -3603,9 +4173,8 @@ static bool stacksafe(struct bpf_verifier_state *old,
* whereas register type in current state is meaningful, it means that
* the current state will reach 'bpf_exit' instruction safely
*/
-static bool states_equal(struct bpf_verifier_env *env,
- struct bpf_verifier_state *old,
- struct bpf_verifier_state *cur)
+static bool func_states_equal(struct bpf_func_state *old,
+ struct bpf_func_state *cur)
{
struct idpair *idmap;
bool ret = false;
@@ -3629,71 +4198,72 @@ out_free:
return ret;
}
+static bool states_equal(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *old,
+ struct bpf_verifier_state *cur)
+{
+ int i;
+
+ if (old->curframe != cur->curframe)
+ return false;
+
+ /* for states to be equal callsites have to be the same
+ * and all frame states need to be equivalent
+ */
+ for (i = 0; i <= old->curframe; i++) {
+ if (old->frame[i]->callsite != cur->frame[i]->callsite)
+ return false;
+ if (!func_states_equal(old->frame[i], cur->frame[i]))
+ return false;
+ }
+ return true;
+}
+
/* A write screens off any subsequent reads; but write marks come from the
- * straight-line code between a state and its parent. When we arrive at a
- * jump target (in the first iteration of the propagate_liveness() loop),
- * we didn't arrive by the straight-line code, so read marks in state must
- * propagate to parent regardless of state's write marks.
+ * straight-line code between a state and its parent. When we arrive at an
+ * equivalent state (jump target or such) we didn't arrive by the straight-line
+ * code, so read marks in the state must propagate to the parent regardless
+ * of the state's write marks. That's what 'parent == state->parent' comparison
+ * in mark_reg_read() and mark_stack_slot_read() is for.
*/
-static bool do_propagate_liveness(const struct bpf_verifier_state *state,
- struct bpf_verifier_state *parent)
+static int propagate_liveness(struct bpf_verifier_env *env,
+ const struct bpf_verifier_state *vstate,
+ struct bpf_verifier_state *vparent)
{
- bool writes = parent == state->parent; /* Observe write marks */
- bool touched = false; /* any changes made? */
- int i;
+ int i, frame, err = 0;
+ struct bpf_func_state *state, *parent;
- if (!parent)
- return touched;
+ if (vparent->curframe != vstate->curframe) {
+ WARN(1, "propagate_live: parent frame %d current frame %d\n",
+ vparent->curframe, vstate->curframe);
+ return -EFAULT;
+ }
/* Propagate read liveness of registers... */
BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
/* We don't need to worry about FP liveness because it's read-only */
for (i = 0; i < BPF_REG_FP; i++) {
- if (parent->regs[i].live & REG_LIVE_READ)
+ if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
continue;
- if (writes && (state->regs[i].live & REG_LIVE_WRITTEN))
- continue;
- if (state->regs[i].live & REG_LIVE_READ) {
- parent->regs[i].live |= REG_LIVE_READ;
- touched = true;
+ if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
+ err = mark_reg_read(env, vstate, vparent, i);
+ if (err)
+ return err;
}
}
+
/* ... and stack slots */
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
- i < parent->allocated_stack / BPF_REG_SIZE; i++) {
- if (parent->stack[i].slot_type[0] != STACK_SPILL)
- continue;
- if (state->stack[i].slot_type[0] != STACK_SPILL)
- continue;
- if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
- continue;
- if (writes &&
- (state->stack[i].spilled_ptr.live & REG_LIVE_WRITTEN))
- continue;
- if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) {
- parent->stack[i].spilled_ptr.live |= REG_LIVE_READ;
- touched = true;
+ for (frame = 0; frame <= vstate->curframe; frame++) {
+ state = vstate->frame[frame];
+ parent = vparent->frame[frame];
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
+ i < parent->allocated_stack / BPF_REG_SIZE; i++) {
+ if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
+ continue;
+ if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
+ mark_stack_slot_read(env, vstate, vparent, i, frame);
}
}
- return touched;
-}
-
-/* "parent" is "a state from which we reach the current state", but initially
- * it is not the state->parent (i.e. "the state whose straight-line code leads
- * to the current state"), instead it is the state that happened to arrive at
- * a (prunable) equivalent of the current state. See comment above
- * do_propagate_liveness() for consequences of this.
- * This function is just a more efficient way of calling mark_reg_read() or
- * mark_stack_slot_read() on each reg in "parent" that is read in "state",
- * though it requires that parent != state->parent in the call arguments.
- */
-static void propagate_liveness(const struct bpf_verifier_state *state,
- struct bpf_verifier_state *parent)
-{
- while (do_propagate_liveness(state, parent)) {
- /* Something changed, so we need to feed those changes onward */
- state = parent;
- parent = state->parent;
- }
+ return err;
}
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
@@ -3701,7 +4271,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
struct bpf_verifier_state_list *new_sl;
struct bpf_verifier_state_list *sl;
struct bpf_verifier_state *cur = env->cur_state;
- int i, err;
+ int i, j, err;
sl = env->explored_states[insn_idx];
if (!sl)
@@ -3722,7 +4292,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
* they'll be immediately forgotten as we're pruning
* this state and will pop a new one.
*/
- propagate_liveness(&sl->state, cur);
+ err = propagate_liveness(env, &sl->state, cur);
+ if (err)
+ return err;
return 1;
}
sl = sl->next;
@@ -3730,9 +4302,10 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
/* there were no equivalent states, remember current one.
* technically the current state is not proven to be safe yet,
- * but it will either reach bpf_exit (which means it's safe) or
- * it will be rejected. Since there are no loops, we won't be
- * seeing this 'insn_idx' instruction again on the way to bpf_exit
+ * but it will either reach outer most bpf_exit (which means it's safe)
+ * or it will be rejected. Since there are no loops, we won't be
+ * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
+ * again on the way to bpf_exit
*/
new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
if (!new_sl)
@@ -3756,10 +4329,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
* explored_states can get read marks.)
*/
for (i = 0; i < BPF_REG_FP; i++)
- cur->regs[i].live = REG_LIVE_NONE;
- for (i = 0; i < cur->allocated_stack / BPF_REG_SIZE; i++)
- if (cur->stack[i].slot_type[0] == STACK_SPILL)
- cur->stack[i].spilled_ptr.live = REG_LIVE_NONE;
+ cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
+
+ /* all stack frames are accessible from callee, clear them all */
+ for (j = 0; j <= cur->curframe; j++) {
+ struct bpf_func_state *frame = cur->frame[j];
+
+ for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++)
+ frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
+ }
return 0;
}
@@ -3777,7 +4355,7 @@ static int do_check(struct bpf_verifier_env *env)
struct bpf_verifier_state *state;
struct bpf_insn *insns = env->prog->insnsi;
struct bpf_reg_state *regs;
- int insn_cnt = env->prog->len;
+ int insn_cnt = env->prog->len, i;
int insn_idx, prev_insn_idx = 0;
int insn_processed = 0;
bool do_print_state = false;
@@ -3785,9 +4363,18 @@ static int do_check(struct bpf_verifier_env *env)
state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
if (!state)
return -ENOMEM;
- env->cur_state = state;
- init_reg_state(env, state->regs);
+ state->curframe = 0;
state->parent = NULL;
+ state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
+ if (!state->frame[0]) {
+ kfree(state);
+ return -ENOMEM;
+ }
+ env->cur_state = state;
+ init_func_state(env, state->frame[0],
+ BPF_MAIN_FUNC /* callsite */,
+ 0 /* frameno */,
+ 0 /* subprogno, zero == main subprog */);
insn_idx = 0;
for (;;) {
struct bpf_insn *insn;
@@ -3834,7 +4421,7 @@ static int do_check(struct bpf_verifier_env *env)
else
verbose(env, "\nfrom %d to %d:",
prev_insn_idx, insn_idx);
- print_verifier_state(env, state);
+ print_verifier_state(env, state->frame[state->curframe]);
do_print_state = false;
}
@@ -3967,13 +4554,17 @@ static int do_check(struct bpf_verifier_env *env)
if (opcode == BPF_CALL) {
if (BPF_SRC(insn->code) != BPF_K ||
insn->off != 0 ||
- insn->src_reg != BPF_REG_0 ||
+ (insn->src_reg != BPF_REG_0 &&
+ insn->src_reg != BPF_PSEUDO_CALL) ||
insn->dst_reg != BPF_REG_0) {
verbose(env, "BPF_CALL uses reserved fields\n");
return -EINVAL;
}
- err = check_call(env, insn->imm, insn_idx);
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ err = check_func_call(env, insn, &insn_idx);
+ else
+ err = check_helper_call(env, insn->imm, insn_idx);
if (err)
return err;
@@ -3998,6 +4589,16 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL;
}
+ if (state->curframe) {
+ /* exit from nested function */
+ prev_insn_idx = insn_idx;
+ err = prepare_func_exit(env, &insn_idx);
+ if (err)
+ return err;
+ do_print_state = true;
+ continue;
+ }
+
/* eBPF calling convetion is such that R0 is used
* to return the value from eBPF program.
* Make sure that it's readable at this time
@@ -4058,8 +4659,16 @@ process_bpf_exit:
insn_idx++;
}
- verbose(env, "processed %d insns, stack depth %d\n", insn_processed,
- env->prog->aux->stack_depth);
+ verbose(env, "processed %d insns, stack depth ", insn_processed);
+ for (i = 0; i < env->subprog_cnt + 1; i++) {
+ u32 depth = env->subprog_stack_depth[i];
+
+ verbose(env, "%d", depth);
+ if (i + 1 < env->subprog_cnt + 1)
+ verbose(env, "+");
+ }
+ verbose(env, "\n");
+ env->prog->aux->stack_depth = env->subprog_stack_depth[0];
return 0;
}
@@ -4245,6 +4854,19 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
return 0;
}
+static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
+{
+ int i;
+
+ if (len == 1)
+ return;
+ for (i = 0; i < env->subprog_cnt; i++) {
+ if (env->subprog_starts[i] < off)
+ continue;
+ env->subprog_starts[i] += len - 1;
+ }
+}
+
static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
const struct bpf_insn *patch, u32 len)
{
@@ -4255,6 +4877,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
return NULL;
if (adjust_insn_aux_data(env, new_prog->len, off, len))
return NULL;
+ adjust_subprog_starts(env, off, len);
return new_prog;
}
@@ -4389,6 +5012,150 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
return 0;
}
+static int jit_subprogs(struct bpf_verifier_env *env)
+{
+ struct bpf_prog *prog = env->prog, **func, *tmp;
+ int i, j, subprog_start, subprog_end = 0, len, subprog;
+ struct bpf_insn *insn = prog->insnsi;
+ void *old_bpf_func;
+ int err = -ENOMEM;
+
+ if (env->subprog_cnt == 0)
+ return 0;
+
+ for (i = 0; i < prog->len; i++, insn++) {
+ if (insn->code != (BPF_JMP | BPF_CALL) ||
+ insn->src_reg != BPF_PSEUDO_CALL)
+ continue;
+ subprog = find_subprog(env, i + insn->imm + 1);
+ if (subprog < 0) {
+ WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
+ i + insn->imm + 1);
+ return -EFAULT;
+ }
+ /* temporarily remember subprog id inside insn instead of
+ * aux_data, since next loop will split up all insns into funcs
+ */
+ insn->off = subprog + 1;
+ /* remember original imm in case JIT fails and fallback
+ * to interpreter will be needed
+ */
+ env->insn_aux_data[i].call_imm = insn->imm;
+ /* point imm to __bpf_call_base+1 from JITs point of view */
+ insn->imm = 1;
+ }
+
+ func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL);
+ if (!func)
+ return -ENOMEM;
+
+ for (i = 0; i <= env->subprog_cnt; i++) {
+ subprog_start = subprog_end;
+ if (env->subprog_cnt == i)
+ subprog_end = prog->len;
+ else
+ subprog_end = env->subprog_starts[i];
+
+ len = subprog_end - subprog_start;
+ func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
+ if (!func[i])
+ goto out_free;
+ memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
+ len * sizeof(struct bpf_insn));
+ func[i]->len = len;
+ func[i]->is_func = 1;
+ /* Use bpf_prog_F_tag to indicate functions in stack traces.
+ * Long term would need debug info to populate names
+ */
+ func[i]->aux->name[0] = 'F';
+ func[i]->aux->stack_depth = env->subprog_stack_depth[i];
+ func[i]->jit_requested = 1;
+ func[i] = bpf_int_jit_compile(func[i]);
+ if (!func[i]->jited) {
+ err = -ENOTSUPP;
+ goto out_free;
+ }
+ cond_resched();
+ }
+ /* at this point all bpf functions were successfully JITed
+ * now populate all bpf_calls with correct addresses and
+ * run last pass of JIT
+ */
+ for (i = 0; i <= env->subprog_cnt; i++) {
+ insn = func[i]->insnsi;
+ for (j = 0; j < func[i]->len; j++, insn++) {
+ if (insn->code != (BPF_JMP | BPF_CALL) ||
+ insn->src_reg != BPF_PSEUDO_CALL)
+ continue;
+ subprog = insn->off;
+ insn->off = 0;
+ insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
+ func[subprog]->bpf_func -
+ __bpf_call_base;
+ }
+ }
+ for (i = 0; i <= env->subprog_cnt; i++) {
+ old_bpf_func = func[i]->bpf_func;
+ tmp = bpf_int_jit_compile(func[i]);
+ if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
+ verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
+ err = -EFAULT;
+ goto out_free;
+ }
+ cond_resched();
+ }
+
+ /* finally lock prog and jit images for all functions and
+ * populate kallsysm
+ */
+ for (i = 0; i <= env->subprog_cnt; i++) {
+ bpf_prog_lock_ro(func[i]);
+ bpf_prog_kallsyms_add(func[i]);
+ }
+ prog->jited = 1;
+ prog->bpf_func = func[0]->bpf_func;
+ prog->aux->func = func;
+ prog->aux->func_cnt = env->subprog_cnt + 1;
+ return 0;
+out_free:
+ for (i = 0; i <= env->subprog_cnt; i++)
+ if (func[i])
+ bpf_jit_free(func[i]);
+ kfree(func);
+ /* cleanup main prog to be interpreted */
+ prog->jit_requested = 0;
+ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+ if (insn->code != (BPF_JMP | BPF_CALL) ||
+ insn->src_reg != BPF_PSEUDO_CALL)
+ continue;
+ insn->off = 0;
+ insn->imm = env->insn_aux_data[i].call_imm;
+ }
+ return err;
+}
+
+static int fixup_call_args(struct bpf_verifier_env *env)
+{
+ struct bpf_prog *prog = env->prog;
+ struct bpf_insn *insn = prog->insnsi;
+ int i, depth;
+
+ if (env->prog->jit_requested)
+ if (jit_subprogs(env) == 0)
+ return 0;
+
+ for (i = 0; i < prog->len; i++, insn++) {
+ if (insn->code != (BPF_JMP | BPF_CALL) ||
+ insn->src_reg != BPF_PSEUDO_CALL)
+ continue;
+ depth = get_callee_stack_depth(env, insn, i);
+ if (depth < 0)
+ return depth;
+ bpf_patch_call_args(insn, depth);
+ }
+ return 0;
+}
+
/* fixup insn->imm field of bpf_call instructions
* and inline eligible helpers as explicit sequence of BPF instructions
*
@@ -4408,11 +5175,15 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
for (i = 0; i < insn_cnt; i++, insn++) {
if (insn->code != (BPF_JMP | BPF_CALL))
continue;
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ continue;
if (insn->imm == BPF_FUNC_get_route_realm)
prog->dst_needed = 1;
if (insn->imm == BPF_FUNC_get_prandom_u32)
bpf_user_rnd_init_once();
+ if (insn->imm == BPF_FUNC_override_return)
+ prog->kprobe_override = 1;
if (insn->imm == BPF_FUNC_tail_call) {
/* If we tail call into other programs, we
* cannot make any assumptions since they can
@@ -4435,7 +5206,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
* handlers are currently limited to 64 bit only.
*/
- if (ebpf_jit_enabled() && BITS_PER_LONG == 64 &&
+ if (prog->jit_requested && BITS_PER_LONG == 64 &&
insn->imm == BPF_FUNC_map_lookup_elem) {
map_ptr = env->insn_aux_data[i + delta].map_ptr;
if (map_ptr == BPF_MAP_PTR_POISON ||
@@ -4587,12 +5358,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
if (!env->explored_states)
goto skip_full_check;
+ env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
+
ret = check_cfg(env);
if (ret < 0)
goto skip_full_check;
- env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
-
ret = do_check(env);
if (env->cur_state) {
free_verifier_state(env->cur_state, true);
@@ -4613,6 +5384,9 @@ skip_full_check:
if (ret == 0)
ret = fixup_bpf_calls(env);
+ if (ret == 0)
+ ret = fixup_call_args(env);
+
if (log->level && bpf_verifier_log_full(log))
ret = -ENOSPC;
if (log->level && !log->ubuf) {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4df5b69..878d86c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4723,6 +4723,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
rcu_read_unlock();
return 0;
}
+
+ case PERF_EVENT_IOC_QUERY_BPF:
+ return perf_event_query_prog_array(event, (void __user *)arg);
default:
return -ENOTTY;
}
@@ -8080,6 +8083,13 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
return -EINVAL;
}
+ /* Kprobe override only works for kprobes, not uprobes. */
+ if (prog->kprobe_override &&
+ !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) {
+ bpf_prog_put(prog);
+ return -EINVAL;
+ }
+
if (is_tracepoint || is_syscall_tp) {
int off = trace_event_get_offsets(event->tp_event);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index da2ccf1..b4aab48 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -83,6 +83,16 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
return &(kretprobe_table_locks[hash].lock);
}
+/* List of symbols that can be overriden for error injection. */
+static LIST_HEAD(kprobe_error_injection_list);
+static DEFINE_MUTEX(kprobe_ei_mutex);
+struct kprobe_ei_entry {
+ struct list_head list;
+ unsigned long start_addr;
+ unsigned long end_addr;
+ void *priv;
+};
+
/* Blacklist -- list of struct kprobe_blacklist_entry */
static LIST_HEAD(kprobe_blacklist);
@@ -1394,6 +1404,17 @@ bool within_kprobe_blacklist(unsigned long addr)
return false;
}
+bool within_kprobe_error_injection_list(unsigned long addr)
+{
+ struct kprobe_ei_entry *ent;
+
+ list_for_each_entry(ent, &kprobe_error_injection_list, list) {
+ if (addr >= ent->start_addr && addr < ent->end_addr)
+ return true;
+ }
+ return false;
+}
+
/*
* If we have a symbol_name argument, look it up and add the offset field
* to it. This way, we can specify a relative address to a symbol.
@@ -2168,6 +2189,86 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
return 0;
}
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+/* Markers of the _kprobe_error_inject_list section */
+extern unsigned long __start_kprobe_error_inject_list[];
+extern unsigned long __stop_kprobe_error_inject_list[];
+
+/*
+ * Lookup and populate the kprobe_error_injection_list.
+ *
+ * For safety reasons we only allow certain functions to be overriden with
+ * bpf_error_injection, so we need to populate the list of the symbols that have
+ * been marked as safe for overriding.
+ */
+static void populate_kprobe_error_injection_list(unsigned long *start,
+ unsigned long *end,
+ void *priv)
+{
+ unsigned long *iter;
+ struct kprobe_ei_entry *ent;
+ unsigned long entry, offset = 0, size = 0;
+
+ mutex_lock(&kprobe_ei_mutex);
+ for (iter = start; iter < end; iter++) {
+ entry = arch_deref_entry_point((void *)*iter);
+
+ if (!kernel_text_address(entry) ||
+ !kallsyms_lookup_size_offset(entry, &size, &offset)) {
+ pr_err("Failed to find error inject entry at %p\n",
+ (void *)entry);
+ continue;
+ }
+
+ ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+ if (!ent)
+ break;
+ ent->start_addr = entry;
+ ent->end_addr = entry + size;
+ ent->priv = priv;
+ INIT_LIST_HEAD(&ent->list);
+ list_add_tail(&ent->list, &kprobe_error_injection_list);
+ }
+ mutex_unlock(&kprobe_ei_mutex);
+}
+
+static void __init populate_kernel_kprobe_ei_list(void)
+{
+ populate_kprobe_error_injection_list(__start_kprobe_error_inject_list,
+ __stop_kprobe_error_inject_list,
+ NULL);
+}
+
+static void module_load_kprobe_ei_list(struct module *mod)
+{
+ if (!mod->num_kprobe_ei_funcs)
+ return;
+ populate_kprobe_error_injection_list(mod->kprobe_ei_funcs,
+ mod->kprobe_ei_funcs +
+ mod->num_kprobe_ei_funcs, mod);
+}
+
+static void module_unload_kprobe_ei_list(struct module *mod)
+{
+ struct kprobe_ei_entry *ent, *n;
+ if (!mod->num_kprobe_ei_funcs)
+ return;
+
+ mutex_lock(&kprobe_ei_mutex);
+ list_for_each_entry_safe(ent, n, &kprobe_error_injection_list, list) {
+ if (ent->priv == mod) {
+ list_del_init(&ent->list);
+ kfree(ent);
+ }
+ }
+ mutex_unlock(&kprobe_ei_mutex);
+}
+#else
+static inline void __init populate_kernel_kprobe_ei_list(void) {}
+static inline void module_load_kprobe_ei_list(struct module *m) {}
+static inline void module_unload_kprobe_ei_list(struct module *m) {}
+#endif
+
/* Module notifier call back, checking kprobes on the module */
static int kprobes_module_callback(struct notifier_block *nb,
unsigned long val, void *data)
@@ -2178,6 +2279,11 @@ static int kprobes_module_callback(struct notifier_block *nb,
unsigned int i;
int checkcore = (val == MODULE_STATE_GOING);
+ if (val == MODULE_STATE_COMING)
+ module_load_kprobe_ei_list(mod);
+ else if (val == MODULE_STATE_GOING)
+ module_unload_kprobe_ei_list(mod);
+
if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
return NOTIFY_DONE;
@@ -2240,6 +2346,8 @@ static int __init init_kprobes(void)
pr_err("Please take care of using kprobes.\n");
}
+ populate_kernel_kprobe_ei_list();
+
if (kretprobe_blacklist_size) {
/* lookup the function address from its name */
for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
@@ -2407,6 +2515,56 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = {
.release = seq_release,
};
+/*
+ * kprobes/error_injection_list -- shows which functions can be overriden for
+ * error injection.
+ * */
+static void *kprobe_ei_seq_start(struct seq_file *m, loff_t *pos)
+{
+ mutex_lock(&kprobe_ei_mutex);
+ return seq_list_start(&kprobe_error_injection_list, *pos);
+}
+
+static void kprobe_ei_seq_stop(struct seq_file *m, void *v)
+{
+ mutex_unlock(&kprobe_ei_mutex);
+}
+
+static void *kprobe_ei_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ return seq_list_next(v, &kprobe_error_injection_list, pos);
+}
+
+static int kprobe_ei_seq_show(struct seq_file *m, void *v)
+{
+ char buffer[KSYM_SYMBOL_LEN];
+ struct kprobe_ei_entry *ent =
+ list_entry(v, struct kprobe_ei_entry, list);
+
+ sprint_symbol(buffer, ent->start_addr);
+ seq_printf(m, "%s\n", buffer);
+ return 0;
+}
+
+static const struct seq_operations kprobe_ei_seq_ops = {
+ .start = kprobe_ei_seq_start,
+ .next = kprobe_ei_seq_next,
+ .stop = kprobe_ei_seq_stop,
+ .show = kprobe_ei_seq_show,
+};
+
+static int kprobe_ei_open(struct inode *inode, struct file *filp)
+{
+ return seq_open(filp, &kprobe_ei_seq_ops);
+}
+
+static const struct file_operations debugfs_kprobe_ei_ops = {
+ .open = kprobe_ei_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
static void arm_all_kprobes(void)
{
struct hlist_head *head;
@@ -2548,6 +2706,11 @@ static int __init debugfs_kprobe_init(void)
if (!file)
goto error;
+ file = debugfs_create_file("error_injection_list", 0444, dir, NULL,
+ &debugfs_kprobe_ei_ops);
+ if (!file)
+ goto error;
+
return 0;
error:
diff --git a/kernel/module.c b/kernel/module.c
index dea01ac..bd695bf 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3118,7 +3118,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
sizeof(*mod->ftrace_callsites),
&mod->num_ftrace_callsites);
#endif
-
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+ mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list",
+ sizeof(*mod->kprobe_ei_funcs),
+ &mod->num_kprobe_ei_funcs);
+#endif
mod->extable = section_objs(info, "__ex_table",
sizeof(*mod->extable), &mod->num_exentries);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 904c952..ae3a2d5 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -530,6 +530,17 @@ config FUNCTION_PROFILER
If in doubt, say N.
+config BPF_KPROBE_OVERRIDE
+ bool "Enable BPF programs to override a kprobed function"
+ depends on BPF_EVENTS
+ depends on KPROBES_ON_FTRACE
+ depends on HAVE_KPROBE_OVERRIDE
+ depends on DYNAMIC_FTRACE_WITH_REGS
+ default n
+ help
+ Allows BPF to override the execution of a probed function and
+ set a different return value. This is used for error injection.
+
config FTRACE_MCOUNT_RECORD
def_bool y
depends on DYNAMIC_FTRACE
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 40207c2..f6d2327 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -13,6 +13,10 @@
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
+#include <linux/kprobes.h>
+#include <asm/kprobes.h>
+
+#include "trace_probe.h"
#include "trace.h"
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
@@ -76,6 +80,24 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
}
EXPORT_SYMBOL_GPL(trace_call_bpf);
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
+{
+ __this_cpu_write(bpf_kprobe_override, 1);
+ regs_set_return_value(regs, rc);
+ arch_ftrace_kprobe_override_function(regs);
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_override_return_proto = {
+ .func = bpf_override_return,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+#endif
+
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
{
int ret;
@@ -556,6 +578,10 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_get_stackid_proto;
case BPF_FUNC_perf_event_read_value:
return &bpf_perf_event_read_value_proto;
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+ case BPF_FUNC_override_return:
+ return &bpf_override_return_proto;
+#endif
default:
return tracing_func_proto(func_id);
}
@@ -773,6 +799,15 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
struct bpf_prog_array *new_array;
int ret = -EEXIST;
+ /*
+ * Kprobe override only works for ftrace based kprobes, and only if they
+ * are on the opt-in list.
+ */
+ if (prog->kprobe_override &&
+ (!trace_kprobe_ftrace(event->tp_event) ||
+ !trace_kprobe_error_injectable(event->tp_event)))
+ return -EINVAL;
+
mutex_lock(&bpf_event_mutex);
if (event->prog)
@@ -825,3 +860,26 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
unlock:
mutex_unlock(&bpf_event_mutex);
}
+
+int perf_event_query_prog_array(struct perf_event *event, void __user *info)
+{
+ struct perf_event_query_bpf __user *uquery = info;
+ struct perf_event_query_bpf query = {};
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (event->attr.type != PERF_TYPE_TRACEPOINT)
+ return -EINVAL;
+ if (copy_from_user(&query, uquery, sizeof(query)))
+ return -EFAULT;
+
+ mutex_lock(&bpf_event_mutex);
+ ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
+ uquery->ids,
+ query.ids_len,
+ &uquery->prog_cnt);
+ mutex_unlock(&bpf_event_mutex);
+
+ return ret;
+}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 492700c..91f4b57 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -42,6 +42,7 @@ struct trace_kprobe {
(offsetof(struct trace_kprobe, tp.args) + \
(sizeof(struct probe_arg) * (n)))
+DEFINE_PER_CPU(int, bpf_kprobe_override);
static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
{
@@ -87,6 +88,27 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
return nhit;
}
+int trace_kprobe_ftrace(struct trace_event_call *call)
+{
+ struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+ return kprobe_ftrace(&tk->rp.kp);
+}
+
+int trace_kprobe_error_injectable(struct trace_event_call *call)
+{
+ struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+ unsigned long addr;
+
+ if (tk->symbol) {
+ addr = (unsigned long)
+ kallsyms_lookup_name(trace_kprobe_symbol(tk));
+ addr += tk->rp.kp.offset;
+ } else {
+ addr = (unsigned long)tk->rp.kp.addr;
+ }
+ return within_kprobe_error_injection_list(addr);
+}
+
static int register_kprobe_event(struct trace_kprobe *tk);
static int unregister_kprobe_event(struct trace_kprobe *tk);
@@ -1170,7 +1192,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call)
#ifdef CONFIG_PERF_EVENTS
/* Kprobe profile handler */
-static void
+static int
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
struct trace_event_call *call = &tk->tp.call;
@@ -1179,12 +1201,29 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
int size, __size, dsize;
int rctx;
- if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
- return;
+ if (bpf_prog_array_valid(call)) {
+ int ret;
+
+ ret = trace_call_bpf(call, regs);
+
+ /*
+ * We need to check and see if we modified the pc of the
+ * pt_regs, and if so clear the kprobe and return 1 so that we
+ * don't do the instruction skipping. Also reset our state so
+ * we are clean the next pass through.
+ */
+ if (__this_cpu_read(bpf_kprobe_override)) {
+ __this_cpu_write(bpf_kprobe_override, 0);
+ reset_current_kprobe();
+ return 1;
+ }
+ if (!ret)
+ return 0;
+ }
head = this_cpu_ptr(call->perf_events);
if (hlist_empty(head))
- return;
+ return 0;
dsize = __get_data_size(&tk->tp, regs);
__size = sizeof(*entry) + tk->tp.size + dsize;
@@ -1193,13 +1232,14 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
entry = perf_trace_buf_alloc(size, NULL, &rctx);
if (!entry)
- return;
+ return 0;
entry->ip = (unsigned long)tk->rp.kp.addr;
memset(&entry[1], 0, dsize);
store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
+ return 0;
}
NOKPROBE_SYMBOL(kprobe_perf_func);
@@ -1275,16 +1315,24 @@ static int kprobe_register(struct trace_event_call *event,
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
+ int ret = 0;
raw_cpu_inc(*tk->nhit);
if (tk->tp.flags & TP_FLAG_TRACE)
kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS
- if (tk->tp.flags & TP_FLAG_PROFILE)
- kprobe_perf_func(tk, regs);
+ if (tk->tp.flags & TP_FLAG_PROFILE) {
+ ret = kprobe_perf_func(tk, regs);
+ /*
+ * The ftrace kprobe handler leaves it up to us to re-enable
+ * preemption here before returning if we've modified the ip.
+ */
+ if (ret)
+ preempt_enable_no_resched();
+ }
#endif
- return 0; /* We don't tweek kernel, so just return 0 */
+ return ret;
}
NOKPROBE_SYMBOL(kprobe_dispatcher);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index fb66e3e..5e54d74 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -252,6 +252,8 @@ struct symbol_cache;
unsigned long update_symbol_cache(struct symbol_cache *sc);
void free_symbol_cache(struct symbol_cache *sc);
struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
+int trace_kprobe_ftrace(struct trace_event_call *call);
+int trace_kprobe_error_injectable(struct trace_event_call *call);
#else
/* uprobes do not support symbol fetch methods */
#define fetch_symbol_u8 NULL
@@ -277,6 +279,16 @@ alloc_symbol_cache(const char *sym, long offset)
{
return NULL;
}
+
+static inline int trace_kprobe_ftrace(struct trace_event_call *call)
+{
+ return 0;
+}
+
+static inline int trace_kprobe_error_injectable(struct trace_event_call *call)
+{
+ return 0;
+}
#endif /* CONFIG_KPROBE_EVENTS */
struct probe_arg {
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index adeaa13..4fb944a 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -12,6 +12,7 @@ hostprogs-y += tracex3
hostprogs-y += tracex4
hostprogs-y += tracex5
hostprogs-y += tracex6
+hostprogs-y += tracex7
hostprogs-y += test_probe_write_user
hostprogs-y += trace_output
hostprogs-y += lathist
@@ -58,6 +59,7 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o
tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o
tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o
tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o
+tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o
load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o
test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o
trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o
@@ -101,6 +103,7 @@ always += tracex3_kern.o
always += tracex4_kern.o
always += tracex5_kern.o
always += tracex6_kern.o
+always += tracex7_kern.o
always += sock_flags_kern.o
always += test_probe_write_user_kern.o
always += trace_output_kern.o
@@ -155,6 +158,7 @@ HOSTLOADLIBES_tracex3 += -lelf
HOSTLOADLIBES_tracex4 += -lelf -lrt
HOSTLOADLIBES_tracex5 += -lelf
HOSTLOADLIBES_tracex6 += -lelf
+HOSTLOADLIBES_tracex7 += -lelf
HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
HOSTLOADLIBES_load_sock_ops += -lelf
HOSTLOADLIBES_test_probe_write_user += -lelf
diff --git a/samples/bpf/test_override_return.sh b/samples/bpf/test_override_return.sh
new file mode 100755
index 0000000..e68b9ee
--- /dev/null
+++ b/samples/bpf/test_override_return.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+rm -f testfile.img
+dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
+DEVICE=$(losetup --show -f testfile.img)
+mkfs.btrfs -f $DEVICE
+mkdir tmpmnt
+./tracex7 $DEVICE
+if [ $? -eq 0 ]
+then
+ echo "SUCCESS!"
+else
+ echo "FAILED!"
+fi
+losetup -d $DEVICE
diff --git a/samples/bpf/tracex7_kern.c b/samples/bpf/tracex7_kern.c
new file mode 100644
index 0000000..1ab308a
--- /dev/null
+++ b/samples/bpf/tracex7_kern.c
@@ -0,0 +1,16 @@
+#include <uapi/linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+SEC("kprobe/open_ctree")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ unsigned long rc = -12;
+
+ bpf_override_return(ctx, rc);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c
new file mode 100644
index 0000000..8a52ac4
--- /dev/null
+++ b/samples/bpf/tracex7_user.c
@@ -0,0 +1,28 @@
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+int main(int argc, char **argv)
+{
+ FILE *f;
+ char filename[256];
+ char command[256];
+ int ret;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ snprintf(command, 256, "mount %s tmpmnt/", argv[1]);
+ f = popen(command, "r");
+ ret = pclose(f);
+
+ return ret ? 0 : 1;
+}
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index 37292bb..c462a92 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -3,12 +3,16 @@ include ../../../scripts/utilities.mak
INSTALL ?= install
RM ?= rm -f
+RMDIR ?= rmdir --ignore-fail-on-non-empty
-# Make the path relative to DESTDIR, not prefix
-ifndef DESTDIR
-prefix ?= /usr/local
+ifeq ($(V),1)
+ Q =
+else
+ Q = @
endif
-mandir ?= $(prefix)/share/man
+
+prefix ?= /usr/local
+mandir ?= $(prefix)/man
man8dir = $(mandir)/man8
MAN8_RST = $(wildcard *.rst)
@@ -20,15 +24,21 @@ man: man8
man8: $(DOC_MAN8)
$(OUTPUT)%.8: %.rst
- rst2man $< > $@
+ $(QUIET_GEN)rst2man $< > $@
clean:
- $(call QUIET_CLEAN, Documentation) $(RM) $(DOC_MAN8)
+ $(call QUIET_CLEAN, Documentation)
+ $(Q)$(RM) $(DOC_MAN8)
install: man
- $(call QUIET_INSTALL, Documentation-man) \
- $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir); \
- $(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir);
+ $(call QUIET_INSTALL, Documentation-man)
+ $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
+ $(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir)
+
+uninstall:
+ $(call QUIET_UNINST, Documentation-man)
+ $(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8))
+ $(Q)$(RMDIR) $(DESTDIR)$(man8dir)
-.PHONY: man man8 clean install
+.PHONY: man man8 clean install uninstall
.DEFAULT_GOAL := man
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
new file mode 100644
index 0000000..45c71b1
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -0,0 +1,118 @@
+================
+bpftool-cgroup
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF progs
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **cgroup** *COMMAND*
+
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+
+ *COMMANDS* :=
+ { **list** | **attach** | **detach** | **help** }
+
+MAP COMMANDS
+=============
+
+| **bpftool** **cgroup list** *CGROUP*
+| **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
+| **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
+| **bpftool** **cgroup help**
+|
+| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
+| *ATTACH_TYPE* := { *ingress* | *egress* | *sock_create* | *sock_ops* | *device* }
+| *ATTACH_FLAGS* := { *multi* | *override* }
+
+DESCRIPTION
+===========
+ **bpftool cgroup list** *CGROUP*
+ List all programs attached to the cgroup *CGROUP*.
+
+ Output will start with program ID followed by attach type,
+ attach flags and program name.
+
+ **bpftool cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
+ Attach program *PROG* to the cgroup *CGROUP* with attach type
+ *ATTACH_TYPE* and optional *ATTACH_FLAGS*.
+
+ *ATTACH_FLAGS* can be one of: **override** if a sub-cgroup installs
+ some bpf program, the program in this cgroup yields to sub-cgroup
+ program; **multi** if a sub-cgroup installs some bpf program,
+ that cgroup program gets run in addition to the program in this
+ cgroup.
+
+ Only one program is allowed to be attached to a cgroup with
+ no attach flags or the **override** flag. Attaching another
+ program will release old program and attach the new one.
+
+ Multiple programs are allowed to be attached to a cgroup with
+ **multi**. They are executed in FIFO order (those that were
+ attached first, run first).
+
+ Non-default *ATTACH_FLAGS* are supported by kernel version 4.14
+ and later.
+
+ *ATTACH_TYPE* can be on of:
+ **ingress** ingress path of the inet socket (since 4.10);
+ **egress** egress path of the inet socket (since 4.10);
+ **sock_create** opening of an inet socket (since 4.10);
+ **sock_ops** various socket operations (since 4.12);
+ **device** device access (since 4.15).
+
+ **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
+ Detach *PROG* from the cgroup *CGROUP* and attach type
+ *ATTACH_TYPE*.
+
+ **bpftool prog help**
+ Print short help message.
+
+OPTIONS
+=======
+ -h, --help
+ Print short generic help message (similar to **bpftool help**).
+
+ -v, --version
+ Print version number (similar to **bpftool version**).
+
+ -j, --json
+ Generate JSON output. For commands that cannot produce JSON, this
+ option has no effect.
+
+ -p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+ -f, --bpffs
+ Show file names of pinned programs.
+
+EXAMPLES
+========
+|
+| **# mount -t bpf none /sys/fs/bpf/**
+| **# mkdir /sys/fs/cgroup/test.slice**
+| **# bpftool prog load ./device_cgroup.o /sys/fs/bpf/prog**
+| **# bpftool cgroup attach /sys/fs/cgroup/test.slice/ device id 1 allow_multi**
+
+**# bpftool cgroup list /sys/fs/cgroup/test.slice/**
+
+::
+
+ ID AttachType AttachFlags Name
+ 1 device allow_multi bpf_prog1
+
+|
+| **# bpftool cgroup detach /sys/fs/cgroup/test.slice/ device id 1**
+| **# bpftool cgroup list /sys/fs/cgroup/test.slice/**
+
+::
+
+ ID AttachType AttachFlags Name
+
+SEE ALSO
+========
+ **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 9f51a26..421cabc 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -128,4 +128,4 @@ EXAMPLES
SEE ALSO
========
- **bpftool**\ (8), **bpftool-prog**\ (8)
+ **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 36e8d1c..81c97c0 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -15,7 +15,7 @@ SYNOPSIS
*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
*COMMANDS* :=
- { **show** | **dump xlated** | **dump jited** | **pin** | **help** }
+ { **show** | **dump xlated** | **dump jited** | **pin** | **load** | **help** }
MAP COMMANDS
=============
@@ -24,6 +24,7 @@ MAP COMMANDS
| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}]
| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}]
| **bpftool** **prog pin** *PROG* *FILE*
+| **bpftool** **prog load** *OBJ* *FILE*
| **bpftool** **prog help**
|
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
@@ -57,6 +58,11 @@ DESCRIPTION
Note: *FILE* must be located in *bpffs* mount.
+ **bpftool prog load** *OBJ* *FILE*
+ Load bpf program from binary *OBJ* and pin as *FILE*.
+
+ Note: *FILE* must be located in *bpffs* mount.
+
**bpftool prog help**
Print short help message.
@@ -126,8 +132,10 @@ EXAMPLES
|
| **# mount -t bpf none /sys/fs/bpf/**
| **# bpftool prog pin id 10 /sys/fs/bpf/prog**
+| **# bpftool prog load ./my_prog.o /sys/fs/bpf/prog2**
| **# ls -l /sys/fs/bpf/**
| -rw------- 1 root root 0 Jul 22 01:43 prog
+| -rw------- 1 root root 0 Jul 22 01:44 prog2
**# bpftool prog dum jited pinned /sys/fs/bpf/prog opcodes**
@@ -147,4 +155,4 @@ EXAMPLES
SEE ALSO
========
- **bpftool**\ (8), **bpftool-map**\ (8)
+ **bpftool**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 926c03d..6732a5a 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -16,7 +16,7 @@ SYNOPSIS
**bpftool** **version**
- *OBJECT* := { **map** | **program** }
+ *OBJECT* := { **map** | **program** | **cgroup** }
*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
@@ -26,7 +26,9 @@ SYNOPSIS
| **pin** | **help** }
*PROG-COMMANDS* := { **show** | **dump jited** | **dump xlated** | **pin**
- | **help** }
+ | **load** | **help** }
+
+ *CGROUP-COMMANDS* := { **list** | **attach** | **detach** | **help** }
DESCRIPTION
===========
@@ -53,4 +55,4 @@ OPTIONS
SEE ALSO
========
- **bpftool-map**\ (8), **bpftool-prog**\ (8)
+ **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index ec3052c..3f17ad3 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -1,25 +1,10 @@
include ../../scripts/Makefile.include
-
include ../../scripts/utilities.mak
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
-#$(info Determined 'srctree' to be $(srctree))
-endif
-
-ifneq ($(objtree),)
-#$(info Determined 'objtree' to be $(objtree))
-endif
-
-ifneq ($(OUTPUT),)
-#$(info Determined 'OUTPUT' to be $(OUTPUT))
-# Adding $(OUTPUT) as a directory to look for source files,
-# because use generated output files as sources dependency
-# for flex/bison parsers.
-VPATH += $(OUTPUT)
-export VPATH
endif
ifeq ($(V),1)
@@ -28,12 +13,12 @@ else
Q = @
endif
-BPF_DIR = $(srctree)/tools/lib/bpf/
+BPF_DIR = $(srctree)/tools/lib/bpf/
ifneq ($(OUTPUT),)
- BPF_PATH=$(OUTPUT)
+ BPF_PATH = $(OUTPUT)
else
- BPF_PATH=$(BPF_DIR)
+ BPF_PATH = $(BPF_DIR)
endif
LIBBPF = $(BPF_PATH)libbpf.a
@@ -45,7 +30,7 @@ $(LIBBPF)-clean:
$(call QUIET_CLEAN, libbpf)
$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null
-prefix = /usr/local
+prefix ?= /usr/local
bash_compdir ?= /usr/share/bash-completion/completions
CC = gcc
@@ -55,12 +40,15 @@ CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow
CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
+INSTALL ?= install
+RM ?= rm -f
+
include $(wildcard *.d)
all: $(OUTPUT)bpftool
-SRCS=$(wildcard *.c)
-OBJS=$(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
+SRCS = $(wildcard *.c)
+OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
$(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
@@ -73,21 +61,34 @@ $(OUTPUT)%.o: %.c
clean: $(LIBBPF)-clean
$(call QUIET_CLEAN, bpftool)
- $(Q)rm -rf $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+ $(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+
+install: $(OUTPUT)bpftool
+ $(call QUIET_INSTALL, bpftool)
+ $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/sbin
+ $(Q)$(INSTALL) $(OUTPUT)bpftool $(DESTDIR)$(prefix)/sbin/bpftool
+ $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(bash_compdir)
+ $(Q)$(INSTALL) -m 0644 bash-completion/bpftool $(DESTDIR)$(bash_compdir)
-install:
- install -m 0755 -d $(prefix)/sbin
- install $(OUTPUT)bpftool $(prefix)/sbin/bpftool
- install -m 0755 -d $(bash_compdir)
- install -m 0644 bash-completion/bpftool $(bash_compdir)
+uninstall:
+ $(call QUIET_UNINST, bpftool)
+ $(Q)$(RM) $(DESTDIR)$(prefix)/sbin/bpftool
+ $(Q)$(RM) $(DESTDIR)$(bash_compdir)/bpftool
doc:
- $(Q)$(MAKE) -C Documentation/
+ $(call descend,Documentation)
+
+doc-clean:
+ $(call descend,Documentation,clean)
doc-install:
- $(Q)$(MAKE) -C Documentation/ install
+ $(call descend,Documentation,install)
+
+doc-uninstall:
+ $(call descend,Documentation,uninstall)
FORCE:
-.PHONY: all clean FORCE install doc doc-install
+.PHONY: all FORCE clean install uninstall
+.PHONY: doc doc-clean doc-install doc-uninstall
.DEFAULT_GOAL := all
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
new file mode 100644
index 0000000..34ca303
--- /dev/null
+++ b/tools/bpf/bpftool/cgroup.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2017 Facebook
+// Author: Roman Gushchin <guro@fb.com>
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+#define HELP_SPEC_ATTACH_FLAGS \
+ "ATTACH_FLAGS := { multi | override }"
+
+#define HELP_SPEC_ATTACH_TYPES \
+ "ATTACH_TYPE := { ingress | egress | sock_create | sock_ops | device }"
+
+static const char * const attach_type_strings[] = {
+ [BPF_CGROUP_INET_INGRESS] = "ingress",
+ [BPF_CGROUP_INET_EGRESS] = "egress",
+ [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
+ [BPF_CGROUP_SOCK_OPS] = "sock_ops",
+ [BPF_CGROUP_DEVICE] = "device",
+ [__MAX_BPF_ATTACH_TYPE] = NULL,
+};
+
+static enum bpf_attach_type parse_attach_type(const char *str)
+{
+ enum bpf_attach_type type;
+
+ for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+ if (attach_type_strings[type] &&
+ is_prefix(str, attach_type_strings[type]))
+ return type;
+ }
+
+ return __MAX_BPF_ATTACH_TYPE;
+}
+
+static int list_bpf_prog(int id, const char *attach_type_str,
+ const char *attach_flags_str)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int prog_fd;
+
+ prog_fd = bpf_prog_get_fd_by_id(id);
+ if (prog_fd < 0)
+ return -1;
+
+ if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
+ close(prog_fd);
+ return -1;
+ }
+
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "id", info.id);
+ jsonw_string_field(json_wtr, "attach_type",
+ attach_type_str);
+ jsonw_string_field(json_wtr, "attach_flags",
+ attach_flags_str);
+ jsonw_string_field(json_wtr, "name", info.name);
+ jsonw_end_object(json_wtr);
+ } else {
+ printf("%-8u %-15s %-15s %-15s\n", info.id,
+ attach_type_str,
+ attach_flags_str,
+ info.name);
+ }
+
+ close(prog_fd);
+ return 0;
+}
+
+static int list_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
+{
+ __u32 prog_ids[1024] = {0};
+ char *attach_flags_str;
+ __u32 prog_cnt, iter;
+ __u32 attach_flags;
+ char buf[32];
+ int ret;
+
+ prog_cnt = ARRAY_SIZE(prog_ids);
+ ret = bpf_prog_query(cgroup_fd, type, 0, &attach_flags, prog_ids,
+ &prog_cnt);
+ if (ret)
+ return ret;
+
+ if (prog_cnt == 0)
+ return 0;
+
+ switch (attach_flags) {
+ case BPF_F_ALLOW_MULTI:
+ attach_flags_str = "multi";
+ break;
+ case BPF_F_ALLOW_OVERRIDE:
+ attach_flags_str = "override";
+ break;
+ case 0:
+ attach_flags_str = "";
+ break;
+ default:
+ snprintf(buf, sizeof(buf), "unknown(%x)", attach_flags);
+ attach_flags_str = buf;
+ }
+
+ for (iter = 0; iter < prog_cnt; iter++)
+ list_bpf_prog(prog_ids[iter], attach_type_strings[type],
+ attach_flags_str);
+
+ return 0;
+}
+
+static int do_list(int argc, char **argv)
+{
+ enum bpf_attach_type type;
+ int cgroup_fd;
+ int ret = -1;
+
+ if (argc < 1) {
+ p_err("too few parameters for cgroup list\n");
+ goto exit;
+ } else if (argc > 1) {
+ p_err("too many parameters for cgroup list\n");
+ goto exit;
+ }
+
+ cgroup_fd = open(argv[0], O_RDONLY);
+ if (cgroup_fd < 0) {
+ p_err("can't open cgroup %s\n", argv[1]);
+ goto exit;
+ }
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+ else
+ printf("%-8s %-15s %-15s %-15s\n", "ID", "AttachType",
+ "AttachFlags", "Name");
+
+ for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+ /*
+ * Not all attach types may be supported, so it's expected,
+ * that some requests will fail.
+ * If we were able to get the list for at least one
+ * attach type, let's return 0.
+ */
+ if (list_attached_bpf_progs(cgroup_fd, type) == 0)
+ ret = 0;
+ }
+
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ close(cgroup_fd);
+exit:
+ return ret;
+}
+
+static int do_attach(int argc, char **argv)
+{
+ enum bpf_attach_type attach_type;
+ int cgroup_fd, prog_fd;
+ int attach_flags = 0;
+ int ret = -1;
+ int i;
+
+ if (argc < 4) {
+ p_err("too few parameters for cgroup attach\n");
+ goto exit;
+ }
+
+ cgroup_fd = open(argv[0], O_RDONLY);
+ if (cgroup_fd < 0) {
+ p_err("can't open cgroup %s\n", argv[1]);
+ goto exit;
+ }
+
+ attach_type = parse_attach_type(argv[1]);
+ if (attach_type == __MAX_BPF_ATTACH_TYPE) {
+ p_err("invalid attach type\n");
+ goto exit_cgroup;
+ }
+
+ argc -= 2;
+ argv = &argv[2];
+ prog_fd = prog_parse_fd(&argc, &argv);
+ if (prog_fd < 0)
+ goto exit_cgroup;
+
+ for (i = 0; i < argc; i++) {
+ if (is_prefix(argv[i], "multi")) {
+ attach_flags |= BPF_F_ALLOW_MULTI;
+ } else if (is_prefix(argv[i], "override")) {
+ attach_flags |= BPF_F_ALLOW_OVERRIDE;
+ } else {
+ p_err("unknown option: %s\n", argv[i]);
+ goto exit_cgroup;
+ }
+ }
+
+ if (bpf_prog_attach(prog_fd, cgroup_fd, attach_type, attach_flags)) {
+ p_err("failed to attach program");
+ goto exit_prog;
+ }
+
+ if (json_output)
+ jsonw_null(json_wtr);
+
+ ret = 0;
+
+exit_prog:
+ close(prog_fd);
+exit_cgroup:
+ close(cgroup_fd);
+exit:
+ return ret;
+}
+
+static int do_detach(int argc, char **argv)
+{
+ enum bpf_attach_type attach_type;
+ int prog_fd, cgroup_fd;
+ int ret = -1;
+
+ if (argc < 4) {
+ p_err("too few parameters for cgroup detach\n");
+ goto exit;
+ }
+
+ cgroup_fd = open(argv[0], O_RDONLY);
+ if (cgroup_fd < 0) {
+ p_err("can't open cgroup %s\n", argv[1]);
+ goto exit;
+ }
+
+ attach_type = parse_attach_type(argv[1]);
+ if (attach_type == __MAX_BPF_ATTACH_TYPE) {
+ p_err("invalid attach type");
+ goto exit_cgroup;
+ }
+
+ argc -= 2;
+ argv = &argv[2];
+ prog_fd = prog_parse_fd(&argc, &argv);
+ if (prog_fd < 0)
+ goto exit_cgroup;
+
+ if (bpf_prog_detach2(prog_fd, cgroup_fd, attach_type)) {
+ p_err("failed to detach program");
+ goto exit_prog;
+ }
+
+ if (json_output)
+ jsonw_null(json_wtr);
+
+ ret = 0;
+
+exit_prog:
+ close(prog_fd);
+exit_cgroup:
+ close(cgroup_fd);
+exit:
+ return ret;
+}
+
+static int do_help(int argc, char **argv)
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %s %s list CGROUP\n"
+ " %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
+ " %s %s detach CGROUP ATTACH_TYPE PROG\n"
+ " %s %s help\n"
+ "\n"
+ " " HELP_SPEC_ATTACH_TYPES "\n"
+ " " HELP_SPEC_ATTACH_FLAGS "\n"
+ " " HELP_SPEC_PROGRAM "\n"
+ " " HELP_SPEC_OPTIONS "\n"
+ "",
+ bin_name, argv[-2], bin_name, argv[-2],
+ bin_name, argv[-2], bin_name, argv[-2]);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "list", do_list },
+ { "attach", do_attach },
+ { "detach", do_detach },
+ { "help", do_help },
+ { 0 }
+};
+
+int do_cgroup(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 2bd3b28..b62c94e 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -163,13 +163,49 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
return fd;
}
-int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
+int do_pin_fd(int fd, const char *name)
{
char err_str[ERR_MAX_LEN];
- unsigned int id;
- char *endptr;
char *file;
char *dir;
+ int err = 0;
+
+ err = bpf_obj_pin(fd, name);
+ if (!err)
+ goto out;
+
+ file = malloc(strlen(name) + 1);
+ strcpy(file, name);
+ dir = dirname(file);
+
+ if (errno != EPERM || is_bpffs(dir)) {
+ p_err("can't pin the object (%s): %s", name, strerror(errno));
+ goto out_free;
+ }
+
+ /* Attempt to mount bpffs, then retry pinning. */
+ err = mnt_bpffs(dir, err_str, ERR_MAX_LEN);
+ if (!err) {
+ err = bpf_obj_pin(fd, name);
+ if (err)
+ p_err("can't pin the object (%s): %s", name,
+ strerror(errno));
+ } else {
+ err_str[ERR_MAX_LEN - 1] = '\0';
+ p_err("can't mount BPF file system to pin the object (%s): %s",
+ name, err_str);
+ }
+
+out_free:
+ free(file);
+out:
+ return err;
+}
+
+int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
+{
+ unsigned int id;
+ char *endptr;
int err;
int fd;
@@ -195,35 +231,8 @@ int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
return -1;
}
- err = bpf_obj_pin(fd, *argv);
- if (!err)
- goto out_close;
-
- file = malloc(strlen(*argv) + 1);
- strcpy(file, *argv);
- dir = dirname(file);
-
- if (errno != EPERM || is_bpffs(dir)) {
- p_err("can't pin the object (%s): %s", *argv, strerror(errno));
- goto out_free;
- }
+ err = do_pin_fd(fd, *argv);
- /* Attempt to mount bpffs, then retry pinning. */
- err = mnt_bpffs(dir, err_str, ERR_MAX_LEN);
- if (!err) {
- err = bpf_obj_pin(fd, *argv);
- if (err)
- p_err("can't pin the object (%s): %s", *argv,
- strerror(errno));
- } else {
- err_str[ERR_MAX_LEN - 1] = '\0';
- p_err("can't mount BPF file system to pin the object (%s): %s",
- *argv, err_str);
- }
-
-out_free:
- free(file);
-out_close:
close(fd);
return err;
}
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index d294bc8..ecd53cc 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -85,7 +85,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
- " OBJECT := { prog | map }\n"
+ " OBJECT := { prog | map | cgroup }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, bin_name, bin_name);
@@ -173,6 +173,7 @@ static const struct cmd cmds[] = {
{ "batch", do_batch },
{ "prog", do_prog },
{ "map", do_map },
+ { "cgroup", do_cgroup },
{ "version", do_version },
{ 0 }
};
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index bff330b..8f6d3ca 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -111,9 +111,11 @@ char *get_fdinfo(int fd, const char *key);
int open_obj_pinned(char *path);
int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
+int do_pin_fd(int fd, const char *name);
int do_prog(int argc, char **arg);
int do_map(int argc, char **arg);
+int do_cgroup(int argc, char **arg);
int prog_parse_fd(int *argc, char ***argv);
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index ad619b9..037484c 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -45,6 +45,7 @@
#include <sys/stat.h>
#include <bpf.h>
+#include <libbpf.h>
#include "main.h"
#include "disasm.h"
@@ -635,6 +636,30 @@ static int do_pin(int argc, char **argv)
return err;
}
+static int do_load(int argc, char **argv)
+{
+ struct bpf_object *obj;
+ int prog_fd;
+
+ if (argc != 2)
+ usage();
+
+ if (bpf_prog_load(argv[0], BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) {
+ p_err("failed to load program\n");
+ return -1;
+ }
+
+ if (do_pin_fd(prog_fd, argv[1])) {
+ p_err("failed to pin program\n");
+ return -1;
+ }
+
+ if (json_output)
+ jsonw_null(json_wtr);
+
+ return 0;
+}
+
static int do_help(int argc, char **argv)
{
if (json_output) {
@@ -647,13 +672,14 @@ static int do_help(int argc, char **argv)
" %s %s dump xlated PROG [{ file FILE | opcodes }]\n"
" %s %s dump jited PROG [{ file FILE | opcodes }]\n"
" %s %s pin PROG FILE\n"
+ " %s %s load OBJ FILE\n"
" %s %s help\n"
"\n"
" " HELP_SPEC_PROGRAM "\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2]);
+ bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
return 0;
}
@@ -663,6 +689,7 @@ static const struct cmd cmds[] = {
{ "help", do_help },
{ "dump", do_dump },
{ "pin", do_pin },
+ { "load", do_load },
{ 0 }
};
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4c223ab..db1b092 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -197,8 +197,14 @@ enum bpf_attach_type {
*/
#define BPF_F_STRICT_ALIGNMENT (1U << 0)
+/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
#define BPF_PSEUDO_MAP_FD 1
+/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
+ * offset to another bpf function
+ */
+#define BPF_PSEUDO_CALL 1
+
/* flags for BPF_MAP_UPDATE_ELEM command */
#define BPF_ANY 0 /* create new element or update existing */
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
@@ -677,6 +683,10 @@ union bpf_attr {
* @buf: buf to fill
* @buf_size: size of the buf
* Return : 0 on success or negative error code
+ *
+ * int bpf_override_return(pt_regs, rc)
+ * @pt_regs: pointer to struct pt_regs
+ * @rc: the return value to set
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -736,7 +746,8 @@ union bpf_attr {
FN(xdp_adjust_meta), \
FN(perf_event_read_value), \
FN(perf_prog_read_value), \
- FN(getsockopt),
+ FN(getsockopt), \
+ FN(override_return),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index b9a4953..7695336 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -418,6 +418,27 @@ struct perf_event_attr {
__u16 __reserved_2; /* align to __u64 */
};
+/*
+ * Structure used by below PERF_EVENT_IOC_QUERY_BPF command
+ * to query bpf programs attached to the same perf tracepoint
+ * as the given perf event.
+ */
+struct perf_event_query_bpf {
+ /*
+ * The below ids array length
+ */
+ __u32 ids_len;
+ /*
+ * Set by the kernel to indicate the number of
+ * available programs
+ */
+ __u32 prog_cnt;
+ /*
+ * User provided buffer to store program ids
+ */
+ __u32 ids[0];
+};
+
#define perf_flags(attr) (*(&(attr)->read_format + 1))
/*
@@ -433,6 +454,7 @@ struct perf_event_attr {
#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
+#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *)
enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 4555304..8ed43ae 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -213,10 +213,10 @@ PHONY += force elfdep bpfdep
force:
elfdep:
- @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit -1 ; fi
+ @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi
bpfdep:
- @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit -1 ; fi
+ @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi
# Declare the contents of the .PHONY variable as phony. We keep that
# information in a variable so we can use it in if_changed and friends.
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 6534889..9f44c19 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -40,7 +40,7 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
__u32 map_flags);
/* Recommend log buffer size */
-#define BPF_LOG_BUF_SIZE 65536
+#define BPF_LOG_BUF_SIZE (256 * 1024)
int bpf_load_program_name(enum bpf_prog_type type, const char *name,
const struct bpf_insn *insns,
size_t insns_cnt, const char *license,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5aa45f8..5b83875 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -174,12 +174,19 @@ struct bpf_program {
char *name;
char *section_name;
struct bpf_insn *insns;
- size_t insns_cnt;
+ size_t insns_cnt, main_prog_cnt;
enum bpf_prog_type type;
- struct {
+ struct reloc_desc {
+ enum {
+ RELO_LD64,
+ RELO_CALL,
+ } type;
int insn_idx;
- int map_idx;
+ union {
+ int map_idx;
+ int text_off;
+ };
} *reloc_desc;
int nr_reloc;
@@ -234,6 +241,7 @@ struct bpf_object {
} *reloc;
int nr_reloc;
int maps_shndx;
+ int text_shndx;
} efile;
/*
* All loaded bpf_object is linked in a list, which is
@@ -375,9 +383,13 @@ bpf_object__init_prog_names(struct bpf_object *obj)
size_t pi, si;
for (pi = 0; pi < obj->nr_programs; pi++) {
- char *name = NULL;
+ const char *name = NULL;
prog = &obj->programs[pi];
+ if (prog->idx == obj->efile.text_shndx) {
+ name = ".text";
+ goto skip_search;
+ }
for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
si++) {
@@ -387,6 +399,8 @@ bpf_object__init_prog_names(struct bpf_object *obj)
continue;
if (sym.st_shndx != prog->idx)
continue;
+ if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL)
+ continue;
name = elf_strptr(obj->efile.elf,
obj->efile.strtabidx,
@@ -403,7 +417,7 @@ bpf_object__init_prog_names(struct bpf_object *obj)
prog->section_name);
return -EINVAL;
}
-
+skip_search:
prog->name = strdup(name);
if (!prog->name) {
pr_warning("failed to allocate memory for prog sym %s\n",
@@ -793,6 +807,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
} else if ((sh.sh_type == SHT_PROGBITS) &&
(sh.sh_flags & SHF_EXECINSTR) &&
(data->d_size > 0)) {
+ if (strcmp(name, ".text") == 0)
+ obj->efile.text_shndx = idx;
err = bpf_object__add_program(obj, data->d_buf,
data->d_size, name, idx);
if (err) {
@@ -854,11 +870,14 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
}
static int
-bpf_program__collect_reloc(struct bpf_program *prog,
- size_t nr_maps, GElf_Shdr *shdr,
- Elf_Data *data, Elf_Data *symbols,
- int maps_shndx, struct bpf_map *maps)
+bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
+ Elf_Data *data, struct bpf_object *obj)
{
+ Elf_Data *symbols = obj->efile.symbols;
+ int text_shndx = obj->efile.text_shndx;
+ int maps_shndx = obj->efile.maps_shndx;
+ struct bpf_map *maps = obj->maps;
+ size_t nr_maps = obj->nr_maps;
int i, nrels;
pr_debug("collecting relocating info for: '%s'\n",
@@ -891,8 +910,10 @@ bpf_program__collect_reloc(struct bpf_program *prog,
GELF_R_SYM(rel.r_info));
return -LIBBPF_ERRNO__FORMAT;
}
+ pr_debug("relo for %ld value %ld name %d\n",
+ rel.r_info >> 32, sym.st_value, sym.st_name);
- if (sym.st_shndx != maps_shndx) {
+ if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) {
pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n",
prog->section_name, sym.st_shndx);
return -LIBBPF_ERRNO__RELOC;
@@ -901,6 +922,17 @@ bpf_program__collect_reloc(struct bpf_program *prog,
insn_idx = rel.r_offset / sizeof(struct bpf_insn);
pr_debug("relocation: insn_idx=%u\n", insn_idx);
+ if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) {
+ if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) {
+ pr_warning("incorrect bpf_call opcode\n");
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ prog->reloc_desc[i].type = RELO_CALL;
+ prog->reloc_desc[i].insn_idx = insn_idx;
+ prog->reloc_desc[i].text_off = sym.st_value;
+ continue;
+ }
+
if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n",
insn_idx, insns[insn_idx].code);
@@ -922,6 +954,7 @@ bpf_program__collect_reloc(struct bpf_program *prog,
return -LIBBPF_ERRNO__RELOC;
}
+ prog->reloc_desc[i].type = RELO_LD64;
prog->reloc_desc[i].insn_idx = insn_idx;
prog->reloc_desc[i].map_idx = map_idx;
}
@@ -961,27 +994,76 @@ bpf_object__create_maps(struct bpf_object *obj)
}
static int
+bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
+ struct reloc_desc *relo)
+{
+ struct bpf_insn *insn, *new_insn;
+ struct bpf_program *text;
+ size_t new_cnt;
+
+ if (relo->type != RELO_CALL)
+ return -LIBBPF_ERRNO__RELOC;
+
+ if (prog->idx == obj->efile.text_shndx) {
+ pr_warning("relo in .text insn %d into off %d\n",
+ relo->insn_idx, relo->text_off);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ if (prog->main_prog_cnt == 0) {
+ text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
+ if (!text) {
+ pr_warning("no .text section found yet relo into text exist\n");
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ new_cnt = prog->insns_cnt + text->insns_cnt;
+ new_insn = realloc(prog->insns, new_cnt * sizeof(*insn));
+ if (!new_insn) {
+ pr_warning("oom in prog realloc\n");
+ return -ENOMEM;
+ }
+ memcpy(new_insn + prog->insns_cnt, text->insns,
+ text->insns_cnt * sizeof(*insn));
+ prog->insns = new_insn;
+ prog->main_prog_cnt = prog->insns_cnt;
+ prog->insns_cnt = new_cnt;
+ }
+ insn = &prog->insns[relo->insn_idx];
+ insn->imm += prog->main_prog_cnt - relo->insn_idx;
+ pr_debug("added %zd insn from %s to prog %s\n",
+ text->insns_cnt, text->section_name, prog->section_name);
+ return 0;
+}
+
+static int
bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
{
- int i;
+ int i, err;
if (!prog || !prog->reloc_desc)
return 0;
for (i = 0; i < prog->nr_reloc; i++) {
- int insn_idx, map_idx;
- struct bpf_insn *insns = prog->insns;
+ if (prog->reloc_desc[i].type == RELO_LD64) {
+ struct bpf_insn *insns = prog->insns;
+ int insn_idx, map_idx;
- insn_idx = prog->reloc_desc[i].insn_idx;
- map_idx = prog->reloc_desc[i].map_idx;
+ insn_idx = prog->reloc_desc[i].insn_idx;
+ map_idx = prog->reloc_desc[i].map_idx;
- if (insn_idx >= (int)prog->insns_cnt) {
- pr_warning("relocation out of range: '%s'\n",
- prog->section_name);
- return -LIBBPF_ERRNO__RELOC;
+ if (insn_idx >= (int)prog->insns_cnt) {
+ pr_warning("relocation out of range: '%s'\n",
+ prog->section_name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
+ insns[insn_idx].imm = obj->maps[map_idx].fd;
+ } else {
+ err = bpf_program__reloc_text(prog, obj,
+ &prog->reloc_desc[i]);
+ if (err)
+ return err;
}
- insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
- insns[insn_idx].imm = obj->maps[map_idx].fd;
}
zfree(&prog->reloc_desc);
@@ -1024,7 +1106,6 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
Elf_Data *data = obj->efile.reloc[i].data;
int idx = shdr->sh_info;
struct bpf_program *prog;
- size_t nr_maps = obj->nr_maps;
if (shdr->sh_type != SHT_REL) {
pr_warning("internal error at %d\n", __LINE__);
@@ -1038,11 +1119,9 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
return -LIBBPF_ERRNO__RELOC;
}
- err = bpf_program__collect_reloc(prog, nr_maps,
+ err = bpf_program__collect_reloc(prog,
shdr, data,
- obj->efile.symbols,
- obj->efile.maps_shndx,
- obj->maps);
+ obj);
if (err)
return err;
}
@@ -1195,6 +1274,8 @@ bpf_object__load_progs(struct bpf_object *obj)
int err;
for (i = 0; i < obj->nr_programs; i++) {
+ if (obj->programs[i].idx == obj->efile.text_shndx)
+ continue;
err = bpf_program__load(&obj->programs[i],
obj->license,
obj->kern_version);
@@ -1721,6 +1802,45 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
+#define BPF_PROG_SEC(string, type) { string, sizeof(string), type }
+static const struct {
+ const char *sec;
+ size_t len;
+ enum bpf_prog_type prog_type;
+} section_names[] = {
+ BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
+ BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE),
+ BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE),
+ BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT),
+ BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
+ BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
+ BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB),
+ BPF_PROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK),
+ BPF_PROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE),
+ BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS),
+ BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB),
+};
+#undef BPF_PROG_SEC
+
+static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog)
+{
+ int i;
+
+ if (!prog->section_name)
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(section_names); i++)
+ if (strncmp(prog->section_name, section_names[i].sec,
+ section_names[i].len) == 0)
+ return section_names[i].prog_type;
+
+err:
+ pr_warning("failed to guess program type based on section name %s\n",
+ prog->section_name);
+
+ return BPF_PROG_TYPE_UNSPEC;
+}
+
int bpf_map__fd(struct bpf_map *map)
{
return map ? map->fd : -EINVAL;
@@ -1818,7 +1938,7 @@ long libbpf_get_error(const void *ptr)
int bpf_prog_load(const char *file, enum bpf_prog_type type,
struct bpf_object **pobj, int *prog_fd)
{
- struct bpf_program *prog;
+ struct bpf_program *prog, *first_prog = NULL;
struct bpf_object *obj;
int err;
@@ -1826,13 +1946,30 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
if (IS_ERR(obj))
return -ENOENT;
- prog = bpf_program__next(NULL, obj);
- if (!prog) {
+ bpf_object__for_each_program(prog, obj) {
+ /*
+ * If type is not specified, try to guess it based on
+ * section name.
+ */
+ if (type == BPF_PROG_TYPE_UNSPEC) {
+ type = bpf_program__guess_type(prog);
+ if (type == BPF_PROG_TYPE_UNSPEC) {
+ bpf_object__close(obj);
+ return -EINVAL;
+ }
+ }
+
+ bpf_program__set_type(prog, type);
+ if (prog->idx != obj->efile.text_shndx && !first_prog)
+ first_prog = prog;
+ }
+
+ if (!first_prog) {
+ pr_warning("object file doesn't contain bpf program\n");
bpf_object__close(obj);
return -ENOENT;
}
- bpf_program__set_type(prog, type);
err = bpf_object__load(obj);
if (err) {
bpf_object__close(obj);
@@ -1840,6 +1977,6 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
}
*pobj = obj;
- *prog_fd = bpf_program__fd(prog);
+ *prog_fd = bpf_program__fd(first_prog);
return 0;
}
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 3fab179..fcb3ed0 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -99,5 +99,6 @@ ifneq ($(silent),1)
QUIET_CLEAN = @printf ' CLEAN %s\n' $1;
QUIET_INSTALL = @printf ' INSTALL %s\n' $1;
+ QUIET_UNINST = @printf ' UNINST %s\n' $1;
endif
endif
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 4a9fb8f..a1fcb0c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -18,7 +18,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
- sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o
+ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
+ test_l4lb_noinline.o test_xdp_noinline.o
TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
test_offload.py
@@ -50,8 +51,13 @@ else
CPU ?= generic
endif
+CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
+ -Wno-compare-distinct-pointer-types
+
+$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
+$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
+
%.o: %.c
- $(CLANG) -I. -I./include/uapi -I../../../include/uapi \
- -Wno-compare-distinct-pointer-types \
+ $(CLANG) $(CLANG_FLAGS) \
-O2 -target bpf -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index fd9a17f..33cb00e 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -82,7 +82,8 @@ static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags,
static int (*bpf_perf_prog_read_value)(void *ctx, void *buf,
unsigned int buf_size) =
(void *) BPF_FUNC_perf_prog_read_value;
-
+static int (*bpf_override_return)(void *ctx, unsigned long rc) =
+ (void *) BPF_FUNC_override_return;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 52d53ed..9d48973 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -3,3 +3,4 @@ CONFIG_BPF_SYSCALL=y
CONFIG_NET_CLS_BPF=m
CONFIG_BPF_EVENTS=y
CONFIG_TEST_BPF=m
+CONFIG_CGROUP_BPF=y
diff --git a/tools/testing/selftests/bpf/test_l4lb_noinline.c b/tools/testing/selftests/bpf/test_l4lb_noinline.c
new file mode 100644
index 0000000..ba44a14
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_l4lb_noinline.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+#include "test_iptunnel_common.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+static __u32 rol32(__u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef
+
+typedef unsigned int u32;
+
+static u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ while (length > 12) {
+ a += *(u32 *)(k);
+ b += *(u32 *)(k + 4);
+ c += *(u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += initval;
+ b += initval;
+ c += initval;
+ __jhash_final(a, b, c);
+ return c;
+}
+
+static u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+#define PCKT_FRAGMENTED 65343
+#define IPV4_HDR_LEN_NO_OPT 20
+#define IPV4_PLUS_ICMP_HDR 28
+#define IPV6_PLUS_ICMP_HDR 48
+#define RING_SIZE 2
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+#define F_IPV6 (1 << 0)
+#define F_HASH_NO_SRC_PORT (1 << 0)
+#define F_ICMP (1 << 0)
+#define F_SYN_SET (1 << 1)
+
+struct packet_description {
+ union {
+ __be32 src;
+ __be32 srcv6[4];
+ };
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ union {
+ __u32 ports;
+ __u16 port16[2];
+ };
+ __u8 proto;
+ __u8 flags;
+};
+
+struct ctl_value {
+ union {
+ __u64 value;
+ __u32 ifindex;
+ __u8 mac[6];
+ };
+};
+
+struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+};
+
+struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+};
+
+struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+};
+
+struct eth_hdr {
+ unsigned char eth_dest[ETH_ALEN];
+ unsigned char eth_source[ETH_ALEN];
+ unsigned short eth_proto;
+};
+
+struct bpf_map_def SEC("maps") vip_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip),
+ .value_size = sizeof(struct vip_meta),
+ .max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ch_rings = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = CH_RINGS_SIZE,
+};
+
+struct bpf_map_def SEC("maps") reals = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct real_definition),
+ .max_entries = MAX_REALS,
+};
+
+struct bpf_map_def SEC("maps") stats = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct vip_stats),
+ .max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ctl_array = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct ctl_value),
+ .max_entries = CTL_MAP_SIZE,
+};
+
+static __u32 get_packet_hash(struct packet_description *pckt,
+ bool ipv6)
+{
+ if (ipv6)
+ return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
+ pckt->ports, CH_RINGS_SIZE);
+ else
+ return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
+}
+
+static bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6)
+{
+ __u32 hash = get_packet_hash(pckt, is_ipv6);
+ __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
+ __u32 *real_pos;
+
+ if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
+ hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+ return 0;
+
+ real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+ if (!real_pos)
+ return false;
+ key = *real_pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+ if (!(*real))
+ return false;
+ return true;
+}
+
+static int parse_icmpv6(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+ return TC_ACT_OK;
+ off += sizeof(struct icmp6hdr);
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+ pckt->proto = ip6h->nexthdr;
+ pckt->flags |= F_ICMP;
+ memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+ memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+ return TC_ACT_UNSPEC;
+}
+
+static int parse_icmp(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmphdr *icmp_hdr;
+ struct iphdr *iph;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+ icmp_hdr->code != ICMP_FRAG_NEEDED)
+ return TC_ACT_OK;
+ off += sizeof(struct icmphdr);
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (iph->ihl != 5)
+ return TC_ACT_SHOT;
+ pckt->proto = iph->protocol;
+ pckt->flags |= F_ICMP;
+ pckt->src = iph->daddr;
+ pckt->dst = iph->saddr;
+ return TC_ACT_UNSPEC;
+}
+
+static bool parse_udp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
+{
+ struct udphdr *udp;
+ udp = data + off;
+
+ if (udp + 1 > data_end)
+ return false;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = udp->source;
+ pckt->port16[1] = udp->dest;
+ } else {
+ pckt->port16[0] = udp->dest;
+ pckt->port16[1] = udp->source;
+ }
+ return true;
+}
+
+static bool parse_tcp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
+{
+ struct tcphdr *tcp;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return false;
+
+ if (tcp->syn)
+ pckt->flags |= F_SYN_SET;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = tcp->source;
+ pckt->port16[1] = tcp->dest;
+ } else {
+ pckt->port16[0] = tcp->dest;
+ pckt->port16[1] = tcp->source;
+ }
+ return true;
+}
+
+static int process_packet(void *data, __u64 off, void *data_end,
+ bool is_ipv6, struct __sk_buff *skb)
+{
+ void *pkt_start = (void *)(long)skb->data;
+ struct packet_description pckt = {};
+ struct eth_hdr *eth = pkt_start;
+ struct bpf_tunnel_key tkey = {};
+ struct vip_stats *data_stats;
+ struct real_definition *dst;
+ struct vip_meta *vip_info;
+ struct ctl_value *cval;
+ __u32 v4_intf_pos = 1;
+ __u32 v6_intf_pos = 2;
+ struct ipv6hdr *ip6h;
+ struct vip vip = {};
+ struct iphdr *iph;
+ int tun_flag = 0;
+ __u16 pkt_bytes;
+ __u64 iph_len;
+ __u32 ifindex;
+ __u8 protocol;
+ __u32 vip_num;
+ int action;
+
+ tkey.tunnel_ttl = 64;
+ if (is_ipv6) {
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ iph_len = sizeof(struct ipv6hdr);
+ protocol = ip6h->nexthdr;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(ip6h->payload_len);
+ off += iph_len;
+ if (protocol == IPPROTO_FRAGMENT) {
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_ICMPV6) {
+ action = parse_icmpv6(data, data_end, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV6_PLUS_ICMP_HDR;
+ } else {
+ memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+ memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+ }
+ } else {
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (iph->ihl != 5)
+ return TC_ACT_SHOT;
+
+ protocol = iph->protocol;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(iph->tot_len);
+ off += IPV4_HDR_LEN_NO_OPT;
+
+ if (iph->frag_off & PCKT_FRAGMENTED)
+ return TC_ACT_SHOT;
+ if (protocol == IPPROTO_ICMP) {
+ action = parse_icmp(data, data_end, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV4_PLUS_ICMP_HDR;
+ } else {
+ pckt.src = iph->saddr;
+ pckt.dst = iph->daddr;
+ }
+ }
+ protocol = pckt.proto;
+
+ if (protocol == IPPROTO_TCP) {
+ if (!parse_tcp(data, off, data_end, &pckt))
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_UDP) {
+ if (!parse_udp(data, off, data_end, &pckt))
+ return TC_ACT_SHOT;
+ } else {
+ return TC_ACT_SHOT;
+ }
+
+ if (is_ipv6)
+ memcpy(vip.daddr.v6, pckt.dstv6, 16);
+ else
+ vip.daddr.v4 = pckt.dst;
+
+ vip.dport = pckt.port16[1];
+ vip.protocol = pckt.proto;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info) {
+ vip.dport = 0;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info)
+ return TC_ACT_SHOT;
+ pckt.port16[1] = 0;
+ }
+
+ if (vip_info->flags & F_HASH_NO_SRC_PORT)
+ pckt.port16[0] = 0;
+
+ if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+ return TC_ACT_SHOT;
+
+ if (dst->flags & F_IPV6) {
+ cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+ tun_flag = BPF_F_TUNINFO_IPV6;
+ } else {
+ cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ tkey.remote_ipv4 = dst->dst;
+ }
+ vip_num = vip_info->vip_num;
+ data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+ if (!data_stats)
+ return TC_ACT_SHOT;
+ data_stats->pkts++;
+ data_stats->bytes += pkt_bytes;
+ bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
+ *(u32 *)eth->eth_dest = tkey.remote_ipv4;
+ return bpf_redirect(ifindex, 0);
+}
+
+SEC("l4lb-demo")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct eth_hdr *eth = data;
+ __u32 eth_proto;
+ __u32 nh_off;
+
+ nh_off = sizeof(struct eth_hdr);
+ if (data + nh_off > data_end)
+ return TC_ACT_SHOT;
+ eth_proto = eth->eth_proto;
+ if (eth_proto == bpf_htons(ETH_P_IP))
+ return process_packet(data, nh_off, data_end, false, ctx);
+ else if (eth_proto == bpf_htons(ETH_P_IPV6))
+ return process_packet(data, nh_off, data_end, true, ctx);
+ else
+ return TC_ACT_SHOT;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 6942753..6472ca9 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -21,8 +21,10 @@ typedef __u16 __sum16;
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/filter.h>
+#include <linux/perf_event.h>
#include <linux/unistd.h>
+#include <sys/ioctl.h>
#include <sys/wait.h>
#include <sys/resource.h>
#include <sys/types.h>
@@ -167,10 +169,9 @@ out:
#define NUM_ITER 100000
#define VIP_NUM 5
-static void test_l4lb(void)
+static void test_l4lb(const char *file)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
- const char *file = "./test_l4lb.o";
struct vip key = {.protocol = 6};
struct vip_meta {
__u32 flags;
@@ -247,6 +248,95 @@ out:
bpf_object__close(obj);
}
+static void test_l4lb_all(void)
+{
+ const char *file1 = "./test_l4lb.o";
+ const char *file2 = "./test_l4lb_noinline.o";
+
+ test_l4lb(file1);
+ test_l4lb(file2);
+}
+
+static void test_xdp_noinline(void)
+{
+ const char *file = "./test_xdp_noinline.o";
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct vip key = {.protocol = 6};
+ struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+ } value = {.vip_num = VIP_NUM};
+ __u32 stats_key = VIP_NUM;
+ struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+ } stats[nr_cpus];
+ struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+ } real_def = {.dst = MAGIC_VAL};
+ __u32 ch_key = 11, real_num = 3;
+ __u32 duration, retval, size;
+ int err, i, prog_fd, map_fd;
+ __u64 bytes = 0, pkts = 0;
+ struct bpf_object *obj;
+ char buf[128];
+ u32 *magic = (u32 *)buf;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ map_fd = bpf_find_map(__func__, obj, "vip_map");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &key, &value, 0);
+
+ map_fd = bpf_find_map(__func__, obj, "ch_rings");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
+
+ map_fd = bpf_find_map(__func__, obj, "reals");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
+
+ err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+ CHECK(err || errno || retval != 1 || size != 54 ||
+ *magic != MAGIC_VAL, "ipv4",
+ "err %d errno %d retval %d size %d magic %x\n",
+ err, errno, retval, size, *magic);
+
+ err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+ buf, &size, &retval, &duration);
+ CHECK(err || errno || retval != 1 || size != 74 ||
+ *magic != MAGIC_VAL, "ipv6",
+ "err %d errno %d retval %d size %d magic %x\n",
+ err, errno, retval, size, *magic);
+
+ map_fd = bpf_find_map(__func__, obj, "stats");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_lookup_elem(map_fd, &stats_key, stats);
+ for (i = 0; i < nr_cpus; i++) {
+ bytes += stats[i].bytes;
+ pkts += stats[i].pkts;
+ }
+ if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
+ error_cnt++;
+ printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts);
+ }
+out:
+ bpf_object__close(obj);
+}
+
static void test_tcp_estats(void)
{
const char *file = "./test_tcp_estats.o";
@@ -617,6 +707,136 @@ static void test_obj_name(void)
}
}
+static void test_tp_attach_query(void)
+{
+ const int num_progs = 3;
+ int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs];
+ __u32 duration = 0, info_len, saved_prog_ids[num_progs];
+ const char *file = "./test_tracepoint.o";
+ struct perf_event_query_bpf *query;
+ struct perf_event_attr attr = {};
+ struct bpf_object *obj[num_progs];
+ struct bpf_prog_info prog_info;
+ char buf[256];
+
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ return;
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+ "read", "bytes %d errno %d\n", bytes, errno))
+ return;
+
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+ query = malloc(sizeof(*query) + sizeof(__u32) * num_progs);
+ for (i = 0; i < num_progs; i++) {
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i],
+ &prog_fd[i]);
+ if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ goto cleanup1;
+
+ bzero(&prog_info, sizeof(prog_info));
+ prog_info.jited_prog_len = 0;
+ prog_info.xlated_prog_len = 0;
+ prog_info.nr_map_ids = 0;
+ info_len = sizeof(prog_info);
+ err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len);
+ if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n",
+ err, errno))
+ goto cleanup1;
+ saved_prog_ids[i] = prog_info.id;
+
+ pmu_fd[i] = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(pmu_fd[i] < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd[i], errno))
+ goto cleanup2;
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+ err, errno))
+ goto cleanup3;
+
+ if (i == 0) {
+ /* check NULL prog array query */
+ query->ids_len = num_progs;
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ if (CHECK(err || query->prog_cnt != 0,
+ "perf_event_ioc_query_bpf",
+ "err %d errno %d query->prog_cnt %u\n",
+ err, errno, query->prog_cnt))
+ goto cleanup3;
+ }
+
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[i]);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+ err, errno))
+ goto cleanup3;
+
+ if (i == 1) {
+ /* try to get # of programs only */
+ query->ids_len = 0;
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ if (CHECK(err || query->prog_cnt != 2,
+ "perf_event_ioc_query_bpf",
+ "err %d errno %d query->prog_cnt %u\n",
+ err, errno, query->prog_cnt))
+ goto cleanup3;
+
+ /* try a few negative tests */
+ /* invalid query pointer */
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF,
+ (struct perf_event_query_bpf *)0x1);
+ if (CHECK(!err || errno != EFAULT,
+ "perf_event_ioc_query_bpf",
+ "err %d errno %d\n", err, errno))
+ goto cleanup3;
+
+ /* no enough space */
+ query->ids_len = 1;
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ if (CHECK(!err || errno != ENOSPC || query->prog_cnt != 2,
+ "perf_event_ioc_query_bpf",
+ "err %d errno %d query->prog_cnt %u\n",
+ err, errno, query->prog_cnt))
+ goto cleanup3;
+ }
+
+ query->ids_len = num_progs;
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ if (CHECK(err || query->prog_cnt != (i + 1),
+ "perf_event_ioc_query_bpf",
+ "err %d errno %d query->prog_cnt %u\n",
+ err, errno, query->prog_cnt))
+ goto cleanup3;
+ for (j = 0; j < i + 1; j++)
+ if (CHECK(saved_prog_ids[j] != query->ids[j],
+ "perf_event_ioc_query_bpf",
+ "#%d saved_prog_id %x query prog_id %x\n",
+ j, saved_prog_ids[j], query->ids[j]))
+ goto cleanup3;
+ }
+
+ i = num_progs - 1;
+ for (; i >= 0; i--) {
+ cleanup3:
+ ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE);
+ cleanup2:
+ close(pmu_fd[i]);
+ cleanup1:
+ bpf_object__close(obj[i]);
+ }
+ free(query);
+}
+
int main(void)
{
struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
@@ -625,11 +845,13 @@ int main(void)
test_pkt_access();
test_xdp();
- test_l4lb();
+ test_l4lb_all();
+ test_xdp_noinline();
test_tcp_estats();
test_bpf_obj_id();
test_pkt_md_access();
test_obj_name();
+ test_tp_attach_query();
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_tracepoint.c b/tools/testing/selftests/bpf/test_tracepoint.c
new file mode 100644
index 0000000..04bf084
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tracepoint.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+struct sched_switch_args {
+ unsigned long long pad;
+ char prev_comm[16];
+ int prev_pid;
+ int prev_prio;
+ long long prev_state;
+ char next_comm[16];
+ int next_pid;
+ int next_prio;
+};
+
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct sched_switch_args *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3c64f30..3bacff0 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2,6 +2,7 @@
* Testsuite for eBPF verifier
*
* Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2017 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -277,7 +278,7 @@ static struct bpf_test tests[] = {
.insns = {
BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2),
},
- .errstr = "jump out of range",
+ .errstr = "not an exit",
.result = REJECT,
},
{
@@ -5648,7 +5649,7 @@ static struct bpf_test tests[] = {
"helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
.insns = {
BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
@@ -5883,7 +5884,7 @@ static struct bpf_test tests[] = {
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
@@ -8097,6 +8098,1623 @@ static struct bpf_test tests[] = {
.result = REJECT,
.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
},
+ {
+ "calls: basic sanity",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ },
+ {
+ "calls: not on unpriviledged",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "calls: overlapping caller/callee",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "last insn is not an exit or jmp",
+ .result = REJECT,
+ },
+ {
+ "calls: wrong recursive calls",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "calls: wrong src reg",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "BPF_CALL uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "calls: wrong off value",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, -1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "BPF_CALL uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "calls: jump back loop",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge from insn 0 to 0",
+ .result = REJECT,
+ },
+ {
+ "calls: conditional call",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "calls: conditional call 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ },
+ {
+ "calls: conditional call 3",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge from insn",
+ .result = REJECT,
+ },
+ {
+ "calls: conditional call 4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ },
+ {
+ "calls: conditional call 5",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge from insn",
+ .result = REJECT,
+ },
+ {
+ "calls: conditional call 6",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge from insn",
+ .result = REJECT,
+ },
+ {
+ "calls: using r0 returned by callee",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ },
+ {
+ "calls: using uninit r0 from callee",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "!read_ok",
+ .result = REJECT,
+ },
+ {
+ "calls: callee is using r1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_ACT,
+ .result = ACCEPT,
+ },
+ {
+ "calls: callee using args1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "allowed for root only",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "calls: callee using wrong args2",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "R2 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "calls: callee using two args",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+ offsetof(struct __sk_buff, len)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6,
+ offsetof(struct __sk_buff, len)),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "allowed for root only",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "calls: callee changing pkt pointers",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_8, BPF_REG_7, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ /* clear_all_pkt_pointers() has to walk all frames
+ * to make sure that pkt pointers in the caller
+ * are cleared when callee is calling a helper that
+ * adjusts packet size
+ */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_xdp_adjust_head),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R6 invalid mem access 'inv'",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "calls: two calls with args",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "calls: calls with stack arith",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "calls: calls with misaligned stack access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -61),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63),
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ .errstr = "misaligned stack access",
+ .result = REJECT,
+ },
+ {
+ "calls: calls control flow, jump test",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 43),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "calls: calls control flow, jump test 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 43),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "jump out of range from insn 1 to 4",
+ .result = REJECT,
+ },
+ {
+ "calls: two calls with bad jump",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "jump out of range from insn 11 to 9",
+ .result = REJECT,
+ },
+ {
+ "calls: recursive call. test1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "calls: recursive call. test2",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "calls: unreachable code",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "unreachable insn 6",
+ .result = REJECT,
+ },
+ {
+ "calls: invalid call",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -4),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "invalid destination",
+ .result = REJECT,
+ },
+ {
+ "calls: invalid call 2",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0x7fffffff),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "invalid destination",
+ .result = REJECT,
+ },
+ {
+ "calls: jumping across function bodies. test1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "calls: jumping across function bodies. test2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "calls: call without exit",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "not an exit",
+ .result = REJECT,
+ },
+ {
+ "calls: call into middle of ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "last insn",
+ .result = REJECT,
+ },
+ {
+ "calls: call into middle of other call",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "last insn",
+ .result = REJECT,
+ },
+ {
+ "calls: ld_abs with changing ctx data in callee",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_vlan_push),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed",
+ .result = REJECT,
+ },
+ {
+ "calls: two calls with bad fallthrough",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "not an exit",
+ .result = REJECT,
+ },
+ {
+ "calls: two calls with stack read",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+ },
+ {
+ "calls: two calls with stack write",
+ .insns = {
+ /* main prog */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 7),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_8),
+ /* write into stack frame of main prog */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* read from stack frame of main prog */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+ },
+ {
+ "calls: spill into caller stack frame",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .errstr = "cannot spill",
+ .result = REJECT,
+ },
+ {
+ "calls: write into caller stack frame",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+ },
+ {
+ "calls: write into callee stack frame",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .errstr = "cannot return stack pointer",
+ .result = REJECT,
+ },
+ {
+ "calls: two calls with stack write and void return",
+ .insns = {
+ /* main prog */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* write into stack frame of main prog */
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_EXIT_INSN(), /* void return */
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+ },
+ {
+ "calls: ambiguous return value",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "allowed for root only",
+ .result_unpriv = REJECT,
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "calls: two calls that return map_value",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ /* fetch secound map_value_ptr from the stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ /* call 3rd function twice */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* first time with fp-8 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ /* second time with fp-16 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ /* lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr into stack frame of main prog */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(), /* return 0 */
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .fixup_map1 = { 23 },
+ .result = ACCEPT,
+ },
+ {
+ "calls: two calls that return map_value with bool condition",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ /* call 3rd function twice */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* first time with fp-8 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ /* second time with fp-16 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ /* fetch secound map_value_ptr from the stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ /* lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(), /* return 0 */
+ /* write map_value_ptr into stack frame of main prog */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(), /* return 1 */
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .fixup_map1 = { 23 },
+ .result = ACCEPT,
+ },
+ {
+ "calls: two calls that return map_value with incorrect bool check",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ /* call 3rd function twice */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* first time with fp-8 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ /* second time with fp-16 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ /* fetch secound map_value_ptr from the stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ /* lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(), /* return 0 */
+ /* write map_value_ptr into stack frame of main prog */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(), /* return 1 */
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .fixup_map1 = { 23 },
+ .result = REJECT,
+ .errstr = "invalid read from stack off -16+0 size 8",
+ },
+ {
+ "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* 1st lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+
+ /* 2nd lookup from map */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-16 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+
+ /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* if arg2 == 1 do *arg1 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+ /* if arg4 == 1 do *arg3 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 12, 22 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=8 off=2 size=8",
+ },
+ {
+ "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* 1st lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+
+ /* 2nd lookup from map */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-16 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+
+ /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* if arg2 == 1 do *arg1 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+ /* if arg4 == 1 do *arg3 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 12, 22 },
+ .result = ACCEPT,
+ },
+ {
+ "calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* 1st lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+
+ /* 2nd lookup from map */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_9, 0), // 26
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-16 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+
+ /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), // 30
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), // 34
+ BPF_JMP_IMM(BPF_JA, 0, 0, -30),
+
+ /* subprog 2 */
+ /* if arg2 == 1 do *arg1 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+ /* if arg4 == 1 do *arg3 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -8),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 12, 22 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=8 off=2 size=8",
+ },
+ {
+ "calls: two calls that receive map_value_ptr_or_null via arg. test1",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* 1st lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+
+ /* 2nd lookup from map */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+
+ /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* if arg2 == 1 do *arg1 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+ /* if arg4 == 1 do *arg3 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 12, 22 },
+ .result = ACCEPT,
+ },
+ {
+ "calls: two calls that receive map_value_ptr_or_null via arg. test2",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* 1st lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+
+ /* 2nd lookup from map */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+
+ /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* if arg2 == 1 do *arg1 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+ /* if arg4 == 0 do *arg3 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 12, 22 },
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'inv'",
+ },
+ {
+ "calls: pkt_ptr spill into caller stack",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ /* spill unchecked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ /* now the pkt range is verified, read pkt_ptr from stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ /* Marking is still kept, but not in all cases safe. */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ /* spill unchecked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ /* now the pkt range is verified, read pkt_ptr from stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "invalid access to packet",
+ .result = REJECT,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ /* Marking is still kept and safe here. */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ /* spill unchecked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* now the pkt range is verified, read pkt_ptr from stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ /* Check marking propagated. */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ /* spill unchecked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 5",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ /* spill checked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "same insn cannot be used with different",
+ .result = REJECT,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ /* spill checked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "R4 invalid mem access",
+ .result = REJECT,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 7",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ /* spill checked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "R4 invalid mem access",
+ .result = REJECT,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 8",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ /* spill checked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 9",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ /* spill unchecked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "invalid access to packet",
+ .result = REJECT,
+ },
+ {
+ "calls: caller stack init to zero or map_value_or_null",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ /* fetch map_value_or_null or const_zero from stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ /* store into map_value */
+ BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ /* if (ctx == 0) return; */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8),
+ /* else bpf_map_lookup() and *(fp - 8) = r0 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 13 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "calls: stack init to zero and pruning",
+ .insns = {
+ /* first make allocated_stack 16 byte */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ /* now fork the execution such that the false branch
+ * of JGT insn will be verified second and it skisp zero
+ * init of fp-8 stack slot. If stack liveness marking
+ * is missing live_read marks from call map_lookup
+ * processing then pruning will incorrectly assume
+ * that fp-8 stack slot was unused in the fall-through
+ * branch and will accept the program incorrectly
+ */
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 6 },
+ .errstr = "invalid indirect read from stack off -8+0 size 8",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
};
static int probe_filter_length(const struct bpf_insn *fp)
diff --git a/tools/testing/selftests/bpf/test_xdp_noinline.c b/tools/testing/selftests/bpf/test_xdp_noinline.c
new file mode 100644
index 0000000..5e4aac7
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_noinline.c
@@ -0,0 +1,833 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+static __u32 rol32(__u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef
+
+typedef unsigned int u32;
+
+static __attribute__ ((noinline))
+u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ while (length > 12) {
+ a += *(u32 *)(k);
+ b += *(u32 *)(k + 4);
+ c += *(u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+static __attribute__ ((noinline))
+u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += initval;
+ b += initval;
+ c += initval;
+ __jhash_final(a, b, c);
+ return c;
+}
+
+static __attribute__ ((noinline))
+u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+struct flow_key {
+ union {
+ __be32 src;
+ __be32 srcv6[4];
+ };
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ union {
+ __u32 ports;
+ __u16 port16[2];
+ };
+ __u8 proto;
+};
+
+struct packet_description {
+ struct flow_key flow;
+ __u8 flags;
+};
+
+struct ctl_value {
+ union {
+ __u64 value;
+ __u32 ifindex;
+ __u8 mac[6];
+ };
+};
+
+struct vip_definition {
+ union {
+ __be32 vip;
+ __be32 vipv6[4];
+ };
+ __u16 port;
+ __u16 family;
+ __u8 proto;
+};
+
+struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+};
+
+struct real_pos_lru {
+ __u32 pos;
+ __u64 atime;
+};
+
+struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+};
+
+struct lb_stats {
+ __u64 v2;
+ __u64 v1;
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip_definition),
+ .value_size = sizeof(struct vip_meta),
+ .max_entries = 512,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = {
+ .type = BPF_MAP_TYPE_LRU_HASH,
+ .key_size = sizeof(struct flow_key),
+ .value_size = sizeof(struct real_pos_lru),
+ .max_entries = 300,
+ .map_flags = 1U << 1,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 12 * 655,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) reals = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct real_definition),
+ .max_entries = 40,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) stats = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct lb_stats),
+ .max_entries = 515,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct ctl_value),
+ .max_entries = 16,
+ .map_flags = 0,
+};
+
+struct eth_hdr {
+ unsigned char eth_dest[6];
+ unsigned char eth_source[6];
+ unsigned short eth_proto;
+};
+
+static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
+{
+ __u64 off = sizeof(struct eth_hdr);
+ if (is_ipv6) {
+ off += sizeof(struct ipv6hdr);
+ if (is_icmp)
+ off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr);
+ } else {
+ off += sizeof(struct iphdr);
+ if (is_icmp)
+ off += sizeof(struct icmphdr) + sizeof(struct iphdr);
+ }
+ return off;
+}
+
+static __attribute__ ((noinline))
+bool parse_udp(void *data, void *data_end,
+ bool is_ipv6, struct packet_description *pckt)
+{
+
+ bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
+ __u64 off = calc_offset(is_ipv6, is_icmp);
+ struct udphdr *udp;
+ udp = data + off;
+
+ if (udp + 1 > data_end)
+ return 0;
+ if (!is_icmp) {
+ pckt->flow.port16[0] = udp->source;
+ pckt->flow.port16[1] = udp->dest;
+ } else {
+ pckt->flow.port16[0] = udp->dest;
+ pckt->flow.port16[1] = udp->source;
+ }
+ return 1;
+}
+
+static __attribute__ ((noinline))
+bool parse_tcp(void *data, void *data_end,
+ bool is_ipv6, struct packet_description *pckt)
+{
+
+ bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
+ __u64 off = calc_offset(is_ipv6, is_icmp);
+ struct tcphdr *tcp;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return 0;
+ if (tcp->syn)
+ pckt->flags |= (1 << 1);
+ if (!is_icmp) {
+ pckt->flow.port16[0] = tcp->source;
+ pckt->flow.port16[1] = tcp->dest;
+ } else {
+ pckt->flow.port16[0] = tcp->dest;
+ pckt->flow.port16[1] = tcp->source;
+ }
+ return 1;
+}
+
+static __attribute__ ((noinline))
+bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
+ struct packet_description *pckt,
+ struct real_definition *dst, __u32 pkt_bytes)
+{
+ struct eth_hdr *new_eth;
+ struct eth_hdr *old_eth;
+ struct ipv6hdr *ip6h;
+ __u32 ip_suffix;
+ void *data_end;
+ void *data;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+ return 0;
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+ new_eth = data;
+ ip6h = data + sizeof(struct eth_hdr);
+ old_eth = data + sizeof(struct ipv6hdr);
+ if (new_eth + 1 > data_end ||
+ old_eth + 1 > data_end || ip6h + 1 > data_end)
+ return 0;
+ memcpy(new_eth->eth_dest, cval->mac, 6);
+ memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
+ new_eth->eth_proto = 56710;
+ ip6h->version = 6;
+ ip6h->priority = 0;
+ memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+
+ ip6h->nexthdr = IPPROTO_IPV6;
+ ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0];
+ ip6h->payload_len =
+ __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr));
+ ip6h->hop_limit = 4;
+
+ ip6h->saddr.in6_u.u6_addr32[0] = 1;
+ ip6h->saddr.in6_u.u6_addr32[1] = 2;
+ ip6h->saddr.in6_u.u6_addr32[2] = 3;
+ ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
+ memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
+ return 1;
+}
+
+static __attribute__ ((noinline))
+bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
+ struct packet_description *pckt,
+ struct real_definition *dst, __u32 pkt_bytes)
+{
+
+ __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]);
+ struct eth_hdr *new_eth;
+ struct eth_hdr *old_eth;
+ __u16 *next_iph_u16;
+ struct iphdr *iph;
+ __u32 csum = 0;
+ void *data_end;
+ void *data;
+
+ ip_suffix <<= 15;
+ ip_suffix ^= pckt->flow.src;
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+ return 0;
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+ new_eth = data;
+ iph = data + sizeof(struct eth_hdr);
+ old_eth = data + sizeof(struct iphdr);
+ if (new_eth + 1 > data_end ||
+ old_eth + 1 > data_end || iph + 1 > data_end)
+ return 0;
+ memcpy(new_eth->eth_dest, cval->mac, 6);
+ memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
+ new_eth->eth_proto = 8;
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->frag_off = 0;
+ iph->protocol = IPPROTO_IPIP;
+ iph->check = 0;
+ iph->tos = 1;
+ iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr));
+ /* don't update iph->daddr, since it will overwrite old eth_proto
+ * and multiple iterations of bpf_prog_run() will fail
+ */
+
+ iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst;
+ iph->ttl = 4;
+
+ next_iph_u16 = (__u16 *) iph;
+#pragma clang loop unroll(full)
+ for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
+ csum += *next_iph_u16++;
+ iph->check = ~((csum & 0xffff) + (csum >> 16));
+ if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
+ return 0;
+ return 1;
+}
+
+static __attribute__ ((noinline))
+bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
+{
+ struct eth_hdr *new_eth;
+ struct eth_hdr *old_eth;
+
+ old_eth = *data;
+ new_eth = *data + sizeof(struct ipv6hdr);
+ memcpy(new_eth->eth_source, old_eth->eth_source, 6);
+ memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
+ if (inner_v4)
+ new_eth->eth_proto = 8;
+ else
+ new_eth->eth_proto = 56710;
+ if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
+ return 0;
+ *data = (void *)(long)xdp->data;
+ *data_end = (void *)(long)xdp->data_end;
+ return 1;
+}
+
+static __attribute__ ((noinline))
+bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
+{
+ struct eth_hdr *new_eth;
+ struct eth_hdr *old_eth;
+
+ old_eth = *data;
+ new_eth = *data + sizeof(struct iphdr);
+ memcpy(new_eth->eth_source, old_eth->eth_source, 6);
+ memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
+ new_eth->eth_proto = 8;
+ if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
+ return 0;
+ *data = (void *)(long)xdp->data;
+ *data_end = (void *)(long)xdp->data_end;
+ return 1;
+}
+
+static __attribute__ ((noinline))
+int swap_mac_and_send(void *data, void *data_end)
+{
+ unsigned char tmp_mac[6];
+ struct eth_hdr *eth;
+
+ eth = data;
+ memcpy(tmp_mac, eth->eth_source, 6);
+ memcpy(eth->eth_source, eth->eth_dest, 6);
+ memcpy(eth->eth_dest, tmp_mac, 6);
+ return XDP_TX;
+}
+
+static __attribute__ ((noinline))
+int send_icmp_reply(void *data, void *data_end)
+{
+ struct icmphdr *icmp_hdr;
+ __u16 *next_iph_u16;
+ __u32 tmp_addr = 0;
+ struct iphdr *iph;
+ __u32 csum1 = 0;
+ __u32 csum = 0;
+ __u64 off = 0;
+
+ if (data + sizeof(struct eth_hdr)
+ + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
+ return XDP_DROP;
+ off += sizeof(struct eth_hdr);
+ iph = data + off;
+ off += sizeof(struct iphdr);
+ icmp_hdr = data + off;
+ icmp_hdr->type = 0;
+ icmp_hdr->checksum += 0x0007;
+ iph->ttl = 4;
+ tmp_addr = iph->daddr;
+ iph->daddr = iph->saddr;
+ iph->saddr = tmp_addr;
+ iph->check = 0;
+ next_iph_u16 = (__u16 *) iph;
+#pragma clang loop unroll(full)
+ for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
+ csum += *next_iph_u16++;
+ iph->check = ~((csum & 0xffff) + (csum >> 16));
+ return swap_mac_and_send(data, data_end);
+}
+
+static __attribute__ ((noinline))
+int send_icmp6_reply(void *data, void *data_end)
+{
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+ __be32 tmp_addr[4];
+ __u64 off = 0;
+
+ if (data + sizeof(struct eth_hdr)
+ + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end)
+ return XDP_DROP;
+ off += sizeof(struct eth_hdr);
+ ip6h = data + off;
+ off += sizeof(struct ipv6hdr);
+ icmp_hdr = data + off;
+ icmp_hdr->icmp6_type = 129;
+ icmp_hdr->icmp6_cksum -= 0x0001;
+ ip6h->hop_limit = 4;
+ memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16);
+ memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16);
+ memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16);
+ return swap_mac_and_send(data, data_end);
+}
+
+static __attribute__ ((noinline))
+int parse_icmpv6(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return XDP_DROP;
+ if (icmp_hdr->icmp6_type == 128)
+ return send_icmp6_reply(data, data_end);
+ if (icmp_hdr->icmp6_type != 3)
+ return XDP_PASS;
+ off += sizeof(struct icmp6hdr);
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return XDP_DROP;
+ pckt->flow.proto = ip6h->nexthdr;
+ pckt->flags |= (1 << 0);
+ memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16);
+ memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16);
+ return -1;
+}
+
+static __attribute__ ((noinline))
+int parse_icmp(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmphdr *icmp_hdr;
+ struct iphdr *iph;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return XDP_DROP;
+ if (icmp_hdr->type == 8)
+ return send_icmp_reply(data, data_end);
+ if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4))
+ return XDP_PASS;
+ off += sizeof(struct icmphdr);
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+ if (iph->ihl != 5)
+ return XDP_DROP;
+ pckt->flow.proto = iph->protocol;
+ pckt->flags |= (1 << 0);
+ pckt->flow.src = iph->daddr;
+ pckt->flow.dst = iph->saddr;
+ return -1;
+}
+
+static __attribute__ ((noinline))
+__u32 get_packet_hash(struct packet_description *pckt,
+ bool hash_16bytes)
+{
+ if (hash_16bytes)
+ return jhash_2words(jhash(pckt->flow.srcv6, 16, 12),
+ pckt->flow.ports, 24);
+ else
+ return jhash_2words(pckt->flow.src, pckt->flow.ports,
+ 24);
+}
+
+__attribute__ ((noinline))
+static bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6, void *lru_map)
+{
+ struct real_pos_lru new_dst_lru = { };
+ bool hash_16bytes = is_ipv6;
+ __u32 *real_pos, hash, key;
+ __u64 cur_time;
+
+ if (vip_info->flags & (1 << 2))
+ hash_16bytes = 1;
+ if (vip_info->flags & (1 << 3)) {
+ pckt->flow.port16[0] = pckt->flow.port16[1];
+ memset(pckt->flow.srcv6, 0, 16);
+ }
+ hash = get_packet_hash(pckt, hash_16bytes);
+ if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
+ hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+ return 0;
+ key = 2 * vip_info->vip_num + hash % 2;
+ real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+ if (!real_pos)
+ return 0;
+ key = *real_pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+ if (!(*real))
+ return 0;
+ if (!(vip_info->flags & (1 << 1))) {
+ __u32 conn_rate_key = 512 + 2;
+ struct lb_stats *conn_rate_stats =
+ bpf_map_lookup_elem(&stats, &conn_rate_key);
+
+ if (!conn_rate_stats)
+ return 1;
+ cur_time = bpf_ktime_get_ns();
+ if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
+ conn_rate_stats->v1 = 1;
+ conn_rate_stats->v2 = cur_time;
+ } else {
+ conn_rate_stats->v1 += 1;
+ if (conn_rate_stats->v1 >= 1)
+ return 1;
+ }
+ if (pckt->flow.proto == IPPROTO_UDP)
+ new_dst_lru.atime = cur_time;
+ new_dst_lru.pos = key;
+ bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
+ }
+ return 1;
+}
+
+__attribute__ ((noinline))
+static void connection_table_lookup(struct real_definition **real,
+ struct packet_description *pckt,
+ void *lru_map)
+{
+
+ struct real_pos_lru *dst_lru;
+ __u64 cur_time;
+ __u32 key;
+
+ dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow);
+ if (!dst_lru)
+ return;
+ if (pckt->flow.proto == IPPROTO_UDP) {
+ cur_time = bpf_ktime_get_ns();
+ if (cur_time - dst_lru->atime > 300000)
+ return;
+ dst_lru->atime = cur_time;
+ }
+ key = dst_lru->pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+}
+
+/* don't believe your eyes!
+ * below function has 6 arguments whereas bpf and llvm allow maximum of 5
+ * but since it's _static_ llvm can optimize one argument away
+ */
+__attribute__ ((noinline))
+static int process_l3_headers_v6(struct packet_description *pckt,
+ __u8 *protocol, __u64 off,
+ __u16 *pkt_bytes, void *data,
+ void *data_end)
+{
+ struct ipv6hdr *ip6h;
+ __u64 iph_len;
+ int action;
+
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return XDP_DROP;
+ iph_len = sizeof(struct ipv6hdr);
+ *protocol = ip6h->nexthdr;
+ pckt->flow.proto = *protocol;
+ *pkt_bytes = __builtin_bswap16(ip6h->payload_len);
+ off += iph_len;
+ if (*protocol == 45) {
+ return XDP_DROP;
+ } else if (*protocol == 59) {
+ action = parse_icmpv6(data, data_end, off, pckt);
+ if (action >= 0)
+ return action;
+ } else {
+ memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16);
+ memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16);
+ }
+ return -1;
+}
+
+__attribute__ ((noinline))
+static int process_l3_headers_v4(struct packet_description *pckt,
+ __u8 *protocol, __u64 off,
+ __u16 *pkt_bytes, void *data,
+ void *data_end)
+{
+ struct iphdr *iph;
+ __u64 iph_len;
+ int action;
+
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+ if (iph->ihl != 5)
+ return XDP_DROP;
+ *protocol = iph->protocol;
+ pckt->flow.proto = *protocol;
+ *pkt_bytes = __builtin_bswap16(iph->tot_len);
+ off += 20;
+ if (iph->frag_off & 65343)
+ return XDP_DROP;
+ if (*protocol == IPPROTO_ICMP) {
+ action = parse_icmp(data, data_end, off, pckt);
+ if (action >= 0)
+ return action;
+ } else {
+ pckt->flow.src = iph->saddr;
+ pckt->flow.dst = iph->daddr;
+ }
+ return -1;
+}
+
+__attribute__ ((noinline))
+static int process_packet(void *data, __u64 off, void *data_end,
+ bool is_ipv6, struct xdp_md *xdp)
+{
+
+ struct real_definition *dst = NULL;
+ struct packet_description pckt = { };
+ struct vip_definition vip = { };
+ struct lb_stats *data_stats;
+ struct eth_hdr *eth = data;
+ void *lru_map = &lru_cache;
+ struct vip_meta *vip_info;
+ __u32 lru_stats_key = 513;
+ __u32 mac_addr_pos = 0;
+ __u32 stats_key = 512;
+ struct ctl_value *cval;
+ __u16 pkt_bytes;
+ __u64 iph_len;
+ __u8 protocol;
+ __u32 vip_num;
+ int action;
+
+ if (is_ipv6)
+ action = process_l3_headers_v6(&pckt, &protocol, off,
+ &pkt_bytes, data, data_end);
+ else
+ action = process_l3_headers_v4(&pckt, &protocol, off,
+ &pkt_bytes, data, data_end);
+ if (action >= 0)
+ return action;
+ protocol = pckt.flow.proto;
+ if (protocol == IPPROTO_TCP) {
+ if (!parse_tcp(data, data_end, is_ipv6, &pckt))
+ return XDP_DROP;
+ } else if (protocol == IPPROTO_UDP) {
+ if (!parse_udp(data, data_end, is_ipv6, &pckt))
+ return XDP_DROP;
+ } else {
+ return XDP_TX;
+ }
+
+ if (is_ipv6)
+ memcpy(vip.vipv6, pckt.flow.dstv6, 16);
+ else
+ vip.vip = pckt.flow.dst;
+ vip.port = pckt.flow.port16[1];
+ vip.proto = pckt.flow.proto;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info) {
+ vip.port = 0;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info)
+ return XDP_PASS;
+ if (!(vip_info->flags & (1 << 4)))
+ pckt.flow.port16[1] = 0;
+ }
+ if (data_end - data > 1400)
+ return XDP_DROP;
+ data_stats = bpf_map_lookup_elem(&stats, &stats_key);
+ if (!data_stats)
+ return XDP_DROP;
+ data_stats->v1 += 1;
+ if (!dst) {
+ if (vip_info->flags & (1 << 0))
+ pckt.flow.port16[0] = 0;
+ if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1)))
+ connection_table_lookup(&dst, &pckt, lru_map);
+ if (dst)
+ goto out;
+ if (pckt.flow.proto == IPPROTO_TCP) {
+ struct lb_stats *lru_stats =
+ bpf_map_lookup_elem(&stats, &lru_stats_key);
+
+ if (!lru_stats)
+ return XDP_DROP;
+ if (pckt.flags & (1 << 1))
+ lru_stats->v1 += 1;
+ else
+ lru_stats->v2 += 1;
+ }
+ if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map))
+ return XDP_DROP;
+ data_stats->v2 += 1;
+ }
+out:
+ cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos);
+ if (!cval)
+ return XDP_DROP;
+ if (dst->flags & (1 << 0)) {
+ if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes))
+ return XDP_DROP;
+ } else {
+ if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
+ return XDP_DROP;
+ }
+ vip_num = vip_info->vip_num;
+ data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+ if (!data_stats)
+ return XDP_DROP;
+ data_stats->v1 += 1;
+ data_stats->v2 += pkt_bytes;
+
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+ if (data + 4 > data_end)
+ return XDP_DROP;
+ *(u32 *)data = dst->dst;
+ return XDP_DROP;
+}
+
+__attribute__ ((section("xdp-test"), used))
+int balancer_ingress(struct xdp_md *ctx)
+{
+ void *data = (void *)(long)ctx->data;
+ void *data_end = (void *)(long)ctx->data_end;
+ struct eth_hdr *eth = data;
+ __u32 eth_proto;
+ __u32 nh_off;
+
+ nh_off = sizeof(struct eth_hdr);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+ eth_proto = eth->eth_proto;
+ if (eth_proto == 8)
+ return process_packet(data, nh_off, data_end, 0, ctx);
+ else if (eth_proto == 56710)
+ return process_packet(data, nh_off, data_end, 1, ctx);
+ else
+ return XDP_DROP;
+}
+
+char _license[] __attribute__ ((section("license"), used)) = "GPL";
+int _version __attribute__ ((section("version"), used)) = 1;
OpenPOWER on IntegriCloud