diff options
Diffstat (limited to 'tools')
54 files changed, 8019 insertions, 297 deletions
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 07a66974..c8ec0ae 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -9,6 +9,35 @@ MAKE = make CFLAGS += -Wall -O2 CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif + +FEATURE_USER = .bpf +FEATURE_TESTS = libbfd disassembler-four-args +FEATURE_DISPLAY = libbfd disassembler-four-args + +check_feat := 1 +NON_CHECK_FEAT_TARGETS := clean bpftool_clean +ifdef MAKECMDGOALS +ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) + check_feat := 0 +endif +endif + +ifeq ($(check_feat),1) +ifeq ($(FEATURES_DUMP),) +include $(srctree)/tools/build/Makefile.feature +else +include $(FEATURES_DUMP) +endif +endif + +ifeq ($(feature-disassembler-four-args), 1) +CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE +endif + %.yacc.c: %.y $(YACC) -o $@ -d $< diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c index 75bf526..58c2bab 100644 --- a/tools/bpf/bpf_jit_disasm.c +++ b/tools/bpf/bpf_jit_disasm.c @@ -72,7 +72,14 @@ static void get_asm_insns(uint8_t *image, size_t len, int opcodes) disassemble_init_for_target(&info); +#ifdef DISASM_FOUR_ARGS_SIGNATURE + disassemble = disassembler(info.arch, + bfd_big_endian(bfdf), + info.mach, + bfdf); +#else disassemble = disassembler(bfdf); +#endif assert(disassemble); do { @@ -165,7 +172,8 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen, { char *ptr, *pptr, *tmp; off_t off = 0; - int ret, flen, proglen, pass, ulen = 0; + unsigned int proglen; + int ret, flen, pass, ulen = 0; regmatch_t pmatch[1]; unsigned long base; regex_t regex; @@ -192,7 +200,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen, } ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so); - ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx", + ret = sscanf(ptr, "flen=%d proglen=%u pass=%d image=%lx", &flen, &proglen, &pass, &base); if (ret != 4) { regfree(®ex); @@ -232,7 +240,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen, } assert(ulen == proglen); - printf("%d bytes emitted from JIT compiler (pass:%d, flen:%d)\n", + printf("%u bytes emitted from JIT compiler (pass:%d, flen:%d)\n", proglen, pass, flen); printf("%lx + <x>:\n", base); diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index 37292bb..c462a92 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -3,12 +3,16 @@ include ../../../scripts/utilities.mak INSTALL ?= install RM ?= rm -f +RMDIR ?= rmdir --ignore-fail-on-non-empty -# Make the path relative to DESTDIR, not prefix -ifndef DESTDIR -prefix ?= /usr/local +ifeq ($(V),1) + Q = +else + Q = @ endif -mandir ?= $(prefix)/share/man + +prefix ?= /usr/local +mandir ?= $(prefix)/man man8dir = $(mandir)/man8 MAN8_RST = $(wildcard *.rst) @@ -20,15 +24,21 @@ man: man8 man8: $(DOC_MAN8) $(OUTPUT)%.8: %.rst - rst2man $< > $@ + $(QUIET_GEN)rst2man $< > $@ clean: - $(call QUIET_CLEAN, Documentation) $(RM) $(DOC_MAN8) + $(call QUIET_CLEAN, Documentation) + $(Q)$(RM) $(DOC_MAN8) install: man - $(call QUIET_INSTALL, Documentation-man) \ - $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir); \ - $(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir); + $(call QUIET_INSTALL, Documentation-man) + $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir) + $(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir) + +uninstall: + $(call QUIET_UNINST, Documentation-man) + $(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8)) + $(Q)$(RMDIR) $(DESTDIR)$(man8dir) -.PHONY: man man8 clean install +.PHONY: man man8 clean install uninstall .DEFAULT_GOAL := man diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst new file mode 100644 index 0000000..2fe2a1b --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -0,0 +1,118 @@ +================ +bpftool-cgroup +================ +------------------------------------------------------------------------------- +tool for inspection and simple manipulation of eBPF progs +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **cgroup** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } + + *COMMANDS* := + { **show** | **list** | **attach** | **detach** | **help** } + +MAP COMMANDS +============= + +| **bpftool** **cgroup { show | list }** *CGROUP* +| **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] +| **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* +| **bpftool** **cgroup help** +| +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } +| *ATTACH_TYPE* := { *ingress* | *egress* | *sock_create* | *sock_ops* | *device* } +| *ATTACH_FLAGS* := { *multi* | *override* } + +DESCRIPTION +=========== + **bpftool cgroup { show | list }** *CGROUP* + List all programs attached to the cgroup *CGROUP*. + + Output will start with program ID followed by attach type, + attach flags and program name. + + **bpftool cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] + Attach program *PROG* to the cgroup *CGROUP* with attach type + *ATTACH_TYPE* and optional *ATTACH_FLAGS*. + + *ATTACH_FLAGS* can be one of: **override** if a sub-cgroup installs + some bpf program, the program in this cgroup yields to sub-cgroup + program; **multi** if a sub-cgroup installs some bpf program, + that cgroup program gets run in addition to the program in this + cgroup. + + Only one program is allowed to be attached to a cgroup with + no attach flags or the **override** flag. Attaching another + program will release old program and attach the new one. + + Multiple programs are allowed to be attached to a cgroup with + **multi**. They are executed in FIFO order (those that were + attached first, run first). + + Non-default *ATTACH_FLAGS* are supported by kernel version 4.14 + and later. + + *ATTACH_TYPE* can be on of: + **ingress** ingress path of the inet socket (since 4.10); + **egress** egress path of the inet socket (since 4.10); + **sock_create** opening of an inet socket (since 4.10); + **sock_ops** various socket operations (since 4.12); + **device** device access (since 4.15). + + **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* + Detach *PROG* from the cgroup *CGROUP* and attach type + *ATTACH_TYPE*. + + **bpftool prog help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -v, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + + -f, --bpffs + Show file names of pinned programs. + +EXAMPLES +======== +| +| **# mount -t bpf none /sys/fs/bpf/** +| **# mkdir /sys/fs/cgroup/test.slice** +| **# bpftool prog load ./device_cgroup.o /sys/fs/bpf/prog** +| **# bpftool cgroup attach /sys/fs/cgroup/test.slice/ device id 1 allow_multi** + +**# bpftool cgroup list /sys/fs/cgroup/test.slice/** + +:: + + ID AttachType AttachFlags Name + 1 device allow_multi bpf_prog1 + +| +| **# bpftool cgroup detach /sys/fs/cgroup/test.slice/ device id 1** +| **# bpftool cgroup list /sys/fs/cgroup/test.slice/** + +:: + + ID AttachType AttachFlags Name + +SEE ALSO +======== + **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 9f51a26..0ab32b3 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -15,13 +15,13 @@ SYNOPSIS *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := - { **show** | **dump** | **update** | **lookup** | **getnext** | **delete** + { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete** | **pin** | **help** } MAP COMMANDS ============= -| **bpftool** **map show** [*MAP*] +| **bpftool** **map { show | list }** [*MAP*] | **bpftool** **map dump** *MAP* | **bpftool** **map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*] | **bpftool** **map lookup** *MAP* **key** *BYTES* @@ -36,7 +36,7 @@ MAP COMMANDS DESCRIPTION =========== - **bpftool map show** [*MAP*] + **bpftool map { show | list }** [*MAP*] Show information about loaded maps. If *MAP* is specified show information only about given map, otherwise list all maps currently loaded on the system. @@ -128,4 +128,4 @@ EXAMPLES SEE ALSO ======== - **bpftool**\ (8), **bpftool-prog**\ (8) + **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 36e8d1c..e4ceee7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -15,22 +15,23 @@ SYNOPSIS *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := - { **show** | **dump xlated** | **dump jited** | **pin** | **help** } + { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | **help** } MAP COMMANDS ============= -| **bpftool** **prog show** [*PROG*] +| **bpftool** **prog { show | list }** [*PROG*] | **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog pin** *PROG* *FILE* +| **bpftool** **prog load** *OBJ* *FILE* | **bpftool** **prog help** | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } DESCRIPTION =========== - **bpftool prog show** [*PROG*] + **bpftool prog { show | list }** [*PROG*] Show information about loaded programs. If *PROG* is specified show information only about given program, otherwise list all programs currently loaded on the system. @@ -57,6 +58,11 @@ DESCRIPTION Note: *FILE* must be located in *bpffs* mount. + **bpftool prog load** *OBJ* *FILE* + Load bpf program from binary *OBJ* and pin as *FILE*. + + Note: *FILE* must be located in *bpffs* mount. + **bpftool prog help** Print short help message. @@ -126,8 +132,10 @@ EXAMPLES | | **# mount -t bpf none /sys/fs/bpf/** | **# bpftool prog pin id 10 /sys/fs/bpf/prog** +| **# bpftool prog load ./my_prog.o /sys/fs/bpf/prog2** | **# ls -l /sys/fs/bpf/** | -rw------- 1 root root 0 Jul 22 01:43 prog +| -rw------- 1 root root 0 Jul 22 01:44 prog2 **# bpftool prog dum jited pinned /sys/fs/bpf/prog opcodes** @@ -147,4 +155,4 @@ EXAMPLES SEE ALSO ======== - **bpftool**\ (8), **bpftool-map**\ (8) + **bpftool**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 926c03d..20689a3 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -16,17 +16,19 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** } + *OBJECT* := { **map** | **program** | **cgroup** } *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } | { **-j** | **--json** } [{ **-p** | **--pretty** }] } *MAP-COMMANDS* := - { **show** | **dump** | **update** | **lookup** | **getnext** | **delete** + { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete** | **pin** | **help** } - *PROG-COMMANDS* := { **show** | **dump jited** | **dump xlated** | **pin** - | **help** } + *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** + | **load** | **help** } + + *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } DESCRIPTION =========== @@ -53,4 +55,4 @@ OPTIONS SEE ALSO ======== - **bpftool-map**\ (8), **bpftool-prog**\ (8) + **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index ec3052c..26901ec 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -1,25 +1,10 @@ include ../../scripts/Makefile.include - include ../../scripts/utilities.mak ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) srctree := $(patsubst %/,%,$(dir $(srctree))) -#$(info Determined 'srctree' to be $(srctree)) -endif - -ifneq ($(objtree),) -#$(info Determined 'objtree' to be $(objtree)) -endif - -ifneq ($(OUTPUT),) -#$(info Determined 'OUTPUT' to be $(OUTPUT)) -# Adding $(OUTPUT) as a directory to look for source files, -# because use generated output files as sources dependency -# for flex/bison parsers. -VPATH += $(OUTPUT) -export VPATH endif ifeq ($(V),1) @@ -28,16 +13,18 @@ else Q = @ endif -BPF_DIR = $(srctree)/tools/lib/bpf/ +BPF_DIR = $(srctree)/tools/lib/bpf/ ifneq ($(OUTPUT),) - BPF_PATH=$(OUTPUT) + BPF_PATH = $(OUTPUT) else - BPF_PATH=$(BPF_DIR) + BPF_PATH = $(BPF_DIR) endif LIBBPF = $(BPF_PATH)libbpf.a +BPFTOOL_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion) + $(LIBBPF): FORCE $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) @@ -45,22 +32,50 @@ $(LIBBPF)-clean: $(call QUIET_CLEAN, libbpf) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null -prefix = /usr/local +prefix ?= /usr/local bash_compdir ?= /usr/share/bash-completion/completions CC = gcc CFLAGS += -O2 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ +CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ +CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' LIBS = -lelf -lbfd -lopcodes $(LIBBPF) +INSTALL ?= install +RM ?= rm -f + +FEATURE_USER = .bpftool +FEATURE_TESTS = libbfd disassembler-four-args +FEATURE_DISPLAY = libbfd disassembler-four-args + +check_feat := 1 +NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall +ifdef MAKECMDGOALS +ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) + check_feat := 0 +endif +endif + +ifeq ($(check_feat),1) +ifeq ($(FEATURES_DUMP),) +include $(srctree)/tools/build/Makefile.feature +else +include $(FEATURES_DUMP) +endif +endif + +ifeq ($(feature-disassembler-four-args), 1) +CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE +endif + include $(wildcard *.d) all: $(OUTPUT)bpftool -SRCS=$(wildcard *.c) -OBJS=$(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o +SRCS = $(wildcard *.c) +OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< @@ -73,21 +88,34 @@ $(OUTPUT)%.o: %.c clean: $(LIBBPF)-clean $(call QUIET_CLEAN, bpftool) - $(Q)rm -rf $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d + $(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d + +install: $(OUTPUT)bpftool + $(call QUIET_INSTALL, bpftool) + $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/sbin + $(Q)$(INSTALL) $(OUTPUT)bpftool $(DESTDIR)$(prefix)/sbin/bpftool + $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(bash_compdir) + $(Q)$(INSTALL) -m 0644 bash-completion/bpftool $(DESTDIR)$(bash_compdir) -install: - install -m 0755 -d $(prefix)/sbin - install $(OUTPUT)bpftool $(prefix)/sbin/bpftool - install -m 0755 -d $(bash_compdir) - install -m 0644 bash-completion/bpftool $(bash_compdir) +uninstall: + $(call QUIET_UNINST, bpftool) + $(Q)$(RM) $(DESTDIR)$(prefix)/sbin/bpftool + $(Q)$(RM) $(DESTDIR)$(bash_compdir)/bpftool doc: - $(Q)$(MAKE) -C Documentation/ + $(call descend,Documentation) + +doc-clean: + $(call descend,Documentation,clean) doc-install: - $(Q)$(MAKE) -C Documentation/ install + $(call descend,Documentation,install) + +doc-uninstall: + $(call descend,Documentation,uninstall) FORCE: -.PHONY: all clean FORCE install doc doc-install +.PHONY: all FORCE clean install uninstall +.PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 7febee0..0137866 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -197,7 +197,7 @@ _bpftool() local PROG_TYPE='id pinned tag' case $command in - show) + show|list) [[ $prev != "$command" ]] && return 0 COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) return 0 @@ -232,7 +232,7 @@ _bpftool() ;; *) [[ $prev == $object ]] && \ - COMPREPLY=( $( compgen -W 'dump help pin show' -- \ + COMPREPLY=( $( compgen -W 'dump help pin show list' -- \ "$cur" ) ) ;; esac @@ -240,7 +240,7 @@ _bpftool() map) local MAP_TYPE='id pinned' case $command in - show|dump) + show|list|dump) case $prev in $command) COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) @@ -343,7 +343,7 @@ _bpftool() *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'delete dump getnext help \ - lookup pin show update' -- "$cur" ) ) + lookup pin show list update' -- "$cur" ) ) ;; esac ;; diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c new file mode 100644 index 0000000..cae32a6 --- /dev/null +++ b/tools/bpf/bpftool/cgroup.c @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (C) 2017 Facebook +// Author: Roman Gushchin <guro@fb.com> + +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <bpf.h> + +#include "main.h" + +#define HELP_SPEC_ATTACH_FLAGS \ + "ATTACH_FLAGS := { multi | override }" + +#define HELP_SPEC_ATTACH_TYPES \ + "ATTACH_TYPE := { ingress | egress | sock_create | sock_ops | device }" + +static const char * const attach_type_strings[] = { + [BPF_CGROUP_INET_INGRESS] = "ingress", + [BPF_CGROUP_INET_EGRESS] = "egress", + [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create", + [BPF_CGROUP_SOCK_OPS] = "sock_ops", + [BPF_CGROUP_DEVICE] = "device", + [__MAX_BPF_ATTACH_TYPE] = NULL, +}; + +static enum bpf_attach_type parse_attach_type(const char *str) +{ + enum bpf_attach_type type; + + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + if (attach_type_strings[type] && + is_prefix(str, attach_type_strings[type])) + return type; + } + + return __MAX_BPF_ATTACH_TYPE; +} + +static int show_bpf_prog(int id, const char *attach_type_str, + const char *attach_flags_str) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + int prog_fd; + + prog_fd = bpf_prog_get_fd_by_id(id); + if (prog_fd < 0) + return -1; + + if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) { + close(prog_fd); + return -1; + } + + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "id", info.id); + jsonw_string_field(json_wtr, "attach_type", + attach_type_str); + jsonw_string_field(json_wtr, "attach_flags", + attach_flags_str); + jsonw_string_field(json_wtr, "name", info.name); + jsonw_end_object(json_wtr); + } else { + printf("%-8u %-15s %-15s %-15s\n", info.id, + attach_type_str, + attach_flags_str, + info.name); + } + + close(prog_fd); + return 0; +} + +static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) +{ + __u32 prog_ids[1024] = {0}; + char *attach_flags_str; + __u32 prog_cnt, iter; + __u32 attach_flags; + char buf[32]; + int ret; + + prog_cnt = ARRAY_SIZE(prog_ids); + ret = bpf_prog_query(cgroup_fd, type, 0, &attach_flags, prog_ids, + &prog_cnt); + if (ret) + return ret; + + if (prog_cnt == 0) + return 0; + + switch (attach_flags) { + case BPF_F_ALLOW_MULTI: + attach_flags_str = "multi"; + break; + case BPF_F_ALLOW_OVERRIDE: + attach_flags_str = "override"; + break; + case 0: + attach_flags_str = ""; + break; + default: + snprintf(buf, sizeof(buf), "unknown(%x)", attach_flags); + attach_flags_str = buf; + } + + for (iter = 0; iter < prog_cnt; iter++) + show_bpf_prog(prog_ids[iter], attach_type_strings[type], + attach_flags_str); + + return 0; +} + +static int do_show(int argc, char **argv) +{ + enum bpf_attach_type type; + int cgroup_fd; + int ret = -1; + + if (argc < 1) { + p_err("too few parameters for cgroup show"); + goto exit; + } else if (argc > 1) { + p_err("too many parameters for cgroup show"); + goto exit; + } + + cgroup_fd = open(argv[0], O_RDONLY); + if (cgroup_fd < 0) { + p_err("can't open cgroup %s", argv[1]); + goto exit; + } + + if (json_output) + jsonw_start_array(json_wtr); + else + printf("%-8s %-15s %-15s %-15s\n", "ID", "AttachType", + "AttachFlags", "Name"); + + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + /* + * Not all attach types may be supported, so it's expected, + * that some requests will fail. + * If we were able to get the show for at least one + * attach type, let's return 0. + */ + if (show_attached_bpf_progs(cgroup_fd, type) == 0) + ret = 0; + } + + if (json_output) + jsonw_end_array(json_wtr); + + close(cgroup_fd); +exit: + return ret; +} + +static int do_attach(int argc, char **argv) +{ + enum bpf_attach_type attach_type; + int cgroup_fd, prog_fd; + int attach_flags = 0; + int ret = -1; + int i; + + if (argc < 4) { + p_err("too few parameters for cgroup attach"); + goto exit; + } + + cgroup_fd = open(argv[0], O_RDONLY); + if (cgroup_fd < 0) { + p_err("can't open cgroup %s", argv[1]); + goto exit; + } + + attach_type = parse_attach_type(argv[1]); + if (attach_type == __MAX_BPF_ATTACH_TYPE) { + p_err("invalid attach type"); + goto exit_cgroup; + } + + argc -= 2; + argv = &argv[2]; + prog_fd = prog_parse_fd(&argc, &argv); + if (prog_fd < 0) + goto exit_cgroup; + + for (i = 0; i < argc; i++) { + if (is_prefix(argv[i], "multi")) { + attach_flags |= BPF_F_ALLOW_MULTI; + } else if (is_prefix(argv[i], "override")) { + attach_flags |= BPF_F_ALLOW_OVERRIDE; + } else { + p_err("unknown option: %s", argv[i]); + goto exit_cgroup; + } + } + + if (bpf_prog_attach(prog_fd, cgroup_fd, attach_type, attach_flags)) { + p_err("failed to attach program"); + goto exit_prog; + } + + if (json_output) + jsonw_null(json_wtr); + + ret = 0; + +exit_prog: + close(prog_fd); +exit_cgroup: + close(cgroup_fd); +exit: + return ret; +} + +static int do_detach(int argc, char **argv) +{ + enum bpf_attach_type attach_type; + int prog_fd, cgroup_fd; + int ret = -1; + + if (argc < 4) { + p_err("too few parameters for cgroup detach"); + goto exit; + } + + cgroup_fd = open(argv[0], O_RDONLY); + if (cgroup_fd < 0) { + p_err("can't open cgroup %s", argv[1]); + goto exit; + } + + attach_type = parse_attach_type(argv[1]); + if (attach_type == __MAX_BPF_ATTACH_TYPE) { + p_err("invalid attach type"); + goto exit_cgroup; + } + + argc -= 2; + argv = &argv[2]; + prog_fd = prog_parse_fd(&argc, &argv); + if (prog_fd < 0) + goto exit_cgroup; + + if (bpf_prog_detach2(prog_fd, cgroup_fd, attach_type)) { + p_err("failed to detach program"); + goto exit_prog; + } + + if (json_output) + jsonw_null(json_wtr); + + ret = 0; + +exit_prog: + close(prog_fd); +exit_cgroup: + close(cgroup_fd); +exit: + return ret; +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s %s { show | list } CGROUP\n" + " %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n" + " %s %s detach CGROUP ATTACH_TYPE PROG\n" + " %s %s help\n" + "\n" + " " HELP_SPEC_ATTACH_TYPES "\n" + " " HELP_SPEC_ATTACH_FLAGS "\n" + " " HELP_SPEC_PROGRAM "\n" + " " HELP_SPEC_OPTIONS "\n" + "", + bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "show", do_show }, + { "list", do_show }, + { "attach", do_attach }, + { "detach", do_detach }, + { "help", do_help }, + { 0 } +}; + +int do_cgroup(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 2bd3b28..0b482c0 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -34,6 +34,7 @@ /* Author: Jakub Kicinski <kubakici@wp.pl> */ #include <errno.h> +#include <fcntl.h> #include <fts.h> #include <libgen.h> #include <mntent.h> @@ -44,7 +45,9 @@ #include <unistd.h> #include <linux/limits.h> #include <linux/magic.h> +#include <net/if.h> #include <sys/mount.h> +#include <sys/stat.h> #include <sys/types.h> #include <sys/vfs.h> @@ -163,13 +166,49 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) return fd; } -int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) +int do_pin_fd(int fd, const char *name) { char err_str[ERR_MAX_LEN]; - unsigned int id; - char *endptr; char *file; char *dir; + int err = 0; + + err = bpf_obj_pin(fd, name); + if (!err) + goto out; + + file = malloc(strlen(name) + 1); + strcpy(file, name); + dir = dirname(file); + + if (errno != EPERM || is_bpffs(dir)) { + p_err("can't pin the object (%s): %s", name, strerror(errno)); + goto out_free; + } + + /* Attempt to mount bpffs, then retry pinning. */ + err = mnt_bpffs(dir, err_str, ERR_MAX_LEN); + if (!err) { + err = bpf_obj_pin(fd, name); + if (err) + p_err("can't pin the object (%s): %s", name, + strerror(errno)); + } else { + err_str[ERR_MAX_LEN - 1] = '\0'; + p_err("can't mount BPF file system to pin the object (%s): %s", + name, err_str); + } + +out_free: + free(file); +out: + return err; +} + +int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) +{ + unsigned int id; + char *endptr; int err; int fd; @@ -195,35 +234,8 @@ int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) return -1; } - err = bpf_obj_pin(fd, *argv); - if (!err) - goto out_close; - - file = malloc(strlen(*argv) + 1); - strcpy(file, *argv); - dir = dirname(file); - - if (errno != EPERM || is_bpffs(dir)) { - p_err("can't pin the object (%s): %s", *argv, strerror(errno)); - goto out_free; - } - - /* Attempt to mount bpffs, then retry pinning. */ - err = mnt_bpffs(dir, err_str, ERR_MAX_LEN); - if (!err) { - err = bpf_obj_pin(fd, *argv); - if (err) - p_err("can't pin the object (%s): %s", *argv, - strerror(errno)); - } else { - err_str[ERR_MAX_LEN - 1] = '\0'; - p_err("can't mount BPF file system to pin the object (%s): %s", - *argv, err_str); - } + err = do_pin_fd(fd, *argv); -out_free: - free(file); -out_close: close(fd); return err; } @@ -403,3 +415,124 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab) free(obj); } } + +static char * +ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf) +{ + struct stat st; + int err; + + err = stat("/proc/self/ns/net", &st); + if (err) { + p_err("Can't stat /proc/self: %s", strerror(errno)); + return NULL; + } + + if (st.st_dev != ns_dev || st.st_ino != ns_ino) + return NULL; + + return if_indextoname(ifindex, buf); +} + +static int read_sysfs_hex_int(char *path) +{ + char vendor_id_buf[8]; + int len; + int fd; + + fd = open(path, O_RDONLY); + if (fd < 0) { + p_err("Can't open %s: %s", path, strerror(errno)); + return -1; + } + + len = read(fd, vendor_id_buf, sizeof(vendor_id_buf)); + close(fd); + if (len < 0) { + p_err("Can't read %s: %s", path, strerror(errno)); + return -1; + } + if (len >= (int)sizeof(vendor_id_buf)) { + p_err("Value in %s too long", path); + return -1; + } + + vendor_id_buf[len] = 0; + + return strtol(vendor_id_buf, NULL, 0); +} + +static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name) +{ + char full_path[64]; + + snprintf(full_path, sizeof(full_path), "/sys/class/net/%s/device/%s", + devname, entry_name); + + return read_sysfs_hex_int(full_path); +} + +const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino) +{ + char devname[IF_NAMESIZE]; + int vendor_id; + int device_id; + + if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) { + p_err("Can't get net device name for ifindex %d: %s", ifindex, + strerror(errno)); + return NULL; + } + + vendor_id = read_sysfs_netdev_hex_int(devname, "vendor"); + if (vendor_id < 0) { + p_err("Can't get device vendor id for %s", devname); + return NULL; + } + + switch (vendor_id) { + case 0x19ee: + device_id = read_sysfs_netdev_hex_int(devname, "device"); + if (device_id != 0x4000 && + device_id != 0x6000 && + device_id != 0x6003) + p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch"); + return "NFP-6xxx"; + default: + p_err("Can't get bfd arch name for device vendor id 0x%04x", + vendor_id); + return NULL; + } +} + +void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode) +{ + char name[IF_NAMESIZE]; + + if (!ifindex) + return; + + printf(" dev "); + if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name)) + printf("%s", name); + else + printf("ifindex %u ns_dev %llu ns_ino %llu", + ifindex, ns_dev, ns_inode); +} + +void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode) +{ + char name[IF_NAMESIZE]; + + if (!ifindex) + return; + + jsonw_name(json_wtr, "dev"); + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "ifindex", ifindex); + jsonw_uint_field(json_wtr, "ns_dev", ns_dev); + jsonw_uint_field(json_wtr, "ns_inode", ns_inode); + if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name)) + jsonw_string_field(json_wtr, "ifname", name); + jsonw_end_object(json_wtr); +} diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 1551d39..8743932 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -76,7 +76,8 @@ static int fprintf_json(void *out, const char *fmt, ...) return 0; } -void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) +void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch) { disassembler_ftype disassemble; struct disassemble_info info; @@ -100,6 +101,19 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) else init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf); + + /* Update architecture info for offload. */ + if (arch) { + const bfd_arch_info_type *inf = bfd_scan_arch(arch); + + if (inf) { + bfdf->arch_info = inf; + } else { + p_err("No libfd support for %s", arch); + return; + } + } + info.arch = bfd_get_arch(bfdf); info.mach = bfd_get_mach(bfdf); info.buffer = image; @@ -107,7 +121,14 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) disassemble_init_for_target(&info); +#ifdef DISASM_FOUR_ARGS_SIGNATURE + disassemble = disassembler(info.arch, + bfd_big_endian(bfdf), + info.mach, + bfdf); +#else disassemble = disassembler(bfdf); +#endif assert(disassemble); if (json_output) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index d294bc8..3a0396d 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -38,7 +38,6 @@ #include <errno.h> #include <getopt.h> #include <linux/bpf.h> -#include <linux/version.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -85,7 +84,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map }\n" + " OBJECT := { prog | map | cgroup }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -95,21 +94,13 @@ static int do_help(int argc, char **argv) static int do_version(int argc, char **argv) { - unsigned int version[3]; - - version[0] = LINUX_VERSION_CODE >> 16; - version[1] = LINUX_VERSION_CODE >> 8 & 0xf; - version[2] = LINUX_VERSION_CODE & 0xf; - if (json_output) { jsonw_start_object(json_wtr); jsonw_name(json_wtr, "version"); - jsonw_printf(json_wtr, "\"%u.%u.%u\"", - version[0], version[1], version[2]); + jsonw_printf(json_wtr, "\"%s\"", BPFTOOL_VERSION); jsonw_end_object(json_wtr); } else { - printf("%s v%u.%u.%u\n", bin_name, - version[0], version[1], version[2]); + printf("%s v%s\n", bin_name, BPFTOOL_VERSION); } return 0; } @@ -173,6 +164,7 @@ static const struct cmd cmds[] = { { "batch", do_batch }, { "prog", do_prog }, { "map", do_map }, + { "cgroup", do_cgroup }, { "version", do_version }, { 0 } }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index bff330b..b8e9584 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -96,6 +96,8 @@ struct pinned_obj { int build_pinned_obj_table(struct pinned_obj_table *table, enum bpf_obj_type type); void delete_pinned_obj_table(struct pinned_obj_table *tab); +void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); +void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); struct cmd { const char *cmd; @@ -111,13 +113,18 @@ char *get_fdinfo(int fd, const char *key); int open_obj_pinned(char *path); int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)); +int do_pin_fd(int fd, const char *name); int do_prog(int argc, char **arg); int do_map(int argc, char **arg); +int do_cgroup(int argc, char **arg); int prog_parse_fd(int *argc, char ***argv); -void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes); +void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch); void print_hex_data_json(uint8_t *data, size_t len); +const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); + #endif diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index a8c3a33..f95fa67 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -66,6 +66,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", [BPF_MAP_TYPE_DEVMAP] = "devmap", [BPF_MAP_TYPE_SOCKMAP] = "sockmap", + [BPF_MAP_TYPE_CPUMAP] = "cpumap", }; static unsigned int get_possible_cpus(void) @@ -428,6 +429,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_name(json_wtr, "flags"); jsonw_printf(json_wtr, "%#x", info->map_flags); + + print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); + jsonw_uint_field(json_wtr, "bytes_key", info->key_size); jsonw_uint_field(json_wtr, "bytes_value", info->value_size); jsonw_uint_field(json_wtr, "max_entries", info->max_entries); @@ -469,7 +473,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (*info->name) printf("name %s ", info->name); - printf("flags 0x%x\n", info->map_flags); + printf("flags 0x%x", info->map_flags); + print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); + printf("\n"); printf("\tkey %uB value %uB max_entries %u", info->key_size, info->value_size, info->max_entries); @@ -861,7 +867,7 @@ static int do_help(int argc, char **argv) } fprintf(stderr, - "Usage: %s %s show [MAP]\n" + "Usage: %s %s { show | list } [MAP]\n" " %s %s dump MAP\n" " %s %s update MAP key BYTES value VALUE [UPDATE_FLAGS]\n" " %s %s lookup MAP key BYTES\n" @@ -885,6 +891,7 @@ static int do_help(int argc, char **argv) static const struct cmd cmds[] = { { "show", do_show }, + { "list", do_show }, { "help", do_help }, { "dump", do_dump }, { "update", do_update }, diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index dded773..e8e2baa 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -45,6 +45,7 @@ #include <sys/stat.h> #include <bpf.h> +#include <libbpf.h> #include "main.h" #include "disasm.h" @@ -65,6 +66,7 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", [BPF_PROG_TYPE_SK_SKB] = "sk_skb", + [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", }; static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) @@ -229,6 +231,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) info->tag[0], info->tag[1], info->tag[2], info->tag[3], info->tag[4], info->tag[5], info->tag[6], info->tag[7]); + print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); + if (info->load_time) { char buf[32]; @@ -286,6 +290,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf("tag "); fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); + print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); printf("\n"); if (info->load_time) { @@ -402,6 +407,88 @@ static int do_show(int argc, char **argv) return err; } +#define SYM_MAX_NAME 256 + +struct kernel_sym { + unsigned long address; + char name[SYM_MAX_NAME]; +}; + +struct dump_data { + unsigned long address_call_base; + struct kernel_sym *sym_mapping; + __u32 sym_count; + char scratch_buff[SYM_MAX_NAME]; +}; + +static int kernel_syms_cmp(const void *sym_a, const void *sym_b) +{ + return ((struct kernel_sym *)sym_a)->address - + ((struct kernel_sym *)sym_b)->address; +} + +static void kernel_syms_load(struct dump_data *dd) +{ + struct kernel_sym *sym; + char buff[256]; + void *tmp, *address; + FILE *fp; + + fp = fopen("/proc/kallsyms", "r"); + if (!fp) + return; + + while (!feof(fp)) { + if (!fgets(buff, sizeof(buff), fp)) + break; + tmp = realloc(dd->sym_mapping, + (dd->sym_count + 1) * + sizeof(*dd->sym_mapping)); + if (!tmp) { +out: + free(dd->sym_mapping); + dd->sym_mapping = NULL; + fclose(fp); + return; + } + dd->sym_mapping = tmp; + sym = &dd->sym_mapping[dd->sym_count]; + if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2) + continue; + sym->address = (unsigned long)address; + if (!strcmp(sym->name, "__bpf_call_base")) { + dd->address_call_base = sym->address; + /* sysctl kernel.kptr_restrict was set */ + if (!sym->address) + goto out; + } + if (sym->address) + dd->sym_count++; + } + + fclose(fp); + + qsort(dd->sym_mapping, dd->sym_count, + sizeof(*dd->sym_mapping), kernel_syms_cmp); +} + +static void kernel_syms_destroy(struct dump_data *dd) +{ + free(dd->sym_mapping); +} + +static struct kernel_sym *kernel_syms_search(struct dump_data *dd, + unsigned long key) +{ + struct kernel_sym sym = { + .address = key, + }; + + return dd->sym_mapping ? + bsearch(&sym, dd->sym_mapping, dd->sym_count, + sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL; +} + static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...) { va_list args; @@ -411,8 +498,71 @@ static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...) va_end(args); } -static void dump_xlated_plain(void *buf, unsigned int len, bool opcodes) +static const char *print_call_pcrel(struct dump_data *dd, + struct kernel_sym *sym, + unsigned long address, + const struct bpf_insn *insn) +{ + if (sym) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%+d#%s", insn->off, sym->name); + else + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%+d#0x%lx", insn->off, address); + return dd->scratch_buff; +} + +static const char *print_call_helper(struct dump_data *dd, + struct kernel_sym *sym, + unsigned long address) +{ + if (sym) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%s", sym->name); + else + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "0x%lx", address); + return dd->scratch_buff; +} + +static const char *print_call(void *private_data, + const struct bpf_insn *insn) +{ + struct dump_data *dd = private_data; + unsigned long address = dd->address_call_base + insn->imm; + struct kernel_sym *sym; + + sym = kernel_syms_search(dd, address); + if (insn->src_reg == BPF_PSEUDO_CALL) + return print_call_pcrel(dd, sym, address, insn); + else + return print_call_helper(dd, sym, address); +} + +static const char *print_imm(void *private_data, + const struct bpf_insn *insn, + __u64 full_imm) +{ + struct dump_data *dd = private_data; + + if (insn->src_reg == BPF_PSEUDO_MAP_FD) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "map[id:%u]", insn->imm); + else + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "0x%llx", (unsigned long long)full_imm); + return dd->scratch_buff; +} + +static void dump_xlated_plain(struct dump_data *dd, void *buf, + unsigned int len, bool opcodes) { + const struct bpf_insn_cbs cbs = { + .cb_print = print_insn, + .cb_call = print_call, + .cb_imm = print_imm, + .private_data = dd, + }; struct bpf_insn *insn = buf; bool double_insn = false; unsigned int i; @@ -426,7 +576,7 @@ static void dump_xlated_plain(void *buf, unsigned int len, bool opcodes) double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); printf("% 4d: ", i); - print_bpf_insn(print_insn, NULL, insn + i, true); + print_bpf_insn(&cbs, NULL, insn + i, true); if (opcodes) { printf(" "); @@ -455,8 +605,15 @@ static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...) va_end(args); } -static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) +static void dump_xlated_json(struct dump_data *dd, void *buf, + unsigned int len, bool opcodes) { + const struct bpf_insn_cbs cbs = { + .cb_print = print_insn_json, + .cb_call = print_call, + .cb_imm = print_imm, + .private_data = dd, + }; struct bpf_insn *insn = buf; bool double_insn = false; unsigned int i; @@ -471,7 +628,7 @@ static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) jsonw_start_object(json_wtr); jsonw_name(json_wtr, "disasm"); - print_bpf_insn(print_insn_json, NULL, insn + i, true); + print_bpf_insn(&cbs, NULL, insn + i, true); if (opcodes) { jsonw_name(json_wtr, "opcodes"); @@ -506,6 +663,7 @@ static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) static int do_dump(int argc, char **argv) { struct bpf_prog_info info = {}; + struct dump_data dd = {}; __u32 len = sizeof(info); unsigned int buf_size; char *filepath = NULL; @@ -593,6 +751,14 @@ static int do_dump(int argc, char **argv) goto err_free; } + if ((member_len == &info.jited_prog_len && + info.jited_prog_insns == 0) || + (member_len == &info.xlated_prog_len && + info.xlated_prog_insns == 0)) { + p_err("error retrieving insn dump: kernel.kptr_restrict set?"); + goto err_free; + } + if (filepath) { fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd < 0) { @@ -609,17 +775,29 @@ static int do_dump(int argc, char **argv) goto err_free; } } else { - if (member_len == &info.jited_prog_len) - disasm_print_insn(buf, *member_len, opcodes); - else + if (member_len == &info.jited_prog_len) { + const char *name = NULL; + + if (info.ifindex) { + name = ifindex_to_bfd_name_ns(info.ifindex, + info.netns_dev, + info.netns_ino); + if (!name) + goto err_free; + } + + disasm_print_insn(buf, *member_len, opcodes, name); + } else { + kernel_syms_load(&dd); if (json_output) - dump_xlated_json(buf, *member_len, opcodes); + dump_xlated_json(&dd, buf, *member_len, opcodes); else - dump_xlated_plain(buf, *member_len, opcodes); + dump_xlated_plain(&dd, buf, *member_len, opcodes); + kernel_syms_destroy(&dd); + } } free(buf); - return 0; err_free: @@ -637,6 +815,30 @@ static int do_pin(int argc, char **argv) return err; } +static int do_load(int argc, char **argv) +{ + struct bpf_object *obj; + int prog_fd; + + if (argc != 2) + usage(); + + if (bpf_prog_load(argv[0], BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) { + p_err("failed to load program"); + return -1; + } + + if (do_pin_fd(prog_fd, argv[1])) { + p_err("failed to pin program"); + return -1; + } + + if (json_output) + jsonw_null(json_wtr); + + return 0; +} + static int do_help(int argc, char **argv) { if (json_output) { @@ -645,26 +847,29 @@ static int do_help(int argc, char **argv) } fprintf(stderr, - "Usage: %s %s show [PROG]\n" + "Usage: %s %s { show | list } [PROG]\n" " %s %s dump xlated PROG [{ file FILE | opcodes }]\n" " %s %s dump jited PROG [{ file FILE | opcodes }]\n" " %s %s pin PROG FILE\n" + " %s %s load OBJ FILE\n" " %s %s help\n" "\n" " " HELP_SPEC_PROGRAM "\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2], bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); return 0; } static const struct cmd cmds[] = { { "show", do_show }, + { "list", do_show }, { "help", do_help }, { "dump", do_dump }, { "pin", do_pin }, + { "load", do_load }, { 0 } }; diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 59585fe..0a490cb 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -13,6 +13,7 @@ FILES= \ test-hello.bin \ test-libaudit.bin \ test-libbfd.bin \ + test-disassembler-four-args.bin \ test-liberty.bin \ test-liberty-z.bin \ test-cplus-demangle.bin \ @@ -197,6 +198,9 @@ $(OUTPUT)test-libpython-version.bin: $(OUTPUT)test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl +$(OUTPUT)test-disassembler-four-args.bin: + $(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes + $(OUTPUT)test-liberty.bin: $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty diff --git a/tools/build/feature/test-disassembler-four-args.c b/tools/build/feature/test-disassembler-four-args.c new file mode 100644 index 0000000..45ce65c --- /dev/null +++ b/tools/build/feature/test-disassembler-four-args.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <bfd.h> +#include <dis-asm.h> + +int main(void) +{ + bfd *abfd = bfd_openr(NULL, NULL); + + disassembler(bfd_get_arch(abfd), + bfd_big_endian(abfd), + bfd_get_mach(abfd), + abfd); + + return 0; +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4c223ab..db6bdc3 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -17,7 +17,7 @@ #define BPF_ALU64 0x07 /* alu mode in double word width */ /* ld/ldx fields */ -#define BPF_DW 0x18 /* double word */ +#define BPF_DW 0x18 /* double word (64-bit) */ #define BPF_XADD 0xc0 /* exclusive add */ /* alu/jmp fields */ @@ -197,8 +197,14 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 +/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative + * offset to another bpf function + */ +#define BPF_PSEUDO_CALL 1 + /* flags for BPF_MAP_UPDATE_ELEM command */ #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ @@ -239,6 +245,7 @@ union bpf_attr { * BPF_F_NUMA_NODE is set). */ char map_name[BPF_OBJ_NAME_LEN]; + __u32 map_ifindex; /* ifindex of netdev to create on */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -635,6 +642,14 @@ union bpf_attr { * @optlen: length of optval in bytes * Return: 0 or negative error * + * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags) + * Set callback flags for sock_ops + * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct + * @flags: flags value + * Return: 0 for no error + * -EINVAL if there is no full tcp socket + * bits in flags that are not supported by current kernel + * * int bpf_skb_adjust_room(skb, len_diff, mode, flags) * Grow or shrink room in sk_buff. * @skb: pointer to skb @@ -677,6 +692,10 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code + * + * int bpf_override_return(pt_regs, rc) + * @pt_regs: pointer to struct pt_regs + * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -736,7 +755,9 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), + FN(getsockopt), \ + FN(override_return), \ + FN(sock_ops_cb_flags_set), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -888,6 +909,9 @@ struct xdp_md { __u32 data; __u32 data_end; __u32 data_meta; + /* Below access go through struct xdp_rxq_info */ + __u32 ingress_ifindex; /* rxq->dev->ifindex */ + __u32 rx_queue_index; /* rxq->queue_index */ }; enum sk_action { @@ -910,6 +934,9 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); struct bpf_map_info { @@ -920,6 +947,9 @@ struct bpf_map_info { __u32 max_entries; __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops @@ -931,8 +961,9 @@ struct bpf_map_info { struct bpf_sock_ops { __u32 op; union { - __u32 reply; - __u32 replylong[4]; + __u32 args[4]; /* Optionally passed to bpf program */ + __u32 reply; /* Returned by bpf program */ + __u32 replylong[4]; /* Optionally returned by bpf prog */ }; __u32 family; __u32 remote_ip4; /* Stored in network byte order */ @@ -941,8 +972,45 @@ struct bpf_sock_ops { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + __u32 is_fullsock; /* Some TCP fields are only valid if + * there is a full socket. If not, the + * fields read as zero. + */ + __u32 snd_cwnd; + __u32 srtt_us; /* Averaged RTT << 3 in usecs */ + __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ + __u32 state; + __u32 rtt_min; + __u32 snd_ssthresh; + __u32 rcv_nxt; + __u32 snd_nxt; + __u32 snd_una; + __u32 mss_cache; + __u32 ecn_flags; + __u32 rate_delivered; + __u32 rate_interval_us; + __u32 packets_out; + __u32 retrans_out; + __u32 total_retrans; + __u32 segs_in; + __u32 data_segs_in; + __u32 segs_out; + __u32 data_segs_out; + __u32 lost_out; + __u32 sacked_out; + __u32 sk_txhash; + __u64 bytes_received; + __u64 bytes_acked; }; +/* Definitions for bpf_sock_ops_cb_flags */ +#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) +#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) +#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently + * supported cb flags + */ + /* List of known BPF sock_ops operators. * New entries can only be added at the end */ @@ -976,6 +1044,43 @@ enum { * a congestion threshold. RTTs above * this indicate congestion */ + BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered. + * Arg1: value of icsk_retransmits + * Arg2: value of icsk_rto + * Arg3: whether RTO has expired + */ + BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted. + * Arg1: sequence number of 1st byte + * Arg2: # segments + * Arg3: return value of + * tcp_transmit_skb (0 => success) + */ + BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. + * Arg1: old_state + * Arg2: new_state + */ +}; + +/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect + * changes between the TCP and BPF versions. Ideally this should never happen. + * If it does, we need to add code to convert them before calling + * the BPF sock_ops function. + */ +enum { + BPF_TCP_ESTABLISHED = 1, + BPF_TCP_SYN_SENT, + BPF_TCP_SYN_RECV, + BPF_TCP_FIN_WAIT1, + BPF_TCP_FIN_WAIT2, + BPF_TCP_TIME_WAIT, + BPF_TCP_CLOSE, + BPF_TCP_CLOSE_WAIT, + BPF_TCP_LAST_ACK, + BPF_TCP_LISTEN, + BPF_TCP_CLOSING, /* Now a valid state */ + BPF_TCP_NEW_SYN_RECV, + + BPF_TCP_MAX_STATES /* Leave at the end! */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ @@ -995,7 +1100,8 @@ struct bpf_perf_event_value { #define BPF_DEVCG_DEV_CHAR (1ULL << 1) struct bpf_cgroup_dev_ctx { - __u32 access_type; /* (access << 16) | type */ + /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ + __u32 access_type; __u32 major; __u32 minor; }; diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index c77c9a2..e0739a1 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -418,6 +418,27 @@ struct perf_event_attr { __u16 __reserved_2; /* align to __u64 */ }; +/* + * Structure used by below PERF_EVENT_IOC_QUERY_BPF command + * to query bpf programs attached to the same perf tracepoint + * as the given perf event. + */ +struct perf_event_query_bpf { + /* + * The below ids array length + */ + __u32 ids_len; + /* + * Set by the kernel to indicate the number of + * available programs + */ + __u32 prog_cnt; + /* + * User provided buffer to store program ids + */ + __u32 ids[0]; +}; + #define perf_flags(attr) (*(&(attr)->read_format + 1)) /* @@ -433,6 +454,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) #define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) #define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 4555304..83714ca 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -93,7 +93,6 @@ export prefix libdir src obj # Shell quotes libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -plugin_dir_SQ = $(subst ','\'',$(plugin_dir)) LIB_FILE = libbpf.a libbpf.so @@ -150,7 +149,7 @@ CMD_TARGETS = $(LIB_FILE) TARGETS = $(CMD_TARGETS) -all: fixdep $(VERSION_FILES) all_cmd +all: fixdep all_cmd all_cmd: $(CMD_TARGETS) @@ -169,21 +168,11 @@ $(OUTPUT)libbpf.so: $(BPF_IN) $(OUTPUT)libbpf.a: $(BPF_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ -define update_dir - (echo $1 > $@.tmp; \ - if [ -r $@ ] && cmp -s $@ $@.tmp; then \ - rm -f $@.tmp; \ - else \ - echo ' UPDATE $@'; \ - mv -f $@.tmp $@; \ - fi); -endef - define do_install if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ fi; \ - $(INSTALL) $1 '$(DESTDIR_SQ)$2' + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' endef install_lib: all_cmd @@ -192,7 +181,8 @@ install_lib: all_cmd install_headers: $(call QUIET_INSTALL, headers) \ - $(call do_install,bpf.h,$(prefix)/include/bpf,644) + $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ + $(call do_install,libbpf.h,$(prefix)/include/bpf,644); install: install_lib @@ -203,7 +193,7 @@ config-clean: $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null clean: - $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd \ + $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so .*.d .*.cmd \ $(RM) LIBBPF-CFLAGS $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf @@ -213,10 +203,10 @@ PHONY += force elfdep bpfdep force: elfdep: - @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit -1 ; fi + @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi bpfdep: - @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit -1 ; fi + @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable so we can use it in if_changed and friends. diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 6534889..9f44c19 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -40,7 +40,7 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, __u32 map_flags); /* Recommend log buffer size */ -#define BPF_LOG_BUF_SIZE 65536 +#define BPF_LOG_BUF_SIZE (256 * 1024) int bpf_load_program_name(enum bpf_prog_type type, const char *name, const struct bpf_insn *insns, size_t insns_cnt, const char *license, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5aa45f8..30c7763 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -174,12 +174,19 @@ struct bpf_program { char *name; char *section_name; struct bpf_insn *insns; - size_t insns_cnt; + size_t insns_cnt, main_prog_cnt; enum bpf_prog_type type; - struct { + struct reloc_desc { + enum { + RELO_LD64, + RELO_CALL, + } type; int insn_idx; - int map_idx; + union { + int map_idx; + int text_off; + }; } *reloc_desc; int nr_reloc; @@ -234,6 +241,7 @@ struct bpf_object { } *reloc; int nr_reloc; int maps_shndx; + int text_shndx; } efile; /* * All loaded bpf_object is linked in a list, which is @@ -375,9 +383,13 @@ bpf_object__init_prog_names(struct bpf_object *obj) size_t pi, si; for (pi = 0; pi < obj->nr_programs; pi++) { - char *name = NULL; + const char *name = NULL; prog = &obj->programs[pi]; + if (prog->idx == obj->efile.text_shndx) { + name = ".text"; + goto skip_search; + } for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; si++) { @@ -387,6 +399,8 @@ bpf_object__init_prog_names(struct bpf_object *obj) continue; if (sym.st_shndx != prog->idx) continue; + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL) + continue; name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, @@ -403,7 +417,7 @@ bpf_object__init_prog_names(struct bpf_object *obj) prog->section_name); return -EINVAL; } - +skip_search: prog->name = strdup(name); if (!prog->name) { pr_warning("failed to allocate memory for prog sym %s\n", @@ -793,6 +807,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if ((sh.sh_type == SHT_PROGBITS) && (sh.sh_flags & SHF_EXECINSTR) && (data->d_size > 0)) { + if (strcmp(name, ".text") == 0) + obj->efile.text_shndx = idx; err = bpf_object__add_program(obj, data->d_buf, data->d_size, name, idx); if (err) { @@ -854,11 +870,14 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) } static int -bpf_program__collect_reloc(struct bpf_program *prog, - size_t nr_maps, GElf_Shdr *shdr, - Elf_Data *data, Elf_Data *symbols, - int maps_shndx, struct bpf_map *maps) +bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, + Elf_Data *data, struct bpf_object *obj) { + Elf_Data *symbols = obj->efile.symbols; + int text_shndx = obj->efile.text_shndx; + int maps_shndx = obj->efile.maps_shndx; + struct bpf_map *maps = obj->maps; + size_t nr_maps = obj->nr_maps; int i, nrels; pr_debug("collecting relocating info for: '%s'\n", @@ -891,8 +910,11 @@ bpf_program__collect_reloc(struct bpf_program *prog, GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } + pr_debug("relo for %lld value %lld name %d\n", + (long long) (rel.r_info >> 32), + (long long) sym.st_value, sym.st_name); - if (sym.st_shndx != maps_shndx) { + if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) { pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n", prog->section_name, sym.st_shndx); return -LIBBPF_ERRNO__RELOC; @@ -901,6 +923,17 @@ bpf_program__collect_reloc(struct bpf_program *prog, insn_idx = rel.r_offset / sizeof(struct bpf_insn); pr_debug("relocation: insn_idx=%u\n", insn_idx); + if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) { + if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) { + pr_warning("incorrect bpf_call opcode\n"); + return -LIBBPF_ERRNO__RELOC; + } + prog->reloc_desc[i].type = RELO_CALL; + prog->reloc_desc[i].insn_idx = insn_idx; + prog->reloc_desc[i].text_off = sym.st_value; + continue; + } + if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n", insn_idx, insns[insn_idx].code); @@ -922,6 +955,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, return -LIBBPF_ERRNO__RELOC; } + prog->reloc_desc[i].type = RELO_LD64; prog->reloc_desc[i].insn_idx = insn_idx; prog->reloc_desc[i].map_idx = map_idx; } @@ -961,27 +995,76 @@ bpf_object__create_maps(struct bpf_object *obj) } static int +bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, + struct reloc_desc *relo) +{ + struct bpf_insn *insn, *new_insn; + struct bpf_program *text; + size_t new_cnt; + + if (relo->type != RELO_CALL) + return -LIBBPF_ERRNO__RELOC; + + if (prog->idx == obj->efile.text_shndx) { + pr_warning("relo in .text insn %d into off %d\n", + relo->insn_idx, relo->text_off); + return -LIBBPF_ERRNO__RELOC; + } + + if (prog->main_prog_cnt == 0) { + text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); + if (!text) { + pr_warning("no .text section found yet relo into text exist\n"); + return -LIBBPF_ERRNO__RELOC; + } + new_cnt = prog->insns_cnt + text->insns_cnt; + new_insn = realloc(prog->insns, new_cnt * sizeof(*insn)); + if (!new_insn) { + pr_warning("oom in prog realloc\n"); + return -ENOMEM; + } + memcpy(new_insn + prog->insns_cnt, text->insns, + text->insns_cnt * sizeof(*insn)); + prog->insns = new_insn; + prog->main_prog_cnt = prog->insns_cnt; + prog->insns_cnt = new_cnt; + } + insn = &prog->insns[relo->insn_idx]; + insn->imm += prog->main_prog_cnt - relo->insn_idx; + pr_debug("added %zd insn from %s to prog %s\n", + text->insns_cnt, text->section_name, prog->section_name); + return 0; +} + +static int bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) { - int i; + int i, err; if (!prog || !prog->reloc_desc) return 0; for (i = 0; i < prog->nr_reloc; i++) { - int insn_idx, map_idx; - struct bpf_insn *insns = prog->insns; + if (prog->reloc_desc[i].type == RELO_LD64) { + struct bpf_insn *insns = prog->insns; + int insn_idx, map_idx; - insn_idx = prog->reloc_desc[i].insn_idx; - map_idx = prog->reloc_desc[i].map_idx; + insn_idx = prog->reloc_desc[i].insn_idx; + map_idx = prog->reloc_desc[i].map_idx; - if (insn_idx >= (int)prog->insns_cnt) { - pr_warning("relocation out of range: '%s'\n", - prog->section_name); - return -LIBBPF_ERRNO__RELOC; + if (insn_idx >= (int)prog->insns_cnt) { + pr_warning("relocation out of range: '%s'\n", + prog->section_name); + return -LIBBPF_ERRNO__RELOC; + } + insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + insns[insn_idx].imm = obj->maps[map_idx].fd; + } else { + err = bpf_program__reloc_text(prog, obj, + &prog->reloc_desc[i]); + if (err) + return err; } - insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; - insns[insn_idx].imm = obj->maps[map_idx].fd; } zfree(&prog->reloc_desc); @@ -1024,7 +1107,6 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) Elf_Data *data = obj->efile.reloc[i].data; int idx = shdr->sh_info; struct bpf_program *prog; - size_t nr_maps = obj->nr_maps; if (shdr->sh_type != SHT_REL) { pr_warning("internal error at %d\n", __LINE__); @@ -1038,11 +1120,9 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) return -LIBBPF_ERRNO__RELOC; } - err = bpf_program__collect_reloc(prog, nr_maps, + err = bpf_program__collect_reloc(prog, shdr, data, - obj->efile.symbols, - obj->efile.maps_shndx, - obj->maps); + obj); if (err) return err; } @@ -1195,6 +1275,8 @@ bpf_object__load_progs(struct bpf_object *obj) int err; for (i = 0; i < obj->nr_programs; i++) { + if (obj->programs[i].idx == obj->efile.text_shndx) + continue; err = bpf_program__load(&obj->programs[i], obj->license, obj->kern_version); @@ -1721,6 +1803,45 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); +#define BPF_PROG_SEC(string, type) { string, sizeof(string) - 1, type } +static const struct { + const char *sec; + size_t len; + enum bpf_prog_type prog_type; +} section_names[] = { + BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), + BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), + BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), + BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), + BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), + BPF_PROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK), + BPF_PROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE), + BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS), + BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB), +}; +#undef BPF_PROG_SEC + +static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog) +{ + int i; + + if (!prog->section_name) + goto err; + + for (i = 0; i < ARRAY_SIZE(section_names); i++) + if (strncmp(prog->section_name, section_names[i].sec, + section_names[i].len) == 0) + return section_names[i].prog_type; + +err: + pr_warning("failed to guess program type based on section name %s\n", + prog->section_name); + + return BPF_PROG_TYPE_UNSPEC; +} + int bpf_map__fd(struct bpf_map *map) { return map ? map->fd : -EINVAL; @@ -1818,7 +1939,7 @@ long libbpf_get_error(const void *ptr) int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd) { - struct bpf_program *prog; + struct bpf_program *prog, *first_prog = NULL; struct bpf_object *obj; int err; @@ -1826,13 +1947,30 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type, if (IS_ERR(obj)) return -ENOENT; - prog = bpf_program__next(NULL, obj); - if (!prog) { + bpf_object__for_each_program(prog, obj) { + /* + * If type is not specified, try to guess it based on + * section name. + */ + if (type == BPF_PROG_TYPE_UNSPEC) { + type = bpf_program__guess_type(prog); + if (type == BPF_PROG_TYPE_UNSPEC) { + bpf_object__close(obj); + return -EINVAL; + } + } + + bpf_program__set_type(prog, type); + if (prog->idx != obj->efile.text_shndx && !first_prog) + first_prog = prog; + } + + if (!first_prog) { + pr_warning("object file doesn't contain bpf program\n"); bpf_object__close(obj); return -ENOENT; } - bpf_program__set_type(prog, type); err = bpf_object__load(obj); if (err) { bpf_object__close(obj); @@ -1840,6 +1978,6 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type, } *pobj = obj; - *prog_fd = bpf_program__fd(prog); + *prog_fd = bpf_program__fd(first_prog); return 0; } diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 3fab179..fcb3ed0 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -99,5 +99,6 @@ ifneq ($(silent),1) QUIET_CLEAN = @printf ' CLEAN %s\n' $1; QUIET_INSTALL = @printf ' INSTALL %s\n' $1; + QUIET_UNINST = @printf ' UNINST %s\n' $1; endif endif diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 541d9d7..1e09d77 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -3,3 +3,10 @@ test_maps test_lru_map test_lpm_map test_tag +FEATURE-DUMP.libbpf +fixdep +test_align +test_dev_cgroup +test_progs +test_verifier_log +feature diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 9316e64..bf05bc5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -11,16 +11,19 @@ ifneq ($(wildcard $(GENHDR)),) endif CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include -LDLIBS += -lcap -lelf -lrt +LDLIBS += -lcap -lelf -lrt -lpthread TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_align test_verifier_log test_dev_cgroup + test_align test_verifier_log test_dev_cgroup test_tcpbpf_user TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ - sockmap_verdict_prog.o dev_cgroup.o + sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ + test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ + sample_map_ret0.o test_tcpbpf_kern.o -TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh +TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ + test_offload.py include ../lib.mk @@ -49,8 +52,13 @@ else CPU ?= generic endif +CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ + -Wno-compare-distinct-pointer-types + +$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline +$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline + %.o: %.c - $(CLANG) -I. -I./include/uapi -I../../../include/uapi \ - -Wno-compare-distinct-pointer-types \ + $(CLANG) $(CLANG_FLAGS) \ -O2 -target bpf -emit-llvm -c $< -o - | \ $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@ diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index fd9a17f..dde2c11 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -71,6 +71,8 @@ static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_getsockopt; +static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) = + (void *) BPF_FUNC_sock_ops_cb_flags_set; static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = (void *) BPF_FUNC_sk_redirect_map; static int (*bpf_sock_map_update)(void *map, void *key, void *value, @@ -82,7 +84,8 @@ static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, unsigned int buf_size) = (void *) BPF_FUNC_perf_prog_read_value; - +static int (*bpf_override_return)(void *ctx, unsigned long rc) = + (void *) BPF_FUNC_override_return; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 52d53ed..983dd25 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -3,3 +3,5 @@ CONFIG_BPF_SYSCALL=y CONFIG_NET_CLS_BPF=m CONFIG_BPF_EVENTS=y CONFIG_TEST_BPF=m +CONFIG_CGROUP_BPF=y +CONFIG_NETDEVSIM=m diff --git a/tools/testing/selftests/bpf/sample_map_ret0.c b/tools/testing/selftests/bpf/sample_map_ret0.c new file mode 100644 index 0000000..0756303 --- /dev/null +++ b/tools/testing/selftests/bpf/sample_map_ret0.c @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") htab = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(long), + .max_entries = 2, +}; + +struct bpf_map_def SEC("maps") array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(long), + .max_entries = 2, +}; + +/* Sample program which should always load for testing control paths. */ +SEC(".text") int func() +{ + __u64 key64 = 0; + __u32 key = 0; + long *value; + + value = bpf_map_lookup_elem(&htab, &key); + if (!value) + return 1; + value = bpf_map_lookup_elem(&array, &key64); + if (!value) + return 1; + + return 0; +} diff --git a/tools/testing/selftests/bpf/sample_ret0.c b/tools/testing/selftests/bpf/sample_ret0.c new file mode 100644 index 0000000..fec9975 --- /dev/null +++ b/tools/testing/selftests/bpf/sample_ret0.c @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ + +/* Sample program which should always load for testing control paths. */ +int func() +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py new file mode 100755 index 0000000..481dccd --- /dev/null +++ b/tools/testing/selftests/bpf/tcp_client.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python2 +# +# SPDX-License-Identifier: GPL-2.0 +# + +import sys, os, os.path, getopt +import socket, time +import subprocess +import select + +def read(sock, n): + buf = '' + while len(buf) < n: + rem = n - len(buf) + try: s = sock.recv(rem) + except (socket.error), e: return '' + buf += s + return buf + +def send(sock, s): + total = len(s) + count = 0 + while count < total: + try: n = sock.send(s) + except (socket.error), e: n = 0 + if n == 0: + return count; + count += n + return count + + +serverPort = int(sys.argv[1]) +HostName = socket.gethostname() + +# create active socket +sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) +try: + sock.connect((HostName, serverPort)) +except socket.error as e: + sys.exit(1) + +buf = '' +n = 0 +while n < 1000: + buf += '+' + n += 1 + +sock.settimeout(1); +n = send(sock, buf) +n = read(sock, 500) +sys.exit(0) diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py new file mode 100755 index 0000000..bc454d7 --- /dev/null +++ b/tools/testing/selftests/bpf/tcp_server.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python2 +# +# SPDX-License-Identifier: GPL-2.0 +# + +import sys, os, os.path, getopt +import socket, time +import subprocess +import select + +def read(sock, n): + buf = '' + while len(buf) < n: + rem = n - len(buf) + try: s = sock.recv(rem) + except (socket.error), e: return '' + buf += s + return buf + +def send(sock, s): + total = len(s) + count = 0 + while count < total: + try: n = sock.send(s) + except (socket.error), e: n = 0 + if n == 0: + return count; + count += n + return count + + +SERVER_PORT = 12877 +MAX_PORTS = 2 + +serverPort = SERVER_PORT +serverSocket = None + +HostName = socket.gethostname() + +# create passive socket +serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) +host = socket.gethostname() + +try: serverSocket.bind((host, 0)) +except socket.error as msg: + print 'bind fails: ', msg + +sn = serverSocket.getsockname() +serverPort = sn[1] + +cmdStr = ("./tcp_client.py %d &") % (serverPort) +os.system(cmdStr) + +buf = '' +n = 0 +while n < 500: + buf += '.' + n += 1 + +serverSocket.listen(MAX_PORTS) +readList = [serverSocket] + +while True: + readyRead, readyWrite, inError = \ + select.select(readList, [], [], 2) + + if len(readyRead) > 0: + waitCount = 0 + for sock in readyRead: + if sock == serverSocket: + (clientSocket, address) = serverSocket.accept() + address = str(address[0]) + readList.append(clientSocket) + else: + sock.settimeout(1); + s = read(sock, 1000) + n = send(sock, buf) + sock.close() + serverSocket.close() + sys.exit(0) + else: + print 'Select timeout!' + sys.exit(1) diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index 471bbbd..ff8bd7e 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -64,11 +64,11 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv2"}, - {2, "R3=inv4"}, - {3, "R3=inv8"}, - {4, "R3=inv16"}, - {5, "R3=inv32"}, + {1, "R3_w=inv2"}, + {2, "R3_w=inv4"}, + {3, "R3_w=inv8"}, + {4, "R3_w=inv16"}, + {5, "R3_w=inv32"}, }, }, { @@ -92,17 +92,17 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv1"}, - {2, "R3=inv2"}, - {3, "R3=inv4"}, - {4, "R3=inv8"}, - {5, "R3=inv16"}, - {6, "R3=inv1"}, - {7, "R4=inv32"}, - {8, "R4=inv16"}, - {9, "R4=inv8"}, - {10, "R4=inv4"}, - {11, "R4=inv2"}, + {1, "R3_w=inv1"}, + {2, "R3_w=inv2"}, + {3, "R3_w=inv4"}, + {4, "R3_w=inv8"}, + {5, "R3_w=inv16"}, + {6, "R3_w=inv1"}, + {7, "R4_w=inv32"}, + {8, "R4_w=inv16"}, + {9, "R4_w=inv8"}, + {10, "R4_w=inv4"}, + {11, "R4_w=inv2"}, }, }, { @@ -121,12 +121,12 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv4"}, - {2, "R3=inv8"}, - {3, "R3=inv10"}, - {4, "R4=inv8"}, - {5, "R4=inv12"}, - {6, "R4=inv14"}, + {1, "R3_w=inv4"}, + {2, "R3_w=inv8"}, + {3, "R3_w=inv10"}, + {4, "R4_w=inv8"}, + {5, "R4_w=inv12"}, + {6, "R4_w=inv14"}, }, }, { @@ -143,10 +143,10 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv7"}, - {2, "R3=inv7"}, - {3, "R3=inv14"}, - {4, "R3=inv56"}, + {1, "R3_w=inv7"}, + {2, "R3_w=inv7"}, + {3, "R3_w=inv14"}, + {4, "R3_w=inv56"}, }, }, @@ -185,18 +185,18 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { {7, "R0=pkt(id=0,off=8,r=8,imm=0)"}, - {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R3=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {9, "R3=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {10, "R3=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {11, "R3=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, {18, "R3=pkt_end(id=0,off=0,imm=0)"}, - {18, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {19, "R4=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, - {20, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, - {21, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {22, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {23, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, + {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, }, }, { @@ -217,16 +217,16 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {9, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {11, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {12, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {13, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {14, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {15, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {16, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, }, }, { @@ -257,14 +257,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, - {5, "R5=pkt(id=0,off=14,r=0,imm=0)"}, - {6, "R4=pkt(id=0,off=14,r=0,imm=0)"}, + {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, + {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"}, + {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"}, {10, "R2=pkt(id=0,off=0,r=18,imm=0)"}, {10, "R5=pkt(id=0,off=14,r=18,imm=0)"}, - {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {14, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, - {15, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + {15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, }, }, { @@ -320,11 +320,11 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. */ - {11, "R5=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * it's total offset is NET_IP_ALIGN + reg->off (0) + * reg->aux_off (14) which is 16. Then the variable @@ -336,11 +336,11 @@ static struct bpf_align_test tests[] = { /* Variable offset is added to R5 packet pointer, * resulting in auxiliary alignment of 4. */ - {18, "R5=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {19, "R5=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte @@ -352,18 +352,18 @@ static struct bpf_align_test tests[] = { /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5=pkt(id=0,off=14,r=8"}, + {26, "R5_w=pkt(id=0,off=14,r=8"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). */ - {27, "R5=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {28, "R5=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {29, "R5=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"}, + {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so @@ -410,11 +410,11 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {9, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* Packet pointer has (4n+2) offset */ - {11, "R5=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@ -426,11 +426,11 @@ static struct bpf_align_test tests[] = { /* Newly read value in R6 was shifted left by 2, so has * known alignment of 4. */ - {18, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Added (4n) to packet pointer's (4n+2) var_off, giving * another (4n+2). */ - {19, "R5=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, + {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@ -446,11 +446,9 @@ static struct bpf_align_test tests[] = { .insns = { PREP_PKT_POINTERS, BPF_MOV64_IMM(BPF_REG_0, 0), - /* ptr & const => unknown & const */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), - BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 0x40), - /* ptr << const => unknown << const */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + /* (ptr - ptr) << 2 */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_2), BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2), /* We have a (4n) value. Let's make a packet offset * out of it. First add 14, to make it a (4n+2) @@ -473,8 +471,26 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, - /* R5 bitwise operator &= on pointer prohibited */ + {4, "R5_w=pkt_end(id=0,off=0,imm=0)"}, + /* (ptr - ptr) << 2 == unknown, (4n) */ + {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"}, + /* (4n) + 14 == (4n+2). We blow our bounds, because + * the add could overflow. + */ + {7, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"}, + /* Checked s>=0 */ + {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + /* packet pointer + nonnegative (4n+2) */ + {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + {13, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. + * We checked the bounds, but it might have been able + * to overflow if the packet pointer started in the + * upper half of the address space. + * So we did not get a 'range' on R6, and the access + * attempt will fail. + */ + {15, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, } }, { @@ -510,11 +526,11 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {9, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {10, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* New unknown value in R7 is (4n) */ - {11, "R7=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Subtracting it from R6 blows our unsigned bounds */ {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"}, /* Checked s>= 0 */ @@ -563,15 +579,15 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {10, "R6=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, + {10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, /* Adding 14 makes R6 be (4n+2) */ - {11, "R6=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, + {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"}, + {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"}, /* New unknown value in R7 is (4n), >= 76 */ - {15, "R7=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, + {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, + {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c index 02c85d6..3489cc2 100644 --- a/tools/testing/selftests/bpf/test_dev_cgroup.c +++ b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -10,6 +10,8 @@ #include <string.h> #include <errno.h> #include <assert.h> +#include <sys/time.h> +#include <sys/resource.h> #include <linux/bpf.h> #include <bpf/bpf.h> @@ -19,19 +21,23 @@ #define DEV_CGROUP_PROG "./dev_cgroup.o" -#define TEST_CGROUP "test-bpf-based-device-cgroup/" +#define TEST_CGROUP "/test-bpf-based-device-cgroup/" int main(int argc, char **argv) { + struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; struct bpf_object *obj; int error = EXIT_FAILURE; int prog_fd, cgroup_fd; __u32 prog_cnt; + if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0) + perror("Unable to lift memlock rlimit"); + if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE, &obj, &prog_fd)) { printf("Failed to load DEV_CGROUP program\n"); - goto err; + goto out; } if (setup_cgroup_environment()) { @@ -89,5 +95,6 @@ int main(int argc, char **argv) err: cleanup_cgroup_environment(); +out: return error; } diff --git a/tools/testing/selftests/bpf/test_l4lb_noinline.c b/tools/testing/selftests/bpf/test_l4lb_noinline.c new file mode 100644 index 0000000..ba44a14 --- /dev/null +++ b/tools/testing/selftests/bpf/test_l4lb_noinline.c @@ -0,0 +1,473 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include "bpf_helpers.h" +#include "test_iptunnel_common.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; + +static __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +#define PCKT_FRAGMENTED 65343 +#define IPV4_HDR_LEN_NO_OPT 20 +#define IPV4_PLUS_ICMP_HDR 28 +#define IPV6_PLUS_ICMP_HDR 48 +#define RING_SIZE 2 +#define MAX_VIPS 12 +#define MAX_REALS 5 +#define CTL_MAP_SIZE 16 +#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE) +#define F_IPV6 (1 << 0) +#define F_HASH_NO_SRC_PORT (1 << 0) +#define F_ICMP (1 << 0) +#define F_SYN_SET (1 << 1) + +struct packet_description { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct vip_stats { + __u64 bytes; + __u64 pkts; +}; + +struct eth_hdr { + unsigned char eth_dest[ETH_ALEN]; + unsigned char eth_source[ETH_ALEN]; + unsigned short eth_proto; +}; + +struct bpf_map_def SEC("maps") vip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip), + .value_size = sizeof(struct vip_meta), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ch_rings = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = CH_RINGS_SIZE, +}; + +struct bpf_map_def SEC("maps") reals = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct real_definition), + .max_entries = MAX_REALS, +}; + +struct bpf_map_def SEC("maps") stats = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct vip_stats), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = CTL_MAP_SIZE, +}; + +static __u32 get_packet_hash(struct packet_description *pckt, + bool ipv6) +{ + if (ipv6) + return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS), + pckt->ports, CH_RINGS_SIZE); + else + return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE); +} + +static bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6) +{ + __u32 hash = get_packet_hash(pckt, is_ipv6); + __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE; + __u32 *real_pos; + + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) + return 0; + + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return false; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return false; + return true; +} + +static int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG) + return TC_ACT_OK; + off += sizeof(struct icmp6hdr); + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + pckt->proto = ip6h->nexthdr; + pckt->flags |= F_ICMP; + memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16); + memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16); + return TC_ACT_UNSPEC; +} + +static int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->type != ICMP_DEST_UNREACH || + icmp_hdr->code != ICMP_FRAG_NEEDED) + return TC_ACT_OK; + off += sizeof(struct icmphdr); + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + pckt->proto = iph->protocol; + pckt->flags |= F_ICMP; + pckt->src = iph->daddr; + pckt->dst = iph->saddr; + return TC_ACT_UNSPEC; +} + +static bool parse_udp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct udphdr *udp; + udp = data + off; + + if (udp + 1 > data_end) + return false; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = udp->source; + pckt->port16[1] = udp->dest; + } else { + pckt->port16[0] = udp->dest; + pckt->port16[1] = udp->source; + } + return true; +} + +static bool parse_tcp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct tcphdr *tcp; + + tcp = data + off; + if (tcp + 1 > data_end) + return false; + + if (tcp->syn) + pckt->flags |= F_SYN_SET; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = tcp->source; + pckt->port16[1] = tcp->dest; + } else { + pckt->port16[0] = tcp->dest; + pckt->port16[1] = tcp->source; + } + return true; +} + +static int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct __sk_buff *skb) +{ + void *pkt_start = (void *)(long)skb->data; + struct packet_description pckt = {}; + struct eth_hdr *eth = pkt_start; + struct bpf_tunnel_key tkey = {}; + struct vip_stats *data_stats; + struct real_definition *dst; + struct vip_meta *vip_info; + struct ctl_value *cval; + __u32 v4_intf_pos = 1; + __u32 v6_intf_pos = 2; + struct ipv6hdr *ip6h; + struct vip vip = {}; + struct iphdr *iph; + int tun_flag = 0; + __u16 pkt_bytes; + __u64 iph_len; + __u32 ifindex; + __u8 protocol; + __u32 vip_num; + int action; + + tkey.tunnel_ttl = 64; + if (is_ipv6) { + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + + iph_len = sizeof(struct ipv6hdr); + protocol = ip6h->nexthdr; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(ip6h->payload_len); + off += iph_len; + if (protocol == IPPROTO_FRAGMENT) { + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_ICMPV6) { + action = parse_icmpv6(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV6_PLUS_ICMP_HDR; + } else { + memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16); + memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16); + } + } else { + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + + protocol = iph->protocol; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(iph->tot_len); + off += IPV4_HDR_LEN_NO_OPT; + + if (iph->frag_off & PCKT_FRAGMENTED) + return TC_ACT_SHOT; + if (protocol == IPPROTO_ICMP) { + action = parse_icmp(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV4_PLUS_ICMP_HDR; + } else { + pckt.src = iph->saddr; + pckt.dst = iph->daddr; + } + } + protocol = pckt.proto; + + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else { + return TC_ACT_SHOT; + } + + if (is_ipv6) + memcpy(vip.daddr.v6, pckt.dstv6, 16); + else + vip.daddr.v4 = pckt.dst; + + vip.dport = pckt.port16[1]; + vip.protocol = pckt.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.dport = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return TC_ACT_SHOT; + pckt.port16[1] = 0; + } + + if (vip_info->flags & F_HASH_NO_SRC_PORT) + pckt.port16[0] = 0; + + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6)) + return TC_ACT_SHOT; + + if (dst->flags & F_IPV6) { + cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + memcpy(tkey.remote_ipv6, dst->dstv6, 16); + tun_flag = BPF_F_TUNINFO_IPV6; + } else { + cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + tkey.remote_ipv4 = dst->dst; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return TC_ACT_SHOT; + data_stats->pkts++; + data_stats->bytes += pkt_bytes; + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag); + *(u32 *)eth->eth_dest = tkey.remote_ipv4; + return bpf_redirect(ifindex, 0); +} + +SEC("l4lb-demo") +int balancer_ingress(struct __sk_buff *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return TC_ACT_SHOT; + eth_proto = eth->eth_proto; + if (eth_proto == bpf_htons(ETH_P_IP)) + return process_packet(data, nh_off, data_end, false, ctx); + else if (eth_proto == bpf_htons(ETH_P_IPV6)) + return process_packet(data, nh_off, data_end, true, ctx); + else + return TC_ACT_SHOT; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index f614806..2be87e9 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -14,6 +14,7 @@ #include <errno.h> #include <inttypes.h> #include <linux/bpf.h> +#include <pthread.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -521,6 +522,218 @@ static void test_lpm_delete(void) close(map_fd); } +static void test_lpm_get_next_key(void) +{ + struct bpf_lpm_trie_key *key_p, *next_key_p; + size_t key_size; + __u32 value = 0; + int map_fd; + + key_size = sizeof(*key_p) + sizeof(__u32); + key_p = alloca(key_size); + next_key_p = alloca(key_size); + + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value), + 100, BPF_F_NO_PREALLOC); + assert(map_fd >= 0); + + /* empty tree. get_next_key should return ENOENT */ + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 && + errno == ENOENT); + + /* get and verify the first key, get the second one should fail. */ + key_p->prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && + key_p->data[1] == 168); + + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* no exact matching key should get the first one in post order. */ + key_p->prefixlen = 8; + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && + key_p->data[1] == 168); + + /* add one more element (total two) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* Add one more element (total three) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* Add one more element (total four) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 1); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* no exact matching key should return the first one in post order */ + key_p->prefixlen = 22; + inet_pton(AF_INET, "192.168.1.0", key_p->data); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 0); + + close(map_fd); +} + +#define MAX_TEST_KEYS 4 +struct lpm_mt_test_info { + int cmd; /* 0: update, 1: delete, 2: lookup, 3: get_next_key */ + int iter; + int map_fd; + struct { + __u32 prefixlen; + __u32 data; + } key[MAX_TEST_KEYS]; +}; + +static void *lpm_test_command(void *arg) +{ + int i, j, ret, iter, key_size; + struct lpm_mt_test_info *info = arg; + struct bpf_lpm_trie_key *key_p; + + key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32); + key_p = alloca(key_size); + for (iter = 0; iter < info->iter; iter++) + for (i = 0; i < MAX_TEST_KEYS; i++) { + /* first half of iterations in forward order, + * and second half in backward order. + */ + j = (iter < (info->iter / 2)) ? i : MAX_TEST_KEYS - i - 1; + key_p->prefixlen = info->key[j].prefixlen; + memcpy(key_p->data, &info->key[j].data, sizeof(__u32)); + if (info->cmd == 0) { + __u32 value = j; + /* update must succeed */ + assert(bpf_map_update_elem(info->map_fd, key_p, &value, 0) == 0); + } else if (info->cmd == 1) { + ret = bpf_map_delete_elem(info->map_fd, key_p); + assert(ret == 0 || errno == ENOENT); + } else if (info->cmd == 2) { + __u32 value; + ret = bpf_map_lookup_elem(info->map_fd, key_p, &value); + assert(ret == 0 || errno == ENOENT); + } else { + struct bpf_lpm_trie_key *next_key_p = alloca(key_size); + ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p); + assert(ret == 0 || errno == ENOENT || errno == ENOMEM); + } + } + + // Pass successful exit info back to the main thread + pthread_exit((void *)info); +} + +static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd) +{ + info->iter = 2000; + info->map_fd = map_fd; + info->key[0].prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", &info->key[0].data); + info->key[1].prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", &info->key[1].data); + info->key[2].prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", &info->key[2].data); + info->key[3].prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", &info->key[3].data); +} + +static void test_lpm_multi_thread(void) +{ + struct lpm_mt_test_info info[4]; + size_t key_size, value_size; + pthread_t thread_id[4]; + int i, map_fd; + void *ret; + + /* create a trie */ + value_size = sizeof(__u32); + key_size = sizeof(struct bpf_lpm_trie_key) + value_size; + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size, + 100, BPF_F_NO_PREALLOC); + + /* create 4 threads to test update, delete, lookup and get_next_key */ + setup_lpm_mt_test_info(&info[0], map_fd); + for (i = 0; i < 4; i++) { + if (i != 0) + memcpy(&info[i], &info[0], sizeof(info[i])); + info[i].cmd = i; + assert(pthread_create(&thread_id[i], NULL, &lpm_test_command, &info[i]) == 0); + } + + for (i = 0; i < 4; i++) + assert(pthread_join(thread_id[i], &ret) == 0 && ret == (void *)&info[i]); + + close(map_fd); +} + int main(void) { struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; @@ -545,6 +758,10 @@ int main(void) test_lpm_delete(); + test_lpm_get_next_key(); + + test_lpm_multi_thread(); + printf("test_lpm: OK\n"); return 0; } diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 040356e..436c4c7 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -242,7 +242,7 @@ static void test_hashmap_percpu(int task, void *data) static void test_hashmap_walk(int task, void *data) { - int fd, i, max_entries = 100000; + int fd, i, max_entries = 1000; long long key, value, next_key; bool next_key_valid = true; @@ -463,7 +463,7 @@ static void test_devmap(int task, void *data) #define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o" static void test_sockmap(int tasks, void *data) { - int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc; + int one = 1, map_fd_rx = 0, map_fd_tx = 0, map_fd_break, s, sc, rc; struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break; int ports[] = {50200, 50201, 50202, 50204}; int err, i, fd, udp, sfd[6] = {0xdeadbeef}; @@ -868,9 +868,12 @@ static void test_sockmap(int tasks, void *data) goto out_sockmap; } - /* Test map close sockets */ - for (i = 0; i < 6; i++) + /* Test map close sockets and empty maps */ + for (i = 0; i < 6; i++) { + bpf_map_delete_elem(map_fd_tx, &i); + bpf_map_delete_elem(map_fd_rx, &i); close(sfd[i]); + } close(fd); close(map_fd_rx); bpf_object__close(obj); @@ -881,8 +884,13 @@ out: printf("Failed to create sockmap '%i:%s'!\n", i, strerror(errno)); exit(1); out_sockmap: - for (i = 0; i < 6; i++) + for (i = 0; i < 6; i++) { + if (map_fd_tx) + bpf_map_delete_elem(map_fd_tx, &i); + if (map_fd_rx) + bpf_map_delete_elem(map_fd_rx, &i); close(sfd[i]); + } close(fd); exit(1); } @@ -931,8 +939,12 @@ static void test_map_large(void) close(fd); } -static void run_parallel(int tasks, void (*fn)(int task, void *data), - void *data) +#define run_parallel(N, FN, DATA) \ + printf("Fork %d tasks to '" #FN "'\n", N); \ + __run_parallel(N, FN, DATA) + +static void __run_parallel(int tasks, void (*fn)(int task, void *data), + void *data) { pid_t pid[tasks]; int i; @@ -972,7 +984,7 @@ static void test_map_stress(void) #define DO_UPDATE 1 #define DO_DELETE 0 -static void do_work(int fn, void *data) +static void test_update_delete(int fn, void *data) { int do_update = ((int *)data)[1]; int fd = ((int *)data)[0]; @@ -1012,7 +1024,7 @@ static void test_map_parallel(void) */ data[0] = fd; data[1] = DO_UPDATE; - run_parallel(TASKS, do_work, data); + run_parallel(TASKS, test_update_delete, data); /* Check that key=0 is already there. */ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && @@ -1035,7 +1047,7 @@ static void test_map_parallel(void) /* Now let's delete all elemenets in parallel. */ data[1] = DO_DELETE; - run_parallel(TASKS, do_work, data); + run_parallel(TASKS, test_update_delete, data); /* Nothing should be left. */ key = -1; diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py new file mode 100755 index 0000000..e78aad0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_offload.py @@ -0,0 +1,1085 @@ +#!/usr/bin/python3 + +# Copyright (C) 2017 Netronome Systems, Inc. +# +# This software is licensed under the GNU General License Version 2, +# June 1991 as shown in the file COPYING in the top-level directory of this +# source tree. +# +# THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE +# OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME +# THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +from datetime import datetime +import argparse +import json +import os +import pprint +import random +import string +import struct +import subprocess +import time + +logfile = None +log_level = 1 +skip_extack = False +bpf_test_dir = os.path.dirname(os.path.realpath(__file__)) +pp = pprint.PrettyPrinter() +devs = [] # devices we created for clean up +files = [] # files to be removed +netns = [] # net namespaces to be removed + +def log_get_sec(level=0): + return "*" * (log_level + level) + +def log_level_inc(add=1): + global log_level + log_level += add + +def log_level_dec(sub=1): + global log_level + log_level -= sub + +def log_level_set(level): + global log_level + log_level = level + +def log(header, data, level=None): + """ + Output to an optional log. + """ + if logfile is None: + return + if level is not None: + log_level_set(level) + + if not isinstance(data, str): + data = pp.pformat(data) + + if len(header): + logfile.write("\n" + log_get_sec() + " ") + logfile.write(header) + if len(header) and len(data.strip()): + logfile.write("\n") + logfile.write(data) + +def skip(cond, msg): + if not cond: + return + print("SKIP: " + msg) + log("SKIP: " + msg, "", level=1) + os.sys.exit(0) + +def fail(cond, msg): + if not cond: + return + print("FAIL: " + msg) + log("FAIL: " + msg, "", level=1) + os.sys.exit(1) + +def start_test(msg): + log(msg, "", level=1) + log_level_inc() + print(msg) + +def cmd(cmd, shell=True, include_stderr=False, background=False, fail=True): + """ + Run a command in subprocess and return tuple of (retval, stdout); + optionally return stderr as well as third value. + """ + proc = subprocess.Popen(cmd, shell=shell, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if background: + msg = "%s START: %s" % (log_get_sec(1), + datetime.now().strftime("%H:%M:%S.%f")) + log("BKG " + proc.args, msg) + return proc + + return cmd_result(proc, include_stderr=include_stderr, fail=fail) + +def cmd_result(proc, include_stderr=False, fail=False): + stdout, stderr = proc.communicate() + stdout = stdout.decode("utf-8") + stderr = stderr.decode("utf-8") + proc.stdout.close() + proc.stderr.close() + + stderr = "\n" + stderr + if stderr[-1] == "\n": + stderr = stderr[:-1] + + sec = log_get_sec(1) + log("CMD " + proc.args, + "RETCODE: %d\n%s STDOUT:\n%s%s STDERR:%s\n%s END: %s" % + (proc.returncode, sec, stdout, sec, stderr, + sec, datetime.now().strftime("%H:%M:%S.%f"))) + + if proc.returncode != 0 and fail: + if len(stderr) > 0 and stderr[-1] == "\n": + stderr = stderr[:-1] + raise Exception("Command failed: %s\n%s" % (proc.args, stderr)) + + if include_stderr: + return proc.returncode, stdout, stderr + else: + return proc.returncode, stdout + +def rm(f): + cmd("rm -f %s" % (f)) + if f in files: + files.remove(f) + +def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False): + params = "" + if JSON: + params += "%s " % (flags["json"]) + + if ns != "": + ns = "ip netns exec %s " % (ns) + + if include_stderr: + ret, stdout, stderr = cmd(ns + name + " " + params + args, + fail=fail, include_stderr=True) + else: + ret, stdout = cmd(ns + name + " " + params + args, + fail=fail, include_stderr=False) + + if JSON and len(stdout.strip()) != 0: + out = json.loads(stdout) + else: + out = stdout + + if include_stderr: + return ret, out, stderr + else: + return ret, out + +def bpftool(args, JSON=True, ns="", fail=True): + return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail) + +def bpftool_prog_list(expected=None, ns=""): + _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True) + if expected is not None: + if len(progs) != expected: + fail(True, "%d BPF programs loaded, expected %d" % + (len(progs), expected)) + return progs + +def bpftool_map_list(expected=None, ns=""): + _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) + if expected is not None: + if len(maps) != expected: + fail(True, "%d BPF maps loaded, expected %d" % + (len(maps), expected)) + return maps + +def bpftool_prog_list_wait(expected=0, n_retry=20): + for i in range(n_retry): + nprogs = len(bpftool_prog_list()) + if nprogs == expected: + return + time.sleep(0.05) + raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) + +def bpftool_map_list_wait(expected=0, n_retry=20): + for i in range(n_retry): + nmaps = len(bpftool_map_list()) + if nmaps == expected: + return + time.sleep(0.05) + raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) + +def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False): + if force: + args = "-force " + args + return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, + fail=fail, include_stderr=include_stderr) + +def tc(args, JSON=True, ns="", fail=True, include_stderr=False): + return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, + fail=fail, include_stderr=include_stderr) + +def ethtool(dev, opt, args, fail=True): + return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail) + +def bpf_obj(name, sec=".text", path=bpf_test_dir,): + return "obj %s sec %s" % (os.path.join(path, name), sec) + +def bpf_pinned(name): + return "pinned %s" % (name) + +def bpf_bytecode(bytecode): + return "bytecode \"%s\"" % (bytecode) + +def mknetns(n_retry=10): + for i in range(n_retry): + name = ''.join([random.choice(string.ascii_letters) for i in range(8)]) + ret, _ = ip("netns add %s" % (name), fail=False) + if ret == 0: + netns.append(name) + return name + return None + +def int2str(fmt, val): + ret = [] + for b in struct.pack(fmt, val): + ret.append(int(b)) + return " ".join(map(lambda x: str(x), ret)) + +def str2int(strtab): + inttab = [] + for i in strtab: + inttab.append(int(i, 16)) + ba = bytearray(inttab) + if len(strtab) == 4: + fmt = "I" + elif len(strtab) == 8: + fmt = "Q" + else: + raise Exception("String array of len %d can't be unpacked to an int" % + (len(strtab))) + return struct.unpack(fmt, ba)[0] + +class DebugfsDir: + """ + Class for accessing DebugFS directories as a dictionary. + """ + + def __init__(self, path): + self.path = path + self._dict = self._debugfs_dir_read(path) + + def __len__(self): + return len(self._dict.keys()) + + def __getitem__(self, key): + if type(key) is int: + key = list(self._dict.keys())[key] + return self._dict[key] + + def __setitem__(self, key, value): + log("DebugFS set %s = %s" % (key, value), "") + log_level_inc() + + cmd("echo '%s' > %s/%s" % (value, self.path, key)) + log_level_dec() + + _, out = cmd('cat %s/%s' % (self.path, key)) + self._dict[key] = out.strip() + + def _debugfs_dir_read(self, path): + dfs = {} + + log("DebugFS state for %s" % (path), "") + log_level_inc(add=2) + + _, out = cmd('ls ' + path) + for f in out.split(): + p = os.path.join(path, f) + if os.path.isfile(p): + _, out = cmd('cat %s/%s' % (path, f)) + dfs[f] = out.strip() + elif os.path.isdir(p): + dfs[f] = DebugfsDir(p) + else: + raise Exception("%s is neither file nor directory" % (p)) + + log_level_dec() + log("DebugFS state", dfs) + log_level_dec() + + return dfs + +class NetdevSim: + """ + Class for netdevsim netdevice and its attributes. + """ + + def __init__(self): + self.dev = self._netdevsim_create() + devs.append(self) + + self.ns = "" + + self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname']) + self.dfs_refresh() + + def __getitem__(self, key): + return self.dev[key] + + def _netdevsim_create(self): + _, old = ip("link show") + ip("link add sim%d type netdevsim") + _, new = ip("link show") + + for dev in new: + f = filter(lambda x: x["ifname"] == dev["ifname"], old) + if len(list(f)) == 0: + return dev + + raise Exception("failed to create netdevsim device") + + def remove(self): + devs.remove(self) + ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns) + + def dfs_refresh(self): + self.dfs = DebugfsDir(self.dfs_dir) + return self.dfs + + def dfs_num_bound_progs(self): + path = os.path.join(self.dfs_dir, "bpf_bound_progs") + _, progs = cmd('ls %s' % (path)) + return len(progs.split()) + + def dfs_get_bound_progs(self, expected): + progs = DebugfsDir(os.path.join(self.dfs_dir, "bpf_bound_progs")) + if expected is not None: + if len(progs) != expected: + fail(True, "%d BPF programs bound, expected %d" % + (len(progs), expected)) + return progs + + def wait_for_flush(self, bound=0, total=0, n_retry=20): + for i in range(n_retry): + nbound = self.dfs_num_bound_progs() + nprogs = len(bpftool_prog_list()) + if nbound == bound and nprogs == total: + return + time.sleep(0.05) + raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs)) + + def set_ns(self, ns): + name = "1" if ns == "" else ns + ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns) + self.ns = ns + + def set_mtu(self, mtu, fail=True): + return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), + fail=fail) + + def set_xdp(self, bpf, mode, force=False, JSON=True, verbose=False, + fail=True, include_stderr=False): + if verbose: + bpf += " verbose" + return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf), + force=force, JSON=JSON, + fail=fail, include_stderr=include_stderr) + + def unset_xdp(self, mode, force=False, JSON=True, + fail=True, include_stderr=False): + return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode), + force=force, JSON=JSON, + fail=fail, include_stderr=include_stderr) + + def ip_link_show(self, xdp): + _, link = ip("link show dev %s" % (self['ifname'])) + if len(link) > 1: + raise Exception("Multiple objects on ip link show") + if len(link) < 1: + return {} + fail(xdp != "xdp" in link, + "XDP program not reporting in iplink (reported %s, expected %s)" % + ("xdp" in link, xdp)) + return link[0] + + def tc_add_ingress(self): + tc("qdisc add dev %s ingress" % (self['ifname'])) + + def tc_del_ingress(self): + tc("qdisc del dev %s ingress" % (self['ifname'])) + + def tc_flush_filters(self, bound=0, total=0): + self.tc_del_ingress() + self.tc_add_ingress() + self.wait_for_flush(bound=bound, total=total) + + def tc_show_ingress(self, expected=None): + # No JSON support, oh well... + flags = ["skip_sw", "skip_hw", "in_hw"] + named = ["protocol", "pref", "chain", "handle", "id", "tag"] + + args = "-s filter show dev %s ingress" % (self['ifname']) + _, out = tc(args, JSON=False) + + filters = [] + lines = out.split('\n') + for line in lines: + words = line.split() + if "handle" not in words: + continue + fltr = {} + for flag in flags: + fltr[flag] = flag in words + for name in named: + try: + idx = words.index(name) + fltr[name] = words[idx + 1] + except ValueError: + pass + filters.append(fltr) + + if expected is not None: + fail(len(filters) != expected, + "%d ingress filters loaded, expected %d" % + (len(filters), expected)) + return filters + + def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None, + chain=None, cls="", params="", + fail=True, include_stderr=False): + spec = "" + if prio is not None: + spec += " prio %d" % (prio) + if handle: + spec += " handle %s" % (handle) + if chain is not None: + spec += " chain %d" % (chain) + + return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\ + .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec, + cls=cls, params=params), + fail=fail, include_stderr=include_stderr) + + def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None, + chain=None, da=False, verbose=False, + skip_sw=False, skip_hw=False, + fail=True, include_stderr=False): + cls = "bpf " + bpf + + params = "" + if da: + params += " da" + if verbose: + params += " verbose" + if skip_sw: + params += " skip_sw" + if skip_hw: + params += " skip_hw" + + return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls, + chain=chain, params=params, + fail=fail, include_stderr=include_stderr) + + def set_ethtool_tc_offloads(self, enable, fail=True): + args = "hw-tc-offload %s" % ("on" if enable else "off") + return ethtool(self, "-K", args, fail=fail) + +################################################################################ +def clean_up(): + global files, netns, devs + + for dev in devs: + dev.remove() + for f in files: + cmd("rm -f %s" % (f)) + for ns in netns: + cmd("ip netns delete %s" % (ns)) + files = [] + netns = [] + +def pin_prog(file_name, idx=0): + progs = bpftool_prog_list(expected=(idx + 1)) + prog = progs[idx] + bpftool("prog pin id %d %s" % (prog["id"], file_name)) + files.append(file_name) + + return file_name, bpf_pinned(file_name) + +def pin_map(file_name, idx=0, expected=1): + maps = bpftool_map_list(expected=expected) + m = maps[idx] + bpftool("map pin id %d %s" % (m["id"], file_name)) + files.append(file_name) + + return file_name, bpf_pinned(file_name) + +def check_dev_info_removed(prog_file=None, map_file=None): + bpftool_prog_list(expected=0) + ret, err = bpftool("prog show pin %s" % (prog_file), fail=False) + fail(ret == 0, "Showing prog with removed device did not fail") + fail(err["error"].find("No such device") == -1, + "Showing prog with removed device expected ENODEV, error is %s" % + (err["error"])) + + bpftool_map_list(expected=0) + ret, err = bpftool("map show pin %s" % (map_file), fail=False) + fail(ret == 0, "Showing map with removed device did not fail") + fail(err["error"].find("No such device") == -1, + "Showing map with removed device expected ENODEV, error is %s" % + (err["error"])) + +def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False): + progs = bpftool_prog_list(expected=1, ns=ns) + prog = progs[0] + + fail("dev" not in prog.keys(), "Device parameters not reported") + dev = prog["dev"] + fail("ifindex" not in dev.keys(), "Device parameters not reported") + fail("ns_dev" not in dev.keys(), "Device parameters not reported") + fail("ns_inode" not in dev.keys(), "Device parameters not reported") + + if not other_ns: + fail("ifname" not in dev.keys(), "Ifname not reported") + fail(dev["ifname"] != sim["ifname"], + "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"])) + else: + fail("ifname" in dev.keys(), "Ifname is reported for other ns") + + maps = bpftool_map_list(expected=2, ns=ns) + for m in maps: + fail("dev" not in m.keys(), "Device parameters not reported") + fail(dev != m["dev"], "Map's device different than program's") + +def check_extack(output, reference, args): + if skip_extack: + return + lines = output.split("\n") + comp = len(lines) >= 2 and lines[1] == reference + fail(not comp, "Missing or incorrect netlink extack message") + +def check_extack_nsim(output, reference, args): + check_extack(output, "Error: netdevsim: " + reference, args) + +def check_no_extack(res, needle): + fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"), + "Found '%s' in command output, leaky extack?" % (needle)) + +def check_verifier_log(output, reference): + lines = output.split("\n") + for l in reversed(lines): + if l == reference: + return + fail(True, "Missing or incorrect message from netdevsim in verifier log") + +def test_spurios_extack(sim, obj, skip_hw, needle): + res = sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=skip_hw, + include_stderr=True) + check_no_extack(res, needle) + res = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, + skip_hw=skip_hw, include_stderr=True) + check_no_extack(res, needle) + res = sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf", + include_stderr=True) + check_no_extack(res, needle) + + +# Parse command line +parser = argparse.ArgumentParser() +parser.add_argument("--log", help="output verbose log to given file") +args = parser.parse_args() +if args.log: + logfile = open(args.log, 'w+') + logfile.write("# -*-Org-*-") + +log("Prepare...", "", level=1) +log_level_inc() + +# Check permissions +skip(os.getuid() != 0, "test must be run as root") + +# Check tools +ret, progs = bpftool("prog", fail=False) +skip(ret != 0, "bpftool not installed") +# Check no BPF programs are loaded +skip(len(progs) != 0, "BPF programs already loaded on the system") + +# Check netdevsim +ret, out = cmd("modprobe netdevsim", fail=False) +skip(ret != 0, "netdevsim module could not be loaded") + +# Check debugfs +_, out = cmd("mount") +if out.find("/sys/kernel/debug type debugfs") == -1: + cmd("mount -t debugfs none /sys/kernel/debug") + +# Check samples are compiled +samples = ["sample_ret0.o", "sample_map_ret0.o"] +for s in samples: + ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False) + skip(ret != 0, "sample %s/%s not found, please compile it" % + (bpf_test_dir, s)) + +# Check if iproute2 is built with libmnl (needed by extack support) +_, _, err = cmd("tc qdisc delete dev lo handle 0", + fail=False, include_stderr=True) +if err.find("Error: Failed to find qdisc with specified handle.") == -1: + print("Warning: no extack message in iproute2 output, libmnl missing?") + log("Warning: no extack message in iproute2 output, libmnl missing?", "") + skip_extack = True + +# Check if net namespaces seem to work +ns = mknetns() +skip(ns is None, "Could not create a net namespace") +cmd("ip netns delete %s" % (ns)) +netns = [] + +try: + obj = bpf_obj("sample_ret0.o") + bytecode = bpf_bytecode("1,6 0 0 4294967295,") + + start_test("Test destruction of generic XDP...") + sim = NetdevSim() + sim.set_xdp(obj, "generic") + sim.remove() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.tc_add_ingress() + + start_test("Test TC non-offloaded...") + ret, _ = sim.cls_bpf_add_filter(obj, skip_hw=True, fail=False) + fail(ret != 0, "Software TC filter did not load") + + start_test("Test TC non-offloaded isn't getting bound...") + ret, _ = sim.cls_bpf_add_filter(obj, fail=False) + fail(ret != 0, "Software TC filter did not load") + sim.dfs_get_bound_progs(expected=0) + + sim.tc_flush_filters() + + start_test("Test TC offloads are off by default...") + ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "TC filter loaded without enabling TC offloads") + check_extack(err, "Error: TC offload is disabled on net device.", args) + sim.wait_for_flush() + + sim.set_ethtool_tc_offloads(True) + sim.dfs["bpf_tc_non_bound_accept"] = "Y" + + start_test("Test TC offload by default...") + ret, _ = sim.cls_bpf_add_filter(obj, fail=False) + fail(ret != 0, "Software TC filter did not load") + sim.dfs_get_bound_progs(expected=0) + ingress = sim.tc_show_ingress(expected=1) + fltr = ingress[0] + fail(not fltr["in_hw"], "Filter not offloaded by default") + + sim.tc_flush_filters() + + start_test("Test TC cBPF bytcode tries offload by default...") + ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False) + fail(ret != 0, "Software TC filter did not load") + sim.dfs_get_bound_progs(expected=0) + ingress = sim.tc_show_ingress(expected=1) + fltr = ingress[0] + fail(not fltr["in_hw"], "Bytecode not offloaded by default") + + sim.tc_flush_filters() + sim.dfs["bpf_tc_non_bound_accept"] = "N" + + start_test("Test TC cBPF unbound bytecode doesn't offload...") + ret, _, err = sim.cls_bpf_add_filter(bytecode, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "TC bytecode loaded for offload") + check_extack_nsim(err, "netdevsim configured to reject unbound programs.", + args) + sim.wait_for_flush() + + start_test("Test non-0 chain offload...") + ret, _, err = sim.cls_bpf_add_filter(obj, chain=1, prio=1, handle=1, + skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "Offloaded a filter to chain other than 0") + check_extack(err, "Error: Driver supports only offload of chain 0.", args) + sim.tc_flush_filters() + + start_test("Test TC replace...") + sim.cls_bpf_add_filter(obj, prio=1, handle=1) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_sw=True) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_sw=True) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=True) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_hw=True) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + start_test("Test TC replace bad flags...") + for i in range(3): + for j in range(3): + ret, _ = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, + skip_sw=(j == 1), skip_hw=(j == 2), + fail=False) + fail(bool(ret) != bool(j), + "Software TC incorrect load in replace test, iteration %d" % + (j)) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + start_test("Test spurious extack from the driver...") + test_spurios_extack(sim, obj, False, "netdevsim") + test_spurios_extack(sim, obj, True, "netdevsim") + + sim.set_ethtool_tc_offloads(False) + + test_spurios_extack(sim, obj, False, "TC offload is disabled") + test_spurios_extack(sim, obj, True, "TC offload is disabled") + + sim.set_ethtool_tc_offloads(True) + + sim.tc_flush_filters() + + start_test("Test TC offloads work...") + ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True, + fail=False, include_stderr=True) + fail(ret != 0, "TC filter did not load with TC offloads enabled") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") + + start_test("Test TC offload basics...") + dfs = sim.dfs_get_bound_progs(expected=1) + progs = bpftool_prog_list(expected=1) + ingress = sim.tc_show_ingress(expected=1) + + dprog = dfs[0] + prog = progs[0] + fltr = ingress[0] + fail(fltr["skip_hw"], "TC does reports 'skip_hw' on offloaded filter") + fail(not fltr["in_hw"], "TC does not report 'in_hw' for offloaded filter") + fail(not fltr["skip_sw"], "TC does not report 'skip_sw' back") + + start_test("Test TC offload is device-bound...") + fail(str(prog["id"]) != fltr["id"], "Program IDs don't match") + fail(prog["tag"] != fltr["tag"], "Program tags don't match") + fail(fltr["id"] != dprog["id"], "Program IDs don't match") + fail(dprog["state"] != "xlated", "Offloaded program state not translated") + fail(dprog["loaded"] != "Y", "Offloaded program is not loaded") + + start_test("Test disabling TC offloads is rejected while filters installed...") + ret, _ = sim.set_ethtool_tc_offloads(False, fail=False) + fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...") + + start_test("Test qdisc removal frees things...") + sim.tc_flush_filters() + sim.tc_show_ingress(expected=0) + + start_test("Test disabling TC offloads is OK without filters...") + ret, _ = sim.set_ethtool_tc_offloads(False, fail=False) + fail(ret != 0, + "Driver refused to disable TC offloads without filters installed...") + + sim.set_ethtool_tc_offloads(True) + + start_test("Test destroying device gets rid of TC filters...") + sim.cls_bpf_add_filter(obj, skip_sw=True) + sim.remove() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.set_ethtool_tc_offloads(True) + + start_test("Test destroying device gets rid of XDP...") + sim.set_xdp(obj, "offload") + sim.remove() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.set_ethtool_tc_offloads(True) + + start_test("Test XDP prog reporting...") + sim.set_xdp(obj, "drv") + ipl = sim.ip_link_show(xdp=True) + progs = bpftool_prog_list(expected=1) + fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"], + "Loaded program has wrong ID") + + start_test("Test XDP prog replace without force...") + ret, _ = sim.set_xdp(obj, "drv", fail=False) + fail(ret == 0, "Replaced XDP program without -force") + sim.wait_for_flush(total=1) + + start_test("Test XDP prog replace with force...") + ret, _ = sim.set_xdp(obj, "drv", force=True, fail=False) + fail(ret != 0, "Could not replace XDP program with -force") + bpftool_prog_list_wait(expected=1) + ipl = sim.ip_link_show(xdp=True) + progs = bpftool_prog_list(expected=1) + fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"], + "Loaded program has wrong ID") + fail("dev" in progs[0].keys(), + "Device parameters reported for non-offloaded program") + + start_test("Test XDP prog replace with bad flags...") + ret, _, err = sim.set_xdp(obj, "offload", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Replaced XDP program with a program in different mode") + check_extack_nsim(err, "program loaded with different flags.", args) + ret, _, err = sim.set_xdp(obj, "", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Replaced XDP program with a program in different mode") + check_extack_nsim(err, "program loaded with different flags.", args) + + start_test("Test XDP prog remove with bad flags...") + ret, _, err = sim.unset_xdp("offload", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Removed program with a bad mode mode") + check_extack_nsim(err, "program loaded with different flags.", args) + ret, _, err = sim.unset_xdp("", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Removed program with a bad mode mode") + check_extack_nsim(err, "program loaded with different flags.", args) + + start_test("Test MTU restrictions...") + ret, _ = sim.set_mtu(9000, fail=False) + fail(ret == 0, + "Driver should refuse to increase MTU to 9000 with XDP loaded...") + sim.unset_xdp("drv") + bpftool_prog_list_wait(expected=0) + sim.set_mtu(9000) + ret, _, err = sim.set_xdp(obj, "drv", fail=False, include_stderr=True) + fail(ret == 0, "Driver should refuse to load program with MTU of 9000...") + check_extack_nsim(err, "MTU too large w/ XDP enabled.", args) + sim.set_mtu(1500) + + sim.wait_for_flush() + start_test("Test XDP offload...") + _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True) + ipl = sim.ip_link_show(xdp=True) + link_xdp = ipl["xdp"]["prog"] + progs = bpftool_prog_list(expected=1) + prog = progs[0] + fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") + + start_test("Test XDP offload is device bound...") + dfs = sim.dfs_get_bound_progs(expected=1) + dprog = dfs[0] + + fail(prog["id"] != link_xdp["id"], "Program IDs don't match") + fail(prog["tag"] != link_xdp["tag"], "Program tags don't match") + fail(str(link_xdp["id"]) != dprog["id"], "Program IDs don't match") + fail(dprog["state"] != "xlated", "Offloaded program state not translated") + fail(dprog["loaded"] != "Y", "Offloaded program is not loaded") + + start_test("Test removing XDP program many times...") + sim.unset_xdp("offload") + sim.unset_xdp("offload") + sim.unset_xdp("drv") + sim.unset_xdp("drv") + sim.unset_xdp("") + sim.unset_xdp("") + bpftool_prog_list_wait(expected=0) + + start_test("Test attempt to use a program for a wrong device...") + sim2 = NetdevSim() + sim2.set_xdp(obj, "offload") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") + + ret, _, err = sim.set_xdp(pinned, "offload", + fail=False, include_stderr=True) + fail(ret == 0, "Pinned program loaded for a different device accepted") + check_extack_nsim(err, "program bound to different dev.", args) + sim2.remove() + ret, _, err = sim.set_xdp(pinned, "offload", + fail=False, include_stderr=True) + fail(ret == 0, "Pinned program loaded for a removed device accepted") + check_extack_nsim(err, "xdpoffload of non-bound program.", args) + rm(pin_file) + bpftool_prog_list_wait(expected=0) + + start_test("Test mixing of TC and XDP...") + sim.tc_add_ingress() + sim.set_xdp(obj, "offload") + ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "Loading TC when XDP active should fail") + check_extack_nsim(err, "driver and netdev offload states mismatch.", args) + sim.unset_xdp("offload") + sim.wait_for_flush() + + sim.cls_bpf_add_filter(obj, skip_sw=True) + ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True) + fail(ret == 0, "Loading XDP when TC active should fail") + check_extack_nsim(err, "TC program is already loaded.", args) + + start_test("Test binding TC from pinned...") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") + sim.tc_flush_filters(bound=1, total=1) + sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True) + sim.tc_flush_filters(bound=1, total=1) + + start_test("Test binding XDP from pinned...") + sim.set_xdp(obj, "offload") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp2", idx=1) + + sim.set_xdp(pinned, "offload", force=True) + sim.unset_xdp("offload") + sim.set_xdp(pinned, "offload", force=True) + sim.unset_xdp("offload") + + start_test("Test offload of wrong type fails...") + ret, _ = sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True, fail=False) + fail(ret == 0, "Managed to attach XDP program to TC") + + start_test("Test asking for TC offload of two filters...") + sim.cls_bpf_add_filter(obj, da=True, skip_sw=True) + ret, _, err = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "Managed to offload two TC filters at the same time") + check_extack_nsim(err, "driver and netdev offload states mismatch.", args) + + sim.tc_flush_filters(bound=2, total=2) + + start_test("Test if netdev removal waits for translation...") + delay_msec = 500 + sim.dfs["bpf_bind_verifier_delay"] = delay_msec + start = time.time() + cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \ + (sim['ifname'], obj) + tc_proc = cmd(cmd_line, background=True, fail=False) + # Wait for the verifier to start + while sim.dfs_num_bound_progs() <= 2: + pass + sim.remove() + end = time.time() + ret, _ = cmd_result(tc_proc, fail=False) + time_diff = end - start + log("Time", "start:\t%s\nend:\t%s\ndiff:\t%s" % (start, end, time_diff)) + + fail(ret == 0, "Managed to load TC filter on a unregistering device") + delay_sec = delay_msec * 0.001 + fail(time_diff < delay_sec, "Removal process took %s, expected %s" % + (time_diff, delay_sec)) + + # Remove all pinned files and reinstantiate the netdev + clean_up() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + map_obj = bpf_obj("sample_map_ret0.o") + start_test("Test loading program with maps...") + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + + start_test("Test bpftool bound info reporting (own ns)...") + check_dev_info(False, "") + + start_test("Test bpftool bound info reporting (other ns)...") + ns = mknetns() + sim.set_ns(ns) + check_dev_info(True, "") + + start_test("Test bpftool bound info reporting (remote ns)...") + check_dev_info(False, ns) + + start_test("Test bpftool bound info reporting (back to own ns)...") + sim.set_ns("") + check_dev_info(False, "") + + prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog") + map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2) + sim.remove() + + start_test("Test bpftool bound info reporting (removed dev)...") + check_dev_info_removed(prog_file=prog_file, map_file=map_file) + + # Remove all pinned files and reinstantiate the netdev + clean_up() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + + start_test("Test map update (no flags)...") + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + maps = bpftool_map_list(expected=2) + array = maps[0] if maps[0]["type"] == "array" else maps[1] + htab = maps[0] if maps[0]["type"] == "hash" else maps[1] + for m in maps: + for i in range(2): + bpftool("map update id %d key %s value %s" % + (m["id"], int2str("I", i), int2str("Q", i * 3))) + + for m in maps: + ret, _ = bpftool("map update id %d key %s value %s" % + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), + fail=False) + fail(ret == 0, "added too many entries") + + start_test("Test map update (exists)...") + for m in maps: + for i in range(2): + bpftool("map update id %d key %s value %s exist" % + (m["id"], int2str("I", i), int2str("Q", i * 3))) + + for m in maps: + ret, err = bpftool("map update id %d key %s value %s exist" % + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), + fail=False) + fail(ret == 0, "updated non-existing key") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map update (noexist)...") + for m in maps: + for i in range(2): + ret, err = bpftool("map update id %d key %s value %s noexist" % + (m["id"], int2str("I", i), int2str("Q", i * 3)), + fail=False) + fail(ret == 0, "updated existing key") + fail(err["error"].find("File exists") == -1, + "expected EEXIST, error is '%s'" % (err["error"])) + + start_test("Test map dump...") + for m in maps: + _, entries = bpftool("map dump id %d" % (m["id"])) + for i in range(2): + key = str2int(entries[i]["key"]) + fail(key != i, "expected key %d, got %d" % (key, i)) + val = str2int(entries[i]["value"]) + fail(val != i * 3, "expected value %d, got %d" % (val, i * 3)) + + start_test("Test map getnext...") + for m in maps: + _, entry = bpftool("map getnext id %d" % (m["id"])) + key = str2int(entry["next_key"]) + fail(key != 0, "next key %d, expected %d" % (key, 0)) + _, entry = bpftool("map getnext id %d key %s" % + (m["id"], int2str("I", 0))) + key = str2int(entry["next_key"]) + fail(key != 1, "next key %d, expected %d" % (key, 1)) + ret, err = bpftool("map getnext id %d key %s" % + (m["id"], int2str("I", 1)), fail=False) + fail(ret == 0, "got next key past the end of map") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map delete (htab)...") + for i in range(2): + bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i))) + + start_test("Test map delete (array)...") + for i in range(2): + ret, err = bpftool("map delete id %d key %s" % + (htab["id"], int2str("I", i)), fail=False) + fail(ret == 0, "removed entry from an array") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map remove...") + sim.unset_xdp("offload") + bpftool_map_list_wait(expected=0) + sim.remove() + + sim = NetdevSim() + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + sim.remove() + bpftool_map_list_wait(expected=0) + + start_test("Test map creation fail path...") + sim = NetdevSim() + sim.dfs["bpf_map_accept"] = "N" + ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False) + fail(ret == 0, + "netdevsim didn't refuse to create a map with offload disabled") + + print("%s: OK" % (os.path.basename(__file__))) + +finally: + log("Clean up...", "", level=1) + log_level_inc() + clean_up() diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 6761be1..b549308 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -21,8 +21,10 @@ typedef __u16 __sum16; #include <linux/ipv6.h> #include <linux/tcp.h> #include <linux/filter.h> +#include <linux/perf_event.h> #include <linux/unistd.h> +#include <sys/ioctl.h> #include <sys/wait.h> #include <sys/resource.h> #include <sys/types.h> @@ -167,10 +169,9 @@ out: #define NUM_ITER 100000 #define VIP_NUM 5 -static void test_l4lb(void) +static void test_l4lb(const char *file) { unsigned int nr_cpus = bpf_num_possible_cpus(); - const char *file = "./test_l4lb.o"; struct vip key = {.protocol = 6}; struct vip_meta { __u32 flags; @@ -247,6 +248,95 @@ out: bpf_object__close(obj); } +static void test_l4lb_all(void) +{ + const char *file1 = "./test_l4lb.o"; + const char *file2 = "./test_l4lb_noinline.o"; + + test_l4lb(file1); + test_l4lb(file2); +} + +static void test_xdp_noinline(void) +{ + const char *file = "./test_xdp_noinline.o"; + unsigned int nr_cpus = bpf_num_possible_cpus(); + struct vip key = {.protocol = 6}; + struct vip_meta { + __u32 flags; + __u32 vip_num; + } value = {.vip_num = VIP_NUM}; + __u32 stats_key = VIP_NUM; + struct vip_stats { + __u64 bytes; + __u64 pkts; + } stats[nr_cpus]; + struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; + } real_def = {.dst = MAGIC_VAL}; + __u32 ch_key = 11, real_num = 3; + __u32 duration, retval, size; + int err, i, prog_fd, map_fd; + __u64 bytes = 0, pkts = 0; + struct bpf_object *obj; + char buf[128]; + u32 *magic = (u32 *)buf; + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (err) { + error_cnt++; + return; + } + + map_fd = bpf_find_map(__func__, obj, "vip_map"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &key, &value, 0); + + map_fd = bpf_find_map(__func__, obj, "ch_rings"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &ch_key, &real_num, 0); + + map_fd = bpf_find_map(__func__, obj, "reals"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &real_num, &real_def, 0); + + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != 1 || size != 54 || + *magic != MAGIC_VAL, "ipv4", + "err %d errno %d retval %d size %d magic %x\n", + err, errno, retval, size, *magic); + + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != 1 || size != 74 || + *magic != MAGIC_VAL, "ipv6", + "err %d errno %d retval %d size %d magic %x\n", + err, errno, retval, size, *magic); + + map_fd = bpf_find_map(__func__, obj, "stats"); + if (map_fd < 0) + goto out; + bpf_map_lookup_elem(map_fd, &stats_key, stats); + for (i = 0; i < nr_cpus; i++) { + bytes += stats[i].bytes; + pkts += stats[i].pkts; + } + if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) { + error_cnt++; + printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts); + } +out: + bpf_object__close(obj); +} + static void test_tcp_estats(void) { const char *file = "./test_tcp_estats.o"; @@ -617,6 +707,262 @@ static void test_obj_name(void) } } +static void test_tp_attach_query(void) +{ + const int num_progs = 3; + int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs]; + __u32 duration = 0, info_len, saved_prog_ids[num_progs]; + const char *file = "./test_tracepoint.o"; + struct perf_event_query_bpf *query; + struct perf_event_attr attr = {}; + struct bpf_object *obj[num_progs]; + struct bpf_prog_info prog_info; + char buf[256]; + + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/sched/sched_switch/id"); + efd = open(buf, O_RDONLY, 0); + if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) + return; + bytes = read(efd, buf, sizeof(buf)); + close(efd); + if (CHECK(bytes <= 0 || bytes >= sizeof(buf), + "read", "bytes %d errno %d\n", bytes, errno)) + return; + + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN; + attr.sample_period = 1; + attr.wakeup_events = 1; + + query = malloc(sizeof(*query) + sizeof(__u32) * num_progs); + for (i = 0; i < num_progs; i++) { + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i], + &prog_fd[i]); + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + goto cleanup1; + + bzero(&prog_info, sizeof(prog_info)); + prog_info.jited_prog_len = 0; + prog_info.xlated_prog_len = 0; + prog_info.nr_map_ids = 0; + info_len = sizeof(prog_info); + err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len); + if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n", + err, errno)) + goto cleanup1; + saved_prog_ids[i] = prog_info.id; + + pmu_fd[i] = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (CHECK(pmu_fd[i] < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd[i], errno)) + goto cleanup2; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", + err, errno)) + goto cleanup3; + + if (i == 0) { + /* check NULL prog array query */ + query->ids_len = num_progs; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(err || query->prog_cnt != 0, + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + } + + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[i]); + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", + err, errno)) + goto cleanup3; + + if (i == 1) { + /* try to get # of programs only */ + query->ids_len = 0; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(err || query->prog_cnt != 2, + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + + /* try a few negative tests */ + /* invalid query pointer */ + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, + (struct perf_event_query_bpf *)0x1); + if (CHECK(!err || errno != EFAULT, + "perf_event_ioc_query_bpf", + "err %d errno %d\n", err, errno)) + goto cleanup3; + + /* no enough space */ + query->ids_len = 1; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(!err || errno != ENOSPC || query->prog_cnt != 2, + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + } + + query->ids_len = num_progs; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(err || query->prog_cnt != (i + 1), + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + for (j = 0; j < i + 1; j++) + if (CHECK(saved_prog_ids[j] != query->ids[j], + "perf_event_ioc_query_bpf", + "#%d saved_prog_id %x query prog_id %x\n", + j, saved_prog_ids[j], query->ids[j])) + goto cleanup3; + } + + i = num_progs - 1; + for (; i >= 0; i--) { + cleanup3: + ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE); + cleanup2: + close(pmu_fd[i]); + cleanup1: + bpf_object__close(obj[i]); + } + free(query); +} + +static int compare_map_keys(int map1_fd, int map2_fd) +{ + __u32 key, next_key; + char val_buf[PERF_MAX_STACK_DEPTH * sizeof(__u64)]; + int err; + + err = bpf_map_get_next_key(map1_fd, NULL, &key); + if (err) + return err; + err = bpf_map_lookup_elem(map2_fd, &key, val_buf); + if (err) + return err; + + while (bpf_map_get_next_key(map1_fd, &key, &next_key) == 0) { + err = bpf_map_lookup_elem(map2_fd, &next_key, val_buf); + if (err) + return err; + + key = next_key; + } + if (errno != ENOENT) + return -1; + + return 0; +} + +static void test_stacktrace_map() +{ + int control_map_fd, stackid_hmap_fd, stackmap_fd; + const char *file = "./test_stacktrace_map.o"; + int bytes, efd, err, pmu_fd, prog_fd; + struct perf_event_attr attr = {}; + __u32 key, val, duration = 0; + struct bpf_object *obj; + char buf[256]; + + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + goto out; + + /* Get the ID for the sched/sched_switch tracepoint */ + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/sched/sched_switch/id"); + efd = open(buf, O_RDONLY, 0); + if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) + goto close_prog; + + bytes = read(efd, buf, sizeof(buf)); + close(efd); + if (CHECK(bytes <= 0 || bytes >= sizeof(buf), + "read", "bytes %d errno %d\n", bytes, errno)) + goto close_prog; + + /* Open the perf event and attach bpf progrram */ + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN; + attr.sample_period = 1; + attr.wakeup_events = 1; + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd, errno)) + goto close_prog; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", + err, errno)) + goto close_pmu; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", + err, errno)) + goto disable_pmu; + + /* find map fds */ + control_map_fd = bpf_find_map(__func__, obj, "control_map"); + if (CHECK(control_map_fd < 0, "bpf_find_map control_map", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); + if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); + if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n", + err, errno)) + goto disable_pmu; + + /* give some time for bpf program run */ + sleep(1); + + /* disable stack trace collection */ + key = 0; + val = 1; + bpf_map_update_elem(control_map_fd, &key, &val, 0); + + /* for every element in stackid_hmap, we can find a corresponding one + * in stackmap, and vise versa. + */ + err = compare_map_keys(stackid_hmap_fd, stackmap_fd); + if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + err = compare_map_keys(stackmap_fd, stackid_hmap_fd); + if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", + "err %d errno %d\n", err, errno)) + ; /* fall through */ + +disable_pmu: + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); + +close_pmu: + close(pmu_fd); + +close_prog: + bpf_object__close(obj); + +out: + return; +} + int main(void) { struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; @@ -625,11 +971,14 @@ int main(void) test_pkt_access(); test_xdp(); - test_l4lb(); + test_l4lb_all(); + test_xdp_noinline(); test_tcp_estats(); test_bpf_obj_id(); test_pkt_md_access(); test_obj_name(); + test_tp_attach_query(); + test_stacktrace_map(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c new file mode 100644 index 0000000..76d85c5d --- /dev/null +++ b/tools/testing/selftests/bpf/test_stacktrace_map.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +#ifndef PERF_MAX_STACK_DEPTH +#define PERF_MAX_STACK_DEPTH 127 +#endif + +struct bpf_map_def SEC("maps") control_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") stackid_hmap = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 10000, +}; + +struct bpf_map_def SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, + .max_entries = 10000, +}; + +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +struct sched_switch_args { + unsigned long long pad; + char prev_comm[16]; + int prev_pid; + int prev_prio; + long long prev_state; + char next_comm[16]; + int next_pid; + int next_prio; +}; + +SEC("tracepoint/sched/sched_switch") +int oncpu(struct sched_switch_args *ctx) +{ + __u32 key = 0, val = 0, *value_p; + + value_p = bpf_map_lookup_elem(&control_map, &key); + if (value_p && *value_p) + return 0; /* skip if non-zero *value_p */ + + /* The size of stackmap and stackid_hmap should be the same */ + key = bpf_get_stackid(ctx, &stackmap, 0); + if ((int)key >= 0) + bpf_map_update_elem(&stackid_hmap, &key, &val, 0); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h new file mode 100644 index 0000000..2fe4328 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpbpf.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef _TEST_TCPBPF_H +#define _TEST_TCPBPF_H + +struct tcpbpf_globals { + __u32 event_map; + __u32 total_retrans; + __u32 data_segs_in; + __u32 data_segs_out; + __u32 bad_cb_test_rv; + __u32 good_cb_test_rv; + __u64 bytes_received; + __u64 bytes_acked; +}; +#endif diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c new file mode 100644 index 0000000..57119ad --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/in6.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/tcp.h> +#include <netinet/in.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "test_tcpbpf.h" + +struct bpf_map_def SEC("maps") global_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct tcpbpf_globals), + .max_entries = 2, +}; + +static inline void update_event_map(int event) +{ + __u32 key = 0; + struct tcpbpf_globals g, *gp; + + gp = bpf_map_lookup_elem(&global_map, &key); + if (gp == NULL) { + struct tcpbpf_globals g = {0}; + + g.event_map |= (1 << event); + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } else { + g = *gp; + g.event_map |= (1 << event); + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } +} + +int _version SEC("version") = 1; + +SEC("sockops") +int bpf_testcb(struct bpf_sock_ops *skops) +{ + int rv = -1; + int bad_call_rv = 0; + int good_call_rv = 0; + int op; + int v = 0; + + op = (int) skops->op; + + update_event_map(op); + + switch (op) { + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + /* Test failure to set largest cb flag (assumes not defined) */ + bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80); + /* Set callback */ + good_call_rv = bpf_sock_ops_cb_flags_set(skops, + BPF_SOCK_OPS_STATE_CB_FLAG); + /* Update results */ + { + __u32 key = 0; + struct tcpbpf_globals g, *gp; + + gp = bpf_map_lookup_elem(&global_map, &key); + if (!gp) + break; + g = *gp; + g.bad_cb_test_rv = bad_call_rv; + g.good_cb_test_rv = good_call_rv; + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + skops->sk_txhash = 0x12345f; + v = 0xff; + rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v, + sizeof(v)); + break; + case BPF_SOCK_OPS_RTO_CB: + break; + case BPF_SOCK_OPS_RETRANS_CB: + break; + case BPF_SOCK_OPS_STATE_CB: + if (skops->args[1] == BPF_TCP_CLOSE) { + __u32 key = 0; + struct tcpbpf_globals g, *gp; + + gp = bpf_map_lookup_elem(&global_map, &key); + if (!gp) + break; + g = *gp; + g.total_retrans = skops->total_retrans; + g.data_segs_in = skops->data_segs_in; + g.data_segs_out = skops->data_segs_out; + g.bytes_received = skops->bytes_received; + g.bytes_acked = skops->bytes_acked; + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } + break; + default: + rv = -1; + } + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c new file mode 100644 index 0000000..95a370f --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <signal.h> +#include <string.h> +#include <assert.h> +#include <linux/perf_event.h> +#include <linux/ptrace.h> +#include <linux/bpf.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include "bpf_util.h" +#include <linux/perf_event.h> +#include "test_tcpbpf.h" + +static int bpf_find_map(const char *test, struct bpf_object *obj, + const char *name) +{ + struct bpf_map *map; + + map = bpf_object__find_map_by_name(obj, name); + if (!map) { + printf("%s:FAIL:map '%s' not found\n", test, name); + return -1; + } + return bpf_map__fd(map); +} + +#define SYSTEM(CMD) \ + do { \ + if (system(CMD)) { \ + printf("system(%s) FAILS!\n", CMD); \ + } \ + } while (0) + +int main(int argc, char **argv) +{ + const char *file = "test_tcpbpf_kern.o"; + struct tcpbpf_globals g = {0}; + int cg_fd, prog_fd, map_fd; + bool debug_flag = false; + int error = EXIT_FAILURE; + struct bpf_object *obj; + char cmd[100], *dir; + struct stat buffer; + __u32 key = 0; + int pid; + int rv; + + if (argc > 1 && strcmp(argv[1], "-d") == 0) + debug_flag = true; + + dir = "/tmp/cgroupv2/foo"; + + if (stat(dir, &buffer) != 0) { + SYSTEM("mkdir -p /tmp/cgroupv2"); + SYSTEM("mount -t cgroup2 none /tmp/cgroupv2"); + SYSTEM("mkdir -p /tmp/cgroupv2/foo"); + } + pid = (int) getpid(); + sprintf(cmd, "echo %d >> /tmp/cgroupv2/foo/cgroup.procs", pid); + SYSTEM(cmd); + + cg_fd = open(dir, O_DIRECTORY, O_RDONLY); + if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { + printf("FAILED: load_bpf_file failed for: %s\n", file); + goto err; + } + + rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (rv) { + printf("FAILED: bpf_prog_attach: %d (%s)\n", + error, strerror(errno)); + goto err; + } + + SYSTEM("./tcp_server.py"); + + map_fd = bpf_find_map(__func__, obj, "global_map"); + if (map_fd < 0) + goto err; + + rv = bpf_map_lookup_elem(map_fd, &key, &g); + if (rv != 0) { + printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); + goto err; + } + + if (g.bytes_received != 501 || g.bytes_acked != 1002 || + g.data_segs_in != 1 || g.data_segs_out != 1 || + (g.event_map ^ 0x47e) != 0 || g.bad_cb_test_rv != 0x80 || + g.good_cb_test_rv != 0) { + printf("FAILED: Wrong stats\n"); + if (debug_flag) { + printf("\n"); + printf("bytes_received: %d (expecting 501)\n", + (int)g.bytes_received); + printf("bytes_acked: %d (expecting 1002)\n", + (int)g.bytes_acked); + printf("data_segs_in: %d (expecting 1)\n", + g.data_segs_in); + printf("data_segs_out: %d (expecting 1)\n", + g.data_segs_out); + printf("event_map: 0x%x (at least 0x47e)\n", + g.event_map); + printf("bad_cb_test_rv: 0x%x (expecting 0x80)\n", + g.bad_cb_test_rv); + printf("good_cb_test_rv:0x%x (expecting 0)\n", + g.good_cb_test_rv); + } + goto err; + } + printf("PASSED!\n"); + error = 0; +err: + bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); + return error; + +} diff --git a/tools/testing/selftests/bpf/test_tracepoint.c b/tools/testing/selftests/bpf/test_tracepoint.c new file mode 100644 index 0000000..04bf084 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tracepoint.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +struct sched_switch_args { + unsigned long long pad; + char prev_comm[16]; + int prev_pid; + int prev_prio; + long long prev_state; + char next_comm[16]; + int next_pid; + int next_prio; +}; + +SEC("tracepoint/sched/sched_switch") +int oncpu(struct sched_switch_args *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 5ed4175..697bd83 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2,6 +2,7 @@ * Testsuite for eBPF verifier * * Copyright (c) 2014 PLUMgrid, http://plumgrid.com + * Copyright (c) 2017 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -20,6 +21,7 @@ #include <stddef.h> #include <stdbool.h> #include <sched.h> +#include <limits.h> #include <sys/capability.h> #include <sys/resource.h> @@ -28,6 +30,7 @@ #include <linux/filter.h> #include <linux/bpf_perf_event.h> #include <linux/bpf.h> +#include <linux/if_ether.h> #include <bpf/bpf.h> @@ -48,6 +51,8 @@ #define MAX_INSNS 512 #define MAX_FIXUPS 8 #define MAX_NR_MAPS 4 +#define POINTER_VALUE 0xcafe4all +#define TEST_DATA_LEN 64 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) @@ -61,6 +66,7 @@ struct bpf_test { int fixup_map_in_map[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; + uint32_t retval; enum { UNDEF, ACCEPT, @@ -94,6 +100,326 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = -3, + }, + { + "DIV32 by 0, zero check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "DIV32 by 0, zero check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "DIV64 by 0, zero check", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "MOD32 by 0, zero check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "MOD32 by 0, zero check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "MOD64 by 0, zero check", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "DIV32 by 0, zero check ok, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 2), + BPF_MOV32_IMM(BPF_REG_2, 16), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 8, + }, + { + "DIV32 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV32 by 0, zero check 2, cls", + .insns = { + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 by 0, zero check, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "MOD32 by 0, zero check ok, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 3), + BPF_MOV32_IMM(BPF_REG_2, 5), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, + }, + { + "MOD32 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "MOD32 by 0, zero check 2, cls", + .insns = { + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "MOD64 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, + }, + { + "MOD64 by 0, zero check 2, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, -1), + BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = -1, + }, + /* Just make sure that JITs used udiv/umod as otherwise we get + * an exception from INT_MIN/-1 overflow similarly as with div + * by zero. + */ + { + "DIV32 overflow, check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV32 overflow, check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 overflow, check 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 overflow, check 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "MOD32 overflow, check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = INT_MIN, + }, + { + "MOD32 overflow, check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = INT_MIN, + }, + { + "MOD64 overflow, check 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "MOD64 overflow, check 2", + .insns = { + BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "xor32 zero extend check", + .insns = { + BPF_MOV32_IMM(BPF_REG_2, -1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32), + BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 0xffff), + BPF_ALU32_REG(BPF_XOR, BPF_REG_2, BPF_REG_2), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "empty prog", + .insns = { + }, + .errstr = "unknown opcode 00", + .result = REJECT, + }, + { + "only exit insn", + .insns = { + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .result = REJECT, }, { "unreachable", @@ -209,6 +535,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 1, }, { "test8 ld_imm64", @@ -280,7 +607,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_ARSH not supported for 32 bit ALU", + .errstr = "unknown opcode c4", }, { "arsh32 on reg", @@ -291,7 +618,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_ARSH not supported for 32 bit ALU", + .errstr = "unknown opcode cc", }, { "arsh64 on imm", @@ -317,7 +644,7 @@ static struct bpf_test tests[] = { .insns = { BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2), }, - .errstr = "jump out of range", + .errstr = "not an exit", .result = REJECT, }, { @@ -407,7 +734,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0), BPF_EXIT_INSN(), }, - .errstr = "BPF_CALL uses reserved", + .errstr = "unknown opcode 8d", .result = REJECT, }, { @@ -516,6 +843,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, + .retval = POINTER_VALUE, }, { "check valid spill/fill, skb mark", @@ -596,7 +924,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(0, 0, 0, 0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_LD_IMM", + .errstr = "unknown opcode 00", .result = REJECT, }, { @@ -614,7 +942,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(-1, 0, 0, 0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_ALU opcode f0", + .errstr = "unknown opcode ff", .result = REJECT, }, { @@ -623,7 +951,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(-1, -1, -1, -1, -1), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_ALU opcode f0", + .errstr = "unknown opcode ff", .result = REJECT, }, { @@ -802,6 +1130,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R1 pointer comparison", .result_unpriv = REJECT, .result = ACCEPT, + .retval = -ENOENT, }, { "jump test 4", @@ -1822,6 +2151,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 0xfaceb00c, }, { "PTR_TO_STACK store/load - bad alignment on off", @@ -1880,6 +2210,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr", + .retval = POINTER_VALUE, }, { "unpriv: add const to pointer", @@ -2053,6 +2384,7 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .result = ACCEPT, @@ -2840,6 +3172,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test12 (and, good access)", @@ -2864,6 +3197,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test13 (branches, good access)", @@ -2894,6 +3228,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)", @@ -2917,6 +3252,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test15 (spill with xadd)", @@ -3203,6 +3539,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test28 (marking on <=, bad access)", @@ -5700,7 +6037,7 @@ static struct bpf_test tests[] = { "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", .insns = { BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), @@ -5822,6 +6159,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 0 /* csum_diff of 64-byte packet */, }, { "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", @@ -5935,7 +6273,7 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), @@ -6190,6 +6528,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .retval = 42 /* ultimate return value */, }, { "ld_ind: check calling conv, r1", @@ -6261,6 +6600,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 1, }, { "check bpf_perf_event_data->sample_period byte load permitted", @@ -7248,6 +7588,7 @@ static struct bpf_test tests[] = { }, .fixup_map1 = { 3 }, .result = ACCEPT, + .retval = POINTER_VALUE, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr as return value" }, @@ -7268,6 +7609,7 @@ static struct bpf_test tests[] = { }, .fixup_map1 = { 3 }, .result = ACCEPT, + .retval = POINTER_VALUE, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr as return value" }, @@ -7434,7 +7776,7 @@ static struct bpf_test tests[] = { }, BPF_EXIT_INSN(), }, - .errstr = "BPF_END uses reserved fields", + .errstr = "unknown opcode d7", .result = REJECT, }, { @@ -7709,6 +8051,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = TEST_DATA_LEN, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -8656,6 +8999,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 1, }, { "check deducing bounds from const, 3", @@ -8826,6 +9170,1959 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, }, + { + "calls: basic sanity", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: not on unpriviledged", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 1, + }, + { + "calls: div by 0 in subprog", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(BPF_REG_2, 0), + BPF_MOV32_IMM(BPF_REG_3, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_3, BPF_REG_2), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "calls: multiple ret types in subprog 1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + }, + { + "calls: multiple ret types in subprog 2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, + offsetof(struct __sk_buff, data)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 16 }, + .result = REJECT, + .errstr = "R0 min value is outside of the array range", + }, + { + "calls: overlapping caller/callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn is not an exit or jmp", + .result = REJECT, + }, + { + "calls: wrong recursive calls", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: wrong src reg", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "BPF_CALL uses reserved fields", + .result = REJECT, + }, + { + "calls: wrong off value", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, -1, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "BPF_CALL uses reserved fields", + .result = REJECT, + }, + { + "calls: jump back loop", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn 0 to 0", + .result = REJECT, + }, + { + "calls: conditional call", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: conditional call 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: conditional call 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: conditional call 4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -5), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: conditional call 5", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: conditional call 6", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: using r0 returned by callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: using uninit r0 from callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "calls: callee is using r1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_ACT, + .result = ACCEPT, + .retval = TEST_DATA_LEN, + }, + { + "calls: callee using args1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = POINTER_VALUE, + }, + { + "calls: callee using wrong args2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "calls: callee using two args", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN, + }, + { + "calls: callee changing pkt pointers", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_8, BPF_REG_7, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + /* clear_all_pkt_pointers() has to walk all frames + * to make sure that pkt pointers in the caller + * are cleared when callee is calling a helper that + * adjusts packet size + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_xdp_adjust_head), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R6 invalid mem access 'inv'", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "calls: two calls with args", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = TEST_DATA_LEN + TEST_DATA_LEN, + }, + { + "calls: calls with stack arith", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 42, + }, + { + "calls: calls with misaligned stack access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -61), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, + .errstr = "misaligned stack access", + .result = REJECT, + }, + { + "calls: calls control flow, jump test", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 43), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 43, + }, + { + "calls: calls control flow, jump test 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 43), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "jump out of range from insn 1 to 4", + .result = REJECT, + }, + { + "calls: two calls with bad jump", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range from insn 11 to 9", + .result = REJECT, + }, + { + "calls: recursive call. test1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge", + .result = REJECT, + }, + { + "calls: recursive call. test2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge", + .result = REJECT, + }, + { + "calls: unreachable code", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "unreachable insn 6", + .result = REJECT, + }, + { + "calls: invalid call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -4), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "invalid destination", + .result = REJECT, + }, + { + "calls: invalid call 2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0x7fffffff), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "invalid destination", + .result = REJECT, + }, + { + "calls: jumping across function bodies. test1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: jumping across function bodies. test2", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: call without exit", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "not an exit", + .result = REJECT, + }, + { + "calls: call into middle of ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn", + .result = REJECT, + }, + { + "calls: call into middle of other call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn", + .result = REJECT, + }, + { + "calls: ld_abs with changing ctx data in callee", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_vlan_push), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed", + .result = REJECT, + }, + { + "calls: two calls with bad fallthrough", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "not an exit", + .result = REJECT, + }, + { + "calls: two calls with stack read", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: two calls with stack write", + .insns = { + /* main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 7), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_8), + /* write into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* read from stack frame of main prog */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: stack overflow using two frames (pre-call access)", + .insns = { + /* prog 1 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* prog 2 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "combined stack size", + .result = REJECT, + }, + { + "calls: stack overflow using two frames (post-call access)", + .insns = { + /* prog 1 */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + + /* prog 2 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "combined stack size", + .result = REJECT, + }, + { + "calls: stack depth check using three frames. test1", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=32, stack_A=256, stack_B=64 + * and max(main+A, main+A+B) < 512 + */ + .result = ACCEPT, + }, + { + "calls: stack depth check using three frames. test2", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=32, stack_A=64, stack_B=256 + * and max(main+A, main+A+B) < 512 + */ + .result = ACCEPT, + }, + { + "calls: stack depth check using three frames. test3", + .insns = { + /* main */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 8), /* call B */ + BPF_JMP_IMM(BPF_JGE, BPF_REG_6, 0, 1), + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 10, 1), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_B, BPF_REG_10, -224, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + /* B */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -6), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=64, stack_A=224, stack_B=256 + * and max(main+A, main+A+B) > 512 + */ + .errstr = "combined stack", + .result = REJECT, + }, + { + "calls: stack depth check using three frames. test4", + /* void main(void) { + * func1(0); + * func1(1); + * func2(1); + * } + * void func1(int alloc_or_recurse) { + * if (alloc_or_recurse) { + * frame_pointer[-300] = 1; + * } else { + * func2(alloc_or_recurse); + * } + * } + * void func2(int alloc_or_recurse) { + * if (alloc_or_recurse) { + * frame_pointer[-300] = 1; + * } + * } + */ + .insns = { + /* main */ + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 7), /* call B */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ + BPF_EXIT_INSN(), + /* B */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = REJECT, + .errstr = "combined stack", + }, + { + "calls: stack depth check using three frames. test5", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */ + BPF_EXIT_INSN(), + /* A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */ + BPF_EXIT_INSN(), + /* C */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */ + BPF_EXIT_INSN(), + /* D */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */ + BPF_EXIT_INSN(), + /* E */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */ + BPF_EXIT_INSN(), + /* F */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */ + BPF_EXIT_INSN(), + /* G */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */ + BPF_EXIT_INSN(), + /* H */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "call stack", + .result = REJECT, + }, + { + "calls: spill into caller stack frame", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "cannot spill", + .result = REJECT, + }, + { + "calls: write into caller stack frame", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + .retval = 42, + }, + { + "calls: write into callee stack frame", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "cannot return stack pointer", + .result = REJECT, + }, + { + "calls: two calls with stack write and void return", + .insns = { + /* main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* write into stack frame of main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), /* void return */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: ambiguous return value", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .errstr = "R0 !read_ok", + .result = REJECT, + }, + { + "calls: two calls that return map_value", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = ACCEPT, + }, + { + "calls: two calls that return map_value with bool condition", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), /* return 1 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = ACCEPT, + }, + { + "calls: two calls that return map_value with incorrect bool check", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), /* return 1 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = REJECT, + .errstr = "invalid read from stack off -16+0 size 8", + }, + { + "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=2 size=8", + }, + { + "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = ACCEPT, + }, + { + "calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), // 26 + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), // 30 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), // 34 + BPF_JMP_IMM(BPF_JA, 0, 0, -30), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -8), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=2 size=8", + }, + { + "calls: two calls that receive map_value_ptr_or_null via arg. test1", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = ACCEPT, + }, + { + "calls: two calls that receive map_value_ptr_or_null via arg. test2", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 0 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + }, + { + "calls: pkt_ptr spill into caller stack", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = POINTER_VALUE, + }, + { + "calls: pkt_ptr spill into caller stack 2", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + /* Marking is still kept, but not in all cases safe. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid access to packet", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 3", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* Marking is still kept and safe here. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "calls: pkt_ptr spill into caller stack 4", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* Check marking propagated. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "calls: pkt_ptr spill into caller stack 5", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "same insn cannot be used with different", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R4 invalid mem access", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R4 invalid mem access", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 8", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: pkt_ptr spill into caller stack 9", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid access to packet", + .result = REJECT, + }, + { + "calls: caller stack init to zero or map_value_or_null", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + /* fetch map_value_or_null or const_zero from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* store into map_value */ + BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* if (ctx == 0) return; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8), + /* else bpf_map_lookup() and *(fp - 8) = r0 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 13 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "calls: stack init to zero and pruning", + .insns = { + /* first make allocated_stack 16 byte */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + /* now fork the execution such that the false branch + * of JGT insn will be verified second and it skisp zero + * init of fp-8 stack slot. If stack liveness marking + * is missing live_read marks from call map_lookup + * processing then pruning will incorrectly assume + * that fp-8 stack slot was unused in the fall-through + * branch and will accept the program incorrectly + */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 6 }, + .errstr = "invalid indirect read from stack off -8+0 size 8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "search pruning: all branches should be verified (nop operation)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_A(1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0, 2), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "R6 invalid mem access 'inv'", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "search pruning: all branches should be verified (invalid stack access)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), + BPF_JMP_A(1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -24), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "invalid read from stack off -16+0 size 8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -8937,10 +11234,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int fd_prog, expected_ret, reject_from_alignment; struct bpf_insn *prog = test->insns; int prog_len = probe_filter_length(prog); + char data_in[TEST_DATA_LEN] = {}; int prog_type = test->prog_type; int map_fds[MAX_NR_MAPS]; const char *expected_err; - int i; + uint32_t retval; + int i, err; for (i = 0; i < MAX_NR_MAPS; i++) map_fds[i] = -1; @@ -8983,6 +11282,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv, } } + if (fd_prog >= 0) { + err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in), + NULL, NULL, &retval, NULL); + if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { + printf("Unexpected bpf_prog_test_run error\n"); + goto fail_log; + } + if (!err && retval != test->retval && + test->retval != POINTER_VALUE) { + printf("FAIL retval %d != %d\n", retval, test->retval); + goto fail_log; + } + } (*passes)++; printf("OK%s\n", reject_from_alignment ? " (NOTE: reject due to unknown alignment)" : ""); diff --git a/tools/testing/selftests/bpf/test_xdp_noinline.c b/tools/testing/selftests/bpf/test_xdp_noinline.c new file mode 100644 index 0000000..5e4aac7 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_noinline.c @@ -0,0 +1,833 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include "bpf_helpers.h" + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +static __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static __attribute__ ((noinline)) +u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static __attribute__ ((noinline)) +u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static __attribute__ ((noinline)) +u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +struct flow_key { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; +}; + +struct packet_description { + struct flow_key flow; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_definition { + union { + __be32 vip; + __be32 vipv6[4]; + }; + __u16 port; + __u16 family; + __u8 proto; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_pos_lru { + __u32 pos; + __u64 atime; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct lb_stats { + __u64 v2; + __u64 v1; +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip_definition), + .value_size = sizeof(struct vip_meta), + .max_entries = 512, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = { + .type = BPF_MAP_TYPE_LRU_HASH, + .key_size = sizeof(struct flow_key), + .value_size = sizeof(struct real_pos_lru), + .max_entries = 300, + .map_flags = 1U << 1, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 12 * 655, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) reals = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct real_definition), + .max_entries = 40, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) stats = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct lb_stats), + .max_entries = 515, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = 16, + .map_flags = 0, +}; + +struct eth_hdr { + unsigned char eth_dest[6]; + unsigned char eth_source[6]; + unsigned short eth_proto; +}; + +static inline __u64 calc_offset(bool is_ipv6, bool is_icmp) +{ + __u64 off = sizeof(struct eth_hdr); + if (is_ipv6) { + off += sizeof(struct ipv6hdr); + if (is_icmp) + off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr); + } else { + off += sizeof(struct iphdr); + if (is_icmp) + off += sizeof(struct icmphdr) + sizeof(struct iphdr); + } + return off; +} + +static __attribute__ ((noinline)) +bool parse_udp(void *data, void *data_end, + bool is_ipv6, struct packet_description *pckt) +{ + + bool is_icmp = !((pckt->flags & (1 << 0)) == 0); + __u64 off = calc_offset(is_ipv6, is_icmp); + struct udphdr *udp; + udp = data + off; + + if (udp + 1 > data_end) + return 0; + if (!is_icmp) { + pckt->flow.port16[0] = udp->source; + pckt->flow.port16[1] = udp->dest; + } else { + pckt->flow.port16[0] = udp->dest; + pckt->flow.port16[1] = udp->source; + } + return 1; +} + +static __attribute__ ((noinline)) +bool parse_tcp(void *data, void *data_end, + bool is_ipv6, struct packet_description *pckt) +{ + + bool is_icmp = !((pckt->flags & (1 << 0)) == 0); + __u64 off = calc_offset(is_ipv6, is_icmp); + struct tcphdr *tcp; + + tcp = data + off; + if (tcp + 1 > data_end) + return 0; + if (tcp->syn) + pckt->flags |= (1 << 1); + if (!is_icmp) { + pckt->flow.port16[0] = tcp->source; + pckt->flow.port16[1] = tcp->dest; + } else { + pckt->flow.port16[0] = tcp->dest; + pckt->flow.port16[1] = tcp->source; + } + return 1; +} + +static __attribute__ ((noinline)) +bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, + struct packet_description *pckt, + struct real_definition *dst, __u32 pkt_bytes) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + struct ipv6hdr *ip6h; + __u32 ip_suffix; + void *data_end; + void *data; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) + return 0; + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + new_eth = data; + ip6h = data + sizeof(struct eth_hdr); + old_eth = data + sizeof(struct ipv6hdr); + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || ip6h + 1 > data_end) + return 0; + memcpy(new_eth->eth_dest, cval->mac, 6); + memcpy(new_eth->eth_source, old_eth->eth_dest, 6); + new_eth->eth_proto = 56710; + ip6h->version = 6; + ip6h->priority = 0; + memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); + + ip6h->nexthdr = IPPROTO_IPV6; + ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0]; + ip6h->payload_len = + __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr)); + ip6h->hop_limit = 4; + + ip6h->saddr.in6_u.u6_addr32[0] = 1; + ip6h->saddr.in6_u.u6_addr32[1] = 2; + ip6h->saddr.in6_u.u6_addr32[2] = 3; + ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix; + memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16); + return 1; +} + +static __attribute__ ((noinline)) +bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, + struct packet_description *pckt, + struct real_definition *dst, __u32 pkt_bytes) +{ + + __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]); + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + __u16 *next_iph_u16; + struct iphdr *iph; + __u32 csum = 0; + void *data_end; + void *data; + + ip_suffix <<= 15; + ip_suffix ^= pckt->flow.src; + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) + return 0; + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + new_eth = data; + iph = data + sizeof(struct eth_hdr); + old_eth = data + sizeof(struct iphdr); + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || iph + 1 > data_end) + return 0; + memcpy(new_eth->eth_dest, cval->mac, 6); + memcpy(new_eth->eth_source, old_eth->eth_dest, 6); + new_eth->eth_proto = 8; + iph->version = 4; + iph->ihl = 5; + iph->frag_off = 0; + iph->protocol = IPPROTO_IPIP; + iph->check = 0; + iph->tos = 1; + iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr)); + /* don't update iph->daddr, since it will overwrite old eth_proto + * and multiple iterations of bpf_prog_run() will fail + */ + + iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst; + iph->ttl = 4; + + next_iph_u16 = (__u16 *) iph; +#pragma clang loop unroll(full) + for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_iph_u16++; + iph->check = ~((csum & 0xffff) + (csum >> 16)); + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) + return 0; + return 1; +} + +static __attribute__ ((noinline)) +bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + + old_eth = *data; + new_eth = *data + sizeof(struct ipv6hdr); + memcpy(new_eth->eth_source, old_eth->eth_source, 6); + memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); + if (inner_v4) + new_eth->eth_proto = 8; + else + new_eth->eth_proto = 56710; + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) + return 0; + *data = (void *)(long)xdp->data; + *data_end = (void *)(long)xdp->data_end; + return 1; +} + +static __attribute__ ((noinline)) +bool decap_v4(struct xdp_md *xdp, void **data, void **data_end) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + + old_eth = *data; + new_eth = *data + sizeof(struct iphdr); + memcpy(new_eth->eth_source, old_eth->eth_source, 6); + memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); + new_eth->eth_proto = 8; + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) + return 0; + *data = (void *)(long)xdp->data; + *data_end = (void *)(long)xdp->data_end; + return 1; +} + +static __attribute__ ((noinline)) +int swap_mac_and_send(void *data, void *data_end) +{ + unsigned char tmp_mac[6]; + struct eth_hdr *eth; + + eth = data; + memcpy(tmp_mac, eth->eth_source, 6); + memcpy(eth->eth_source, eth->eth_dest, 6); + memcpy(eth->eth_dest, tmp_mac, 6); + return XDP_TX; +} + +static __attribute__ ((noinline)) +int send_icmp_reply(void *data, void *data_end) +{ + struct icmphdr *icmp_hdr; + __u16 *next_iph_u16; + __u32 tmp_addr = 0; + struct iphdr *iph; + __u32 csum1 = 0; + __u32 csum = 0; + __u64 off = 0; + + if (data + sizeof(struct eth_hdr) + + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end) + return XDP_DROP; + off += sizeof(struct eth_hdr); + iph = data + off; + off += sizeof(struct iphdr); + icmp_hdr = data + off; + icmp_hdr->type = 0; + icmp_hdr->checksum += 0x0007; + iph->ttl = 4; + tmp_addr = iph->daddr; + iph->daddr = iph->saddr; + iph->saddr = tmp_addr; + iph->check = 0; + next_iph_u16 = (__u16 *) iph; +#pragma clang loop unroll(full) + for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_iph_u16++; + iph->check = ~((csum & 0xffff) + (csum >> 16)); + return swap_mac_and_send(data, data_end); +} + +static __attribute__ ((noinline)) +int send_icmp6_reply(void *data, void *data_end) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + __be32 tmp_addr[4]; + __u64 off = 0; + + if (data + sizeof(struct eth_hdr) + + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end) + return XDP_DROP; + off += sizeof(struct eth_hdr); + ip6h = data + off; + off += sizeof(struct ipv6hdr); + icmp_hdr = data + off; + icmp_hdr->icmp6_type = 129; + icmp_hdr->icmp6_cksum -= 0x0001; + ip6h->hop_limit = 4; + memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16); + memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16); + memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16); + return swap_mac_and_send(data, data_end); +} + +static __attribute__ ((noinline)) +int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return XDP_DROP; + if (icmp_hdr->icmp6_type == 128) + return send_icmp6_reply(data, data_end); + if (icmp_hdr->icmp6_type != 3) + return XDP_PASS; + off += sizeof(struct icmp6hdr); + ip6h = data + off; + if (ip6h + 1 > data_end) + return XDP_DROP; + pckt->flow.proto = ip6h->nexthdr; + pckt->flags |= (1 << 0); + memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16); + memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16); + return -1; +} + +static __attribute__ ((noinline)) +int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return XDP_DROP; + if (icmp_hdr->type == 8) + return send_icmp_reply(data, data_end); + if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4)) + return XDP_PASS; + off += sizeof(struct icmphdr); + iph = data + off; + if (iph + 1 > data_end) + return XDP_DROP; + if (iph->ihl != 5) + return XDP_DROP; + pckt->flow.proto = iph->protocol; + pckt->flags |= (1 << 0); + pckt->flow.src = iph->daddr; + pckt->flow.dst = iph->saddr; + return -1; +} + +static __attribute__ ((noinline)) +__u32 get_packet_hash(struct packet_description *pckt, + bool hash_16bytes) +{ + if (hash_16bytes) + return jhash_2words(jhash(pckt->flow.srcv6, 16, 12), + pckt->flow.ports, 24); + else + return jhash_2words(pckt->flow.src, pckt->flow.ports, + 24); +} + +__attribute__ ((noinline)) +static bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6, void *lru_map) +{ + struct real_pos_lru new_dst_lru = { }; + bool hash_16bytes = is_ipv6; + __u32 *real_pos, hash, key; + __u64 cur_time; + + if (vip_info->flags & (1 << 2)) + hash_16bytes = 1; + if (vip_info->flags & (1 << 3)) { + pckt->flow.port16[0] = pckt->flow.port16[1]; + memset(pckt->flow.srcv6, 0, 16); + } + hash = get_packet_hash(pckt, hash_16bytes); + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) + return 0; + key = 2 * vip_info->vip_num + hash % 2; + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return 0; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return 0; + if (!(vip_info->flags & (1 << 1))) { + __u32 conn_rate_key = 512 + 2; + struct lb_stats *conn_rate_stats = + bpf_map_lookup_elem(&stats, &conn_rate_key); + + if (!conn_rate_stats) + return 1; + cur_time = bpf_ktime_get_ns(); + if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) { + conn_rate_stats->v1 = 1; + conn_rate_stats->v2 = cur_time; + } else { + conn_rate_stats->v1 += 1; + if (conn_rate_stats->v1 >= 1) + return 1; + } + if (pckt->flow.proto == IPPROTO_UDP) + new_dst_lru.atime = cur_time; + new_dst_lru.pos = key; + bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0); + } + return 1; +} + +__attribute__ ((noinline)) +static void connection_table_lookup(struct real_definition **real, + struct packet_description *pckt, + void *lru_map) +{ + + struct real_pos_lru *dst_lru; + __u64 cur_time; + __u32 key; + + dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow); + if (!dst_lru) + return; + if (pckt->flow.proto == IPPROTO_UDP) { + cur_time = bpf_ktime_get_ns(); + if (cur_time - dst_lru->atime > 300000) + return; + dst_lru->atime = cur_time; + } + key = dst_lru->pos; + *real = bpf_map_lookup_elem(&reals, &key); +} + +/* don't believe your eyes! + * below function has 6 arguments whereas bpf and llvm allow maximum of 5 + * but since it's _static_ llvm can optimize one argument away + */ +__attribute__ ((noinline)) +static int process_l3_headers_v6(struct packet_description *pckt, + __u8 *protocol, __u64 off, + __u16 *pkt_bytes, void *data, + void *data_end) +{ + struct ipv6hdr *ip6h; + __u64 iph_len; + int action; + + ip6h = data + off; + if (ip6h + 1 > data_end) + return XDP_DROP; + iph_len = sizeof(struct ipv6hdr); + *protocol = ip6h->nexthdr; + pckt->flow.proto = *protocol; + *pkt_bytes = __builtin_bswap16(ip6h->payload_len); + off += iph_len; + if (*protocol == 45) { + return XDP_DROP; + } else if (*protocol == 59) { + action = parse_icmpv6(data, data_end, off, pckt); + if (action >= 0) + return action; + } else { + memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16); + memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16); + } + return -1; +} + +__attribute__ ((noinline)) +static int process_l3_headers_v4(struct packet_description *pckt, + __u8 *protocol, __u64 off, + __u16 *pkt_bytes, void *data, + void *data_end) +{ + struct iphdr *iph; + __u64 iph_len; + int action; + + iph = data + off; + if (iph + 1 > data_end) + return XDP_DROP; + if (iph->ihl != 5) + return XDP_DROP; + *protocol = iph->protocol; + pckt->flow.proto = *protocol; + *pkt_bytes = __builtin_bswap16(iph->tot_len); + off += 20; + if (iph->frag_off & 65343) + return XDP_DROP; + if (*protocol == IPPROTO_ICMP) { + action = parse_icmp(data, data_end, off, pckt); + if (action >= 0) + return action; + } else { + pckt->flow.src = iph->saddr; + pckt->flow.dst = iph->daddr; + } + return -1; +} + +__attribute__ ((noinline)) +static int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct xdp_md *xdp) +{ + + struct real_definition *dst = NULL; + struct packet_description pckt = { }; + struct vip_definition vip = { }; + struct lb_stats *data_stats; + struct eth_hdr *eth = data; + void *lru_map = &lru_cache; + struct vip_meta *vip_info; + __u32 lru_stats_key = 513; + __u32 mac_addr_pos = 0; + __u32 stats_key = 512; + struct ctl_value *cval; + __u16 pkt_bytes; + __u64 iph_len; + __u8 protocol; + __u32 vip_num; + int action; + + if (is_ipv6) + action = process_l3_headers_v6(&pckt, &protocol, off, + &pkt_bytes, data, data_end); + else + action = process_l3_headers_v4(&pckt, &protocol, off, + &pkt_bytes, data, data_end); + if (action >= 0) + return action; + protocol = pckt.flow.proto; + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(data, data_end, is_ipv6, &pckt)) + return XDP_DROP; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(data, data_end, is_ipv6, &pckt)) + return XDP_DROP; + } else { + return XDP_TX; + } + + if (is_ipv6) + memcpy(vip.vipv6, pckt.flow.dstv6, 16); + else + vip.vip = pckt.flow.dst; + vip.port = pckt.flow.port16[1]; + vip.proto = pckt.flow.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.port = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return XDP_PASS; + if (!(vip_info->flags & (1 << 4))) + pckt.flow.port16[1] = 0; + } + if (data_end - data > 1400) + return XDP_DROP; + data_stats = bpf_map_lookup_elem(&stats, &stats_key); + if (!data_stats) + return XDP_DROP; + data_stats->v1 += 1; + if (!dst) { + if (vip_info->flags & (1 << 0)) + pckt.flow.port16[0] = 0; + if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1))) + connection_table_lookup(&dst, &pckt, lru_map); + if (dst) + goto out; + if (pckt.flow.proto == IPPROTO_TCP) { + struct lb_stats *lru_stats = + bpf_map_lookup_elem(&stats, &lru_stats_key); + + if (!lru_stats) + return XDP_DROP; + if (pckt.flags & (1 << 1)) + lru_stats->v1 += 1; + else + lru_stats->v2 += 1; + } + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map)) + return XDP_DROP; + data_stats->v2 += 1; + } +out: + cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos); + if (!cval) + return XDP_DROP; + if (dst->flags & (1 << 0)) { + if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes)) + return XDP_DROP; + } else { + if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes)) + return XDP_DROP; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return XDP_DROP; + data_stats->v1 += 1; + data_stats->v2 += pkt_bytes; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + if (data + 4 > data_end) + return XDP_DROP; + *(u32 *)data = dst->dst; + return XDP_DROP; +} + +__attribute__ ((section("xdp-test"), used)) +int balancer_ingress(struct xdp_md *ctx) +{ + void *data = (void *)(long)ctx->data; + void *data_end = (void *)(long)ctx->data_end; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return XDP_DROP; + eth_proto = eth->eth_proto; + if (eth_proto == 8) + return process_packet(data, nh_off, data_end, 0, ctx); + else if (eth_proto == 56710) + return process_packet(data, nh_off, data_end, 1, ctx); + else + return XDP_DROP; +} + +char _license[] __attribute__ ((section("license"), used)) = "GPL"; +int _version __attribute__ ((section("version"), used)) = 1; diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 500c74d..d7c30d3 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -5,6 +5,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh +TEST_PROGS += fib_tests.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh new file mode 100755 index 0000000..a9154ee --- /dev/null +++ b/tools/testing/selftests/net/fib_tests.sh @@ -0,0 +1,429 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking IPv4 and IPv6 FIB behavior in response to +# different events. + +ret=0 + +check_err() +{ + if [ $ret -eq 0 ]; then + ret=$1 + fi +} + +check_fail() +{ + if [ $1 -eq 0 ]; then + ret=1 + fi +} + +netns_create() +{ + local testns=$1 + + ip netns add $testns + ip netns exec $testns ip link set dev lo up +} + +fib_unreg_unicast_test() +{ + ret=0 + + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_err $? + + ip netns exec testns ip link del dev dummy0 + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_fail $? + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: unicast route test" + return 1 + fi + echo "PASS: unicast route test" +} + +fib_unreg_multipath_test() +{ + ret=0 + + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip link add dummy1 type dummy + ip netns exec testns ip link set dev dummy1 up + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip address add 192.0.2.1/24 dev dummy1 + ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1 + + ip netns exec testns ip route add 203.0.113.0/24 \ + nexthop via 198.51.100.2 dev dummy0 \ + nexthop via 192.0.2.2 dev dummy1 + ip netns exec testns ip -6 route add 2001:db8:3::/64 \ + nexthop via 2001:db8:1::2 dev dummy0 \ + nexthop via 2001:db8:2::2 dev dummy1 + + ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null + check_err $? + + ip netns exec testns ip link del dev dummy0 + check_err $? + + ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null + # In IPv6 we do not flush the entire multipath route. + check_err $? + + ip netns exec testns ip link del dev dummy1 + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: multipath route test" + return 1 + fi + echo "PASS: multipath route test" +} + +fib_unreg_test() +{ + echo "Running netdev unregister tests" + + fib_unreg_unicast_test + fib_unreg_multipath_test +} + +fib_down_unicast_test() +{ + ret=0 + + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_err $? + + ip netns exec testns ip link set dev dummy0 down + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_fail $? + + ip netns exec testns ip link del dev dummy0 + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: unicast route test" + return 1 + fi + echo "PASS: unicast route test" +} + +fib_down_multipath_test_do() +{ + local down_dev=$1 + local up_dev=$2 + + ip netns exec testns ip route get fibmatch 203.0.113.1 \ + oif $down_dev &> /dev/null + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \ + oif $down_dev &> /dev/null + check_fail $? + + ip netns exec testns ip route get fibmatch 203.0.113.1 \ + oif $up_dev &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \ + oif $up_dev &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 203.0.113.1 | \ + grep $down_dev | grep -q "dead linkdown" + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \ + grep $down_dev | grep -q "dead linkdown" + check_err $? + + ip netns exec testns ip route get fibmatch 203.0.113.1 | \ + grep $up_dev | grep -q "dead linkdown" + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \ + grep $up_dev | grep -q "dead linkdown" + check_fail $? +} + +fib_down_multipath_test() +{ + ret=0 + + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip link add dummy1 type dummy + ip netns exec testns ip link set dev dummy1 up + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip address add 192.0.2.1/24 dev dummy1 + ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1 + + ip netns exec testns ip route add 203.0.113.0/24 \ + nexthop via 198.51.100.2 dev dummy0 \ + nexthop via 192.0.2.2 dev dummy1 + ip netns exec testns ip -6 route add 2001:db8:3::/64 \ + nexthop via 2001:db8:1::2 dev dummy0 \ + nexthop via 2001:db8:2::2 dev dummy1 + + ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null + check_err $? + + ip netns exec testns ip link set dev dummy0 down + check_err $? + + fib_down_multipath_test_do "dummy0" "dummy1" + + ip netns exec testns ip link set dev dummy0 up + check_err $? + ip netns exec testns ip link set dev dummy1 down + check_err $? + + fib_down_multipath_test_do "dummy1" "dummy0" + + ip netns exec testns ip link set dev dummy0 down + check_err $? + + ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null + check_fail $? + + ip netns exec testns ip link del dev dummy1 + ip netns exec testns ip link del dev dummy0 + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: multipath route test" + return 1 + fi + echo "PASS: multipath route test" +} + +fib_down_test() +{ + echo "Running netdev down tests" + + fib_down_unicast_test + fib_down_multipath_test +} + +fib_carrier_local_test() +{ + ret=0 + + # Local routes should not be affected when carrier changes. + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip link set dev dummy0 carrier on + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.1 | \ + grep -q "linkdown" + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \ + grep -q "linkdown" + check_fail $? + + ip netns exec testns ip link set dev dummy0 carrier off + + ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.1 | \ + grep -q "linkdown" + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \ + grep -q "linkdown" + check_fail $? + + ip netns exec testns ip address add 192.0.2.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 192.0.2.1 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 192.0.2.1 | \ + grep -q "linkdown" + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 | \ + grep -q "linkdown" + check_fail $? + + ip netns exec testns ip link del dev dummy0 + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: local route carrier test" + return 1 + fi + echo "PASS: local route carrier test" +} + +fib_carrier_unicast_test() +{ + ret=0 + + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip link set dev dummy0 carrier on + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.2 | \ + grep -q "linkdown" + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \ + grep -q "linkdown" + check_fail $? + + ip netns exec testns ip link set dev dummy0 carrier off + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.2 | \ + grep -q "linkdown" + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \ + grep -q "linkdown" + check_err $? + + ip netns exec testns ip address add 192.0.2.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 192.0.2.2 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 192.0.2.2 | \ + grep -q "linkdown" + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 | \ + grep -q "linkdown" + check_err $? + + ip netns exec testns ip link del dev dummy0 + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: unicast route carrier test" + return 1 + fi + echo "PASS: unicast route carrier test" +} + +fib_carrier_test() +{ + echo "Running netdev carrier change tests" + + fib_carrier_local_test + fib_carrier_unicast_test +} + +fib_test() +{ + fib_unreg_test + fib_down_test + fib_carrier_test +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 0 +fi + +ip route help 2>&1 | grep -q fibmatch +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing fibmatch" + exit 0 +fi + +fib_test + +exit $ret diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index 3ab6ec4..e11fe84 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -259,22 +259,28 @@ static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len) return sizeof(*ip6h); } -static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr) + +static void setup_sockaddr(int domain, const char *str_addr, + struct sockaddr_storage *sockaddr) { struct sockaddr_in6 *addr6 = (void *) sockaddr; struct sockaddr_in *addr4 = (void *) sockaddr; switch (domain) { case PF_INET: + memset(addr4, 0, sizeof(*addr4)); addr4->sin_family = AF_INET; addr4->sin_port = htons(cfg_port); - if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + if (str_addr && + inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) error(1, 0, "ipv4 parse error: %s", str_addr); break; case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); addr6->sin6_family = AF_INET6; addr6->sin6_port = htons(cfg_port); - if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + if (str_addr && + inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) error(1, 0, "ipv6 parse error: %s", str_addr); break; default: @@ -603,6 +609,7 @@ static void parse_opts(int argc, char **argv) sizeof(struct tcphdr) - 40 /* max tcp options */; int c; + char *daddr = NULL, *saddr = NULL; cfg_payload_len = max_payload_len; @@ -627,7 +634,7 @@ static void parse_opts(int argc, char **argv) cfg_cpu = strtol(optarg, NULL, 0); break; case 'D': - setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + daddr = optarg; break; case 'i': cfg_ifindex = if_nametoindex(optarg); @@ -638,7 +645,7 @@ static void parse_opts(int argc, char **argv) cfg_cork_mixed = true; break; case 'p': - cfg_port = htons(strtoul(optarg, NULL, 0)); + cfg_port = strtoul(optarg, NULL, 0); break; case 'r': cfg_rx = true; @@ -647,7 +654,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = strtoul(optarg, NULL, 0); break; case 'S': - setup_sockaddr(cfg_family, optarg, &cfg_src_addr); + saddr = optarg; break; case 't': cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000; @@ -660,6 +667,8 @@ static void parse_opts(int argc, char **argv) break; } } + setup_sockaddr(cfg_family, daddr, &cfg_dst_addr); + setup_sockaddr(cfg_family, saddr, &cfg_src_addr); if (cfg_payload_len > max_payload_len) error(1, 0, "-s: payload exceeds max (%d)", max_payload_len); diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 5215493..a622eee 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -502,6 +502,231 @@ kci_test_macsec() echo "PASS: macsec" } +kci_test_gretap() +{ + testns="testns" + DEV_NS=gretap00 + ret=0 + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP gretap tests: cannot add net namespace $testns" + return 1 + fi + + ip link help gretap 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: gretap: iproute2 too old" + return 1 + fi + + # test native tunnel + ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap seq \ + key 102 local 172.16.1.100 remote 172.16.1.200 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: gretap" + return 1 + fi + echo "PASS: gretap" + + ip netns del "$testns" +} + +kci_test_ip6gretap() +{ + testns="testns" + DEV_NS=ip6gretap00 + ret=0 + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP ip6gretap tests: cannot add net namespace $testns" + return 1 + fi + + ip link help ip6gretap 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: ip6gretap: iproute2 too old" + return 1 + fi + + # test native tunnel + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap seq \ + key 102 local fc00:100::1 remote fc00:100::2 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" fc00:200::1/96 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: ip6gretap" + return 1 + fi + echo "PASS: ip6gretap" + + ip netns del "$testns" +} + +kci_test_erspan() +{ + testns="testns" + DEV_NS=erspan00 + ret=0 + + ip link help erspan 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: erspan: iproute2 too old" + return 1 + fi + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP erspan tests: cannot add net namespace $testns" + return 1 + fi + + # test native tunnel erspan v1 + ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \ + key 102 local 172.16.1.100 remote 172.16.1.200 \ + erspan_ver 1 erspan 488 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test native tunnel erspan v2 + ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \ + key 102 local 172.16.1.100 remote 172.16.1.200 \ + erspan_ver 2 erspan_dir ingress erspan_hwid 7 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: erspan" + return 1 + fi + echo "PASS: erspan" + + ip netns del "$testns" +} + +kci_test_ip6erspan() +{ + testns="testns" + DEV_NS=ip6erspan00 + ret=0 + + ip link help ip6erspan 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: ip6erspan: iproute2 too old" + return 1 + fi + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP ip6erspan tests: cannot add net namespace $testns" + return 1 + fi + + # test native tunnel ip6erspan v1 + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \ + key 102 local fc00:100::1 remote fc00:100::2 \ + erspan_ver 1 erspan 488 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test native tunnel ip6erspan v2 + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \ + key 102 local fc00:100::1 remote fc00:100::2 \ + erspan_ver 2 erspan_dir ingress erspan_hwid 7 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" \ + type ip6erspan external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: ip6erspan" + return 1 + fi + echo "PASS: ip6erspan" + + ip netns del "$testns" +} + kci_test_rtnl() { kci_add_dummy @@ -514,6 +739,10 @@ kci_test_rtnl() kci_test_route_get kci_test_tc kci_test_gre + kci_test_gretap + kci_test_ip6gretap + kci_test_erspan + kci_test_ip6erspan kci_test_bridge kci_test_addrlabel kci_test_ifalias diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 395521a..fca8381 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -118,7 +118,7 @@ static inline void free_page(unsigned long addr) #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) -#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n")) +#define WARN_ON_ONCE(cond) ((cond) ? fprintf (stderr, "WARNING\n") : 0) #define min(x, y) ({ \ typeof(x) _min1 = (x); \ diff --git a/tools/virtio/linux/thread_info.h b/tools/virtio/linux/thread_info.h new file mode 100644 index 0000000..e0f610d --- /dev/null +++ b/tools/virtio/linux/thread_info.h @@ -0,0 +1 @@ +#define check_copy_size(A, B, C) (1) diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h index 5706e07..301d59b 100644 --- a/tools/virtio/ringtest/main.h +++ b/tools/virtio/ringtest/main.h @@ -111,7 +111,7 @@ static inline void busy_wait(void) } #if defined(__x86_64__) || defined(__i386__) -#define smp_mb() asm volatile("lock; addl $0,-128(%%rsp)" ::: "memory", "cc") +#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") #else /* * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized @@ -134,4 +134,61 @@ static inline void busy_wait(void) barrier(); \ } while (0) +#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) +#define smp_wmb() barrier() +#else +#define smp_wmb() smp_release() +#endif + +#ifdef __alpha__ +#define smp_read_barrier_depends() smp_acquire() +#else +#define smp_read_barrier_depends() do {} while(0) +#endif + +static __always_inline +void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { \ + case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break; \ + case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break; \ + case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break; \ + case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break; \ + default: \ + barrier(); \ + __builtin_memcpy((void *)res, (const void *)p, size); \ + barrier(); \ + } \ +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break; + case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break; + case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break; + case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break; + default: + barrier(); + __builtin_memcpy((void *)p, (const void *)res, size); + barrier(); + } +} + +#define READ_ONCE(x) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u; \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ + __u.__val; \ +}) + +#define WRITE_ONCE(x, val) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__val = (typeof(x)) (val) }; \ + __write_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) + #endif diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c index e6e8130..477899c 100644 --- a/tools/virtio/ringtest/ptr_ring.c +++ b/tools/virtio/ringtest/ptr_ring.c @@ -187,7 +187,7 @@ bool enable_kick() bool avail_empty() { - return !__ptr_ring_peek(&array); + return __ptr_ring_empty(&array); } bool use_buf(unsigned *lenp, void **bufp) |