summaryrefslogtreecommitdiffstats
path: root/samples
diff options
context:
space:
mode:
Diffstat (limited to 'samples')
-rw-r--r--samples/bpf/Makefile32
-rw-r--r--samples/bpf/bpf_helpers.h8
-rw-r--r--samples/bpf/bpf_load.c112
-rw-r--r--samples/bpf/bpf_load.h2
-rw-r--r--samples/bpf/cgroup_helpers.c177
-rw-r--r--samples/bpf/cgroup_helpers.h16
-rw-r--r--samples/bpf/libbpf.c21
-rw-r--r--samples/bpf/libbpf.h5
-rw-r--r--samples/bpf/lwt_len_hist.sh37
-rw-r--r--samples/bpf/lwt_len_hist_kern.c82
-rw-r--r--samples/bpf/lwt_len_hist_user.c76
-rw-r--r--samples/bpf/map_perf_test_kern.c39
-rw-r--r--samples/bpf/map_perf_test_user.c32
-rw-r--r--samples/bpf/parse_ldabs.c1
-rw-r--r--samples/bpf/parse_simple.c1
-rw-r--r--samples/bpf/parse_varlen.c1
-rw-r--r--samples/bpf/sampleip_kern.c2
-rw-r--r--samples/bpf/sock_flags_kern.c44
-rw-r--r--samples/bpf/sockex2_kern.c2
-rwxr-xr-xsamples/bpf/tc_l2_redirect.sh173
-rw-r--r--samples/bpf/tc_l2_redirect_kern.c236
-rw-r--r--samples/bpf/tc_l2_redirect_user.c73
-rw-r--r--samples/bpf/tcbpf1_kern.c1
-rw-r--r--samples/bpf/tcbpf2_kern.c1
-rw-r--r--samples/bpf/test_cgrp2_attach.c167
-rw-r--r--samples/bpf/test_cgrp2_attach2.c132
-rw-r--r--samples/bpf/test_cgrp2_sock.c83
-rwxr-xr-xsamples/bpf/test_cgrp2_sock.sh47
-rw-r--r--samples/bpf/test_cgrp2_sock2.c66
-rwxr-xr-xsamples/bpf/test_cgrp2_sock2.sh81
-rw-r--r--samples/bpf/test_cgrp2_tc_kern.c1
-rw-r--r--samples/bpf/test_current_task_under_cgroup_user.c108
-rw-r--r--samples/bpf/test_lru_dist.c541
-rw-r--r--samples/bpf/test_lwt_bpf.c253
-rw-r--r--samples/bpf/test_lwt_bpf.sh399
-rw-r--r--samples/bpf/test_maps.c503
-rw-r--r--samples/bpf/test_verifier.c2529
-rw-r--r--samples/bpf/trace_event_kern.c2
-rw-r--r--samples/bpf/tracex2_user.c4
-rw-r--r--samples/bpf/tracex3_user.c6
-rw-r--r--samples/bpf/xdp1_user.c97
-rw-r--r--samples/bpf/xdp_tx_iptunnel_common.h37
-rw-r--r--samples/bpf/xdp_tx_iptunnel_kern.c236
-rw-r--r--samples/bpf/xdp_tx_iptunnel_user.c256
-rw-r--r--samples/seccomp/Makefile4
-rw-r--r--samples/seccomp/bpf-helper.c38
-rw-r--r--samples/seccomp/dropper.c7
-rw-r--r--samples/trace_events/trace-events-sample.c3
-rw-r--r--samples/trace_events/trace-events-sample.h2
-rw-r--r--samples/vfio-mdev/Makefile13
-rw-r--r--samples/vfio-mdev/mtty.c1503
51 files changed, 5042 insertions, 3250 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index b1e3228..13315ff 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -2,7 +2,7 @@
obj- := dummy.o
# List of programs to build
-hostprogs-y := test_verifier test_maps
+hostprogs-y := test_lru_dist
hostprogs-y += sock_example
hostprogs-y += fds_example
hostprogs-y += sockex1
@@ -22,14 +22,20 @@ hostprogs-y += spintest
hostprogs-y += map_perf_test
hostprogs-y += test_overhead
hostprogs-y += test_cgrp2_array_pin
+hostprogs-y += test_cgrp2_attach
+hostprogs-y += test_cgrp2_attach2
+hostprogs-y += test_cgrp2_sock
+hostprogs-y += test_cgrp2_sock2
hostprogs-y += xdp1
hostprogs-y += xdp2
hostprogs-y += test_current_task_under_cgroup
hostprogs-y += trace_event
hostprogs-y += sampleip
+hostprogs-y += tc_l2_redirect
+hostprogs-y += lwt_len_hist
+hostprogs-y += xdp_tx_iptunnel
-test_verifier-objs := test_verifier.o libbpf.o
-test_maps-objs := test_maps.o libbpf.o
+test_lru_dist-objs := test_lru_dist.o libbpf.o
sock_example-objs := sock_example.o libbpf.o
fds_example-objs := bpf_load.o libbpf.o fds_example.o
sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
@@ -49,13 +55,20 @@ spintest-objs := bpf_load.o libbpf.o spintest_user.o
map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
+test_cgrp2_attach-objs := libbpf.o test_cgrp2_attach.o
+test_cgrp2_attach2-objs := libbpf.o test_cgrp2_attach2.o cgroup_helpers.o
+test_cgrp2_sock-objs := libbpf.o test_cgrp2_sock.o
+test_cgrp2_sock2-objs := bpf_load.o libbpf.o test_cgrp2_sock2.o
xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
# reuse xdp1 source intentionally
xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
-test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \
+test_current_task_under_cgroup-objs := bpf_load.o libbpf.o cgroup_helpers.o \
test_current_task_under_cgroup_user.o
trace_event-objs := bpf_load.o libbpf.o trace_event_user.o
sampleip-objs := bpf_load.o libbpf.o sampleip_user.o
+tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o
+lwt_len_hist-objs := bpf_load.o libbpf.o lwt_len_hist_user.o
+xdp_tx_iptunnel-objs := bpf_load.o libbpf.o xdp_tx_iptunnel_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -68,10 +81,12 @@ always += tracex3_kern.o
always += tracex4_kern.o
always += tracex5_kern.o
always += tracex6_kern.o
+always += sock_flags_kern.o
always += test_probe_write_user_kern.o
always += trace_output_kern.o
always += tcbpf1_kern.o
always += tcbpf2_kern.o
+always += tc_l2_redirect_kern.o
always += lathist_kern.o
always += offwaketime_kern.o
always += spintest_kern.o
@@ -85,8 +100,11 @@ always += xdp2_kern.o
always += test_current_task_under_cgroup_kern.o
always += trace_event_kern.o
always += sampleip_kern.o
+always += lwt_len_hist_kern.o
+always += xdp_tx_iptunnel_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
+HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
HOSTLOADLIBES_fds_example += -lelf
@@ -99,6 +117,7 @@ HOSTLOADLIBES_tracex3 += -lelf
HOSTLOADLIBES_tracex4 += -lelf -lrt
HOSTLOADLIBES_tracex5 += -lelf
HOSTLOADLIBES_tracex6 += -lelf
+HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
HOSTLOADLIBES_test_probe_write_user += -lelf
HOSTLOADLIBES_trace_output += -lelf -lrt
HOSTLOADLIBES_lathist += -lelf
@@ -111,6 +130,9 @@ HOSTLOADLIBES_xdp2 += -lelf
HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
HOSTLOADLIBES_trace_event += -lelf
HOSTLOADLIBES_sampleip += -lelf
+HOSTLOADLIBES_tc_l2_redirect += -l elf
+HOSTLOADLIBES_lwt_len_hist += -l elf
+HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
@@ -152,4 +174,6 @@ $(obj)/%.o: $(src)/%.c
$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
-D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
-Wno-compare-distinct-pointer-types \
+ -Wno-gnu-variable-sized-type-not-at-end \
+ -Wno-address-of-packed-member -Wno-tautological-compare \
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 90f44bd..faaffe2 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -57,6 +57,8 @@ static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) =
(void *) BPF_FUNC_skb_set_tunnel_opt;
static unsigned long long (*bpf_get_prandom_u32)(void) =
(void *) BPF_FUNC_get_prandom_u32;
+static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
+ (void *) BPF_FUNC_xdp_adjust_head;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -80,6 +82,8 @@ struct bpf_map_def {
unsigned int map_flags;
};
+static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
+ (void *) BPF_FUNC_skb_load_bytes;
static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
(void *) BPF_FUNC_skb_store_bytes;
static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
@@ -88,6 +92,8 @@ static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flag
(void *) BPF_FUNC_l4_csum_replace;
static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
(void *) BPF_FUNC_skb_under_cgroup;
+static int (*bpf_skb_change_head)(void *, int len, int flags) =
+ (void *) BPF_FUNC_skb_change_head;
#if defined(__x86_64__)
@@ -113,7 +119,7 @@ static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */
#define PT_REGS_RC(x) ((x)->gprs[2])
#define PT_REGS_SP(x) ((x)->gprs[15])
-#define PT_REGS_IP(x) ((x)->ip)
+#define PT_REGS_IP(x) ((x)->psw.addr)
#elif defined(__aarch64__)
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 97913e1..e30b6de 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -12,6 +12,10 @@
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/types.h>
+#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
@@ -52,6 +56,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
bool is_xdp = strncmp(event, "xdp", 3) == 0;
bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
+ bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
+ bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
enum bpf_prog_type prog_type;
char buf[256];
int fd, efd, err, id;
@@ -72,6 +78,10 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_XDP;
} else if (is_perf_event) {
prog_type = BPF_PROG_TYPE_PERF_EVENT;
+ } else if (is_cgroup_skb) {
+ prog_type = BPF_PROG_TYPE_CGROUP_SKB;
+ } else if (is_cgroup_sk) {
+ prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
} else {
printf("Unknown event '%s'\n", event);
return -1;
@@ -85,7 +95,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_fd[prog_cnt++] = fd;
- if (is_xdp || is_perf_event)
+ if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
return 0;
if (is_socket) {
@@ -317,6 +327,10 @@ int load_bpf_file(char *path)
&shdr_prog, &data_prog))
continue;
+ if (shdr_prog.sh_type != SHT_PROGBITS ||
+ !(shdr_prog.sh_flags & SHF_EXECINSTR))
+ continue;
+
insns = (struct bpf_insn *) data_prog->d_buf;
processed_sec[shdr.sh_info] = true;
@@ -330,7 +344,8 @@ int load_bpf_file(char *path)
memcmp(shname_prog, "tracepoint/", 11) == 0 ||
memcmp(shname_prog, "xdp", 3) == 0 ||
memcmp(shname_prog, "perf_event", 10) == 0 ||
- memcmp(shname_prog, "socket", 6) == 0)
+ memcmp(shname_prog, "socket", 6) == 0 ||
+ memcmp(shname_prog, "cgroup/", 7) == 0)
load_and_attach(shname_prog, insns, data_prog->d_size);
}
}
@@ -349,7 +364,8 @@ int load_bpf_file(char *path)
memcmp(shname, "tracepoint/", 11) == 0 ||
memcmp(shname, "xdp", 3) == 0 ||
memcmp(shname, "perf_event", 10) == 0 ||
- memcmp(shname, "socket", 6) == 0)
+ memcmp(shname, "socket", 6) == 0 ||
+ memcmp(shname, "cgroup/", 7) == 0)
load_and_attach(shname, data->d_buf, data->d_size);
}
@@ -438,3 +454,93 @@ struct ksym *ksym_search(long key)
/* out of range. return _stext */
return &syms[0];
}
+
+int set_link_xdp_fd(int ifindex, int fd)
+{
+ struct sockaddr_nl sa;
+ int sock, seq = 0, len, ret = -1;
+ char buf[4096];
+ struct nlattr *nla, *nla_xdp;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg ifinfo;
+ char attrbuf[64];
+ } req;
+ struct nlmsghdr *nh;
+ struct nlmsgerr *err;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.nl_family = AF_NETLINK;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0) {
+ printf("open netlink socket: %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+ printf("bind to netlink: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_SETLINK;
+ req.nh.nlmsg_pid = 0;
+ req.nh.nlmsg_seq = ++seq;
+ req.ifinfo.ifi_family = AF_UNSPEC;
+ req.ifinfo.ifi_index = ifindex;
+ nla = (struct nlattr *)(((char *)&req)
+ + NLMSG_ALIGN(req.nh.nlmsg_len));
+ nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
+
+ nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
+ nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+ nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;
+
+ req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ printf("send to netlink: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ len = recv(sock, buf, sizeof(buf), 0);
+ if (len < 0) {
+ printf("recv from netlink: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_pid != getpid()) {
+ printf("Wrong pid %d, expected %d\n",
+ nh->nlmsg_pid, getpid());
+ goto cleanup;
+ }
+ if (nh->nlmsg_seq != seq) {
+ printf("Wrong seq %d, expected %d\n",
+ nh->nlmsg_seq, seq);
+ goto cleanup;
+ }
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ if (!err->error)
+ continue;
+ printf("nlmsg error %s\n", strerror(-err->error));
+ goto cleanup;
+ case NLMSG_DONE:
+ break;
+ }
+ }
+
+ ret = 0;
+
+cleanup:
+ close(sock);
+ return ret;
+}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index dfa57fe..fb46a42 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -7,6 +7,7 @@
extern int map_fd[MAX_MAPS];
extern int prog_fd[MAX_PROGS];
extern int event_fd[MAX_PROGS];
+extern int prog_cnt;
/* parses elf file compiled by llvm .c->.o
* . parses 'maps' section and creates maps via BPF syscall
@@ -30,4 +31,5 @@ struct ksym {
int load_kallsyms(void);
struct ksym *ksym_search(long key);
+int set_link_xdp_fd(int ifindex, int fd);
#endif
diff --git a/samples/bpf/cgroup_helpers.c b/samples/bpf/cgroup_helpers.c
new file mode 100644
index 0000000..9d1be94
--- /dev/null
+++ b/samples/bpf/cgroup_helpers.c
@@ -0,0 +1,177 @@
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/limits.h>
+#include <stdio.h>
+#include <linux/sched.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <ftw.h>
+
+
+#include "cgroup_helpers.h"
+
+/*
+ * To avoid relying on the system setup, when setup_cgroup_env is called
+ * we create a new mount namespace, and cgroup namespace. The cgroup2
+ * root is mounted at CGROUP_MOUNT_PATH
+ *
+ * Unfortunately, most people don't have cgroupv2 enabled at this point in time.
+ * It's easier to create our own mount namespace and manage it ourselves.
+ *
+ * We assume /mnt exists.
+ */
+
+#define WALK_FD_LIMIT 16
+#define CGROUP_MOUNT_PATH "/mnt"
+#define CGROUP_WORK_DIR "/cgroup-test-work-dir"
+#define format_cgroup_path(buf, path) \
+ snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
+ CGROUP_WORK_DIR, path)
+
+/**
+ * setup_cgroup_environment() - Setup the cgroup environment
+ *
+ * After calling this function, cleanup_cgroup_environment should be called
+ * once testing is complete.
+ *
+ * This function will print an error to stderr and return 1 if it is unable
+ * to setup the cgroup environment. If setup is successful, 0 is returned.
+ */
+int setup_cgroup_environment(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_workdir, "");
+
+ if (unshare(CLONE_NEWNS)) {
+ log_err("unshare");
+ return 1;
+ }
+
+ if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
+ log_err("mount fakeroot");
+ return 1;
+ }
+
+ if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) {
+ log_err("mount cgroup2");
+ return 1;
+ }
+
+ /* Cleanup existing failed runs, now that the environment is setup */
+ cleanup_cgroup_environment();
+
+ if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
+ log_err("mkdir cgroup work dir");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int nftwfunc(const char *filename, const struct stat *statptr,
+ int fileflags, struct FTW *pfwt)
+{
+ if ((fileflags & FTW_D) && rmdir(filename))
+ log_err("Removing cgroup: %s", filename);
+ return 0;
+}
+
+
+static int join_cgroup_from_top(char *cgroup_path)
+{
+ char cgroup_procs_path[PATH_MAX + 1];
+ pid_t pid = getpid();
+ int fd, rc = 0;
+
+ snprintf(cgroup_procs_path, sizeof(cgroup_procs_path),
+ "%s/cgroup.procs", cgroup_path);
+
+ fd = open(cgroup_procs_path, O_WRONLY);
+ if (fd < 0) {
+ log_err("Opening Cgroup Procs: %s", cgroup_procs_path);
+ return 1;
+ }
+
+ if (dprintf(fd, "%d\n", pid) < 0) {
+ log_err("Joining Cgroup");
+ rc = 1;
+ }
+
+ close(fd);
+ return rc;
+}
+
+/**
+ * join_cgroup() - Join a cgroup
+ * @path: The cgroup path, relative to the workdir, to join
+ *
+ * This function expects a cgroup to already be created, relative to the cgroup
+ * work dir, and it joins it. For example, passing "/my-cgroup" as the path
+ * would actually put the calling process into the cgroup
+ * "/cgroup-test-work-dir/my-cgroup"
+ *
+ * On success, it returns 0, otherwise on failure it returns 1.
+ */
+int join_cgroup(char *path)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_path, path);
+ return join_cgroup_from_top(cgroup_path);
+}
+
+/**
+ * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
+ *
+ * This is an idempotent function to delete all temporary cgroups that
+ * have been created during the test, including the cgroup testing work
+ * directory.
+ *
+ * At call time, it moves the calling process to the root cgroup, and then
+ * runs the deletion process. It is idempotent, and should not fail, unless
+ * a process is lingering.
+ *
+ * On failure, it will print an error to stderr, and try to continue.
+ */
+void cleanup_cgroup_environment(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_workdir, "");
+ join_cgroup_from_top(CGROUP_MOUNT_PATH);
+ nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+}
+
+/**
+ * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
+ * @path: The cgroup path, relative to the workdir, to join
+ *
+ * This function creates a cgroup under the top level workdir and returns the
+ * file descriptor. It is idempotent.
+ *
+ * On success, it returns the file descriptor. On failure it returns 0.
+ * If there is a failure, it prints the error to stderr.
+ */
+int create_and_get_cgroup(char *path)
+{
+ char cgroup_path[PATH_MAX + 1];
+ int fd;
+
+ format_cgroup_path(cgroup_path, path);
+ if (mkdir(cgroup_path, 0777) && errno != EEXIST) {
+ log_err("mkdiring cgroup");
+ return 0;
+ }
+
+ fd = open(cgroup_path, O_RDONLY);
+ if (fd < 0) {
+ log_err("Opening Cgroup");
+ return 0;
+ }
+
+ return fd;
+}
diff --git a/samples/bpf/cgroup_helpers.h b/samples/bpf/cgroup_helpers.h
new file mode 100644
index 0000000..78c5520
--- /dev/null
+++ b/samples/bpf/cgroup_helpers.h
@@ -0,0 +1,16 @@
+#ifndef __CGROUP_HELPERS_H
+#define __CGROUP_HELPERS_H
+#include <errno.h>
+#include <string.h>
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+ __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+
+
+int create_and_get_cgroup(char *path);
+int join_cgroup(char *path);
+int setup_cgroup_environment(void);
+void cleanup_cgroup_environment(void);
+
+#endif
diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c
index 9969e35..9ce707b 100644
--- a/samples/bpf/libbpf.c
+++ b/samples/bpf/libbpf.c
@@ -104,6 +104,27 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
+int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type)
+{
+ union bpf_attr attr = {
+ .target_fd = target_fd,
+ .attach_bpf_fd = prog_fd,
+ .attach_type = type,
+ };
+
+ return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
+}
+
+int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
+{
+ union bpf_attr attr = {
+ .target_fd = target_fd,
+ .attach_type = type,
+ };
+
+ return syscall(__NR_bpf, BPF_PROG_DETACH, &attr, sizeof(attr));
+}
+
int bpf_obj_pin(int fd, const char *pathname)
{
union bpf_attr attr = {
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
index ac6edb6..94a901d 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/libbpf.h
@@ -15,10 +15,13 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, int insn_len,
const char *license, int kern_version);
+int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type);
+int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
+
int bpf_obj_pin(int fd, const char *pathname);
int bpf_obj_get(const char *pathname);
-#define LOG_BUF_SIZE 65536
+#define LOG_BUF_SIZE (256 * 1024)
extern char bpf_log_buf[LOG_BUF_SIZE];
/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
diff --git a/samples/bpf/lwt_len_hist.sh b/samples/bpf/lwt_len_hist.sh
new file mode 100644
index 0000000..7d56774
--- /dev/null
+++ b/samples/bpf/lwt_len_hist.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+NS1=lwt_ns1
+VETH0=tst_lwt1a
+VETH1=tst_lwt1b
+
+TRACE_ROOT=/sys/kernel/debug/tracing
+
+function cleanup {
+ ip route del 192.168.253.2/32 dev $VETH0 2> /dev/null
+ ip link del $VETH0 2> /dev/null
+ ip link del $VETH1 2> /dev/null
+ ip netns exec $NS1 killall netserver
+ ip netns delete $NS1 2> /dev/null
+}
+
+cleanup
+
+ip netns add $NS1
+ip link add $VETH0 type veth peer name $VETH1
+ip link set dev $VETH0 up
+ip addr add 192.168.253.1/24 dev $VETH0
+ip link set $VETH1 netns $NS1
+ip netns exec $NS1 ip link set dev $VETH1 up
+ip netns exec $NS1 ip addr add 192.168.253.2/24 dev $VETH1
+ip netns exec $NS1 netserver
+
+echo 1 > ${TRACE_ROOT}/tracing_on
+cp /dev/null ${TRACE_ROOT}/trace
+ip route add 192.168.253.2/32 encap bpf out obj lwt_len_hist_kern.o section len_hist dev $VETH0
+netperf -H 192.168.253.2 -t TCP_STREAM
+cat ${TRACE_ROOT}/trace | grep -v '^#'
+./lwt_len_hist
+cleanup
+echo 0 > ${TRACE_ROOT}/tracing_on
+
+exit 0
diff --git a/samples/bpf/lwt_len_hist_kern.c b/samples/bpf/lwt_len_hist_kern.c
new file mode 100644
index 0000000..df75383
--- /dev/null
+++ b/samples/bpf/lwt_len_hist_kern.c
@@ -0,0 +1,82 @@
+/* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/in.h>
+#include "bpf_helpers.h"
+
+# define printk(fmt, ...) \
+ ({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+ })
+
+struct bpf_elf_map {
+ __u32 type;
+ __u32 size_key;
+ __u32 size_value;
+ __u32 max_elem;
+ __u32 flags;
+ __u32 id;
+ __u32 pinning;
+};
+
+struct bpf_elf_map SEC("maps") lwt_len_hist_map = {
+ .type = BPF_MAP_TYPE_PERCPU_HASH,
+ .size_key = sizeof(__u64),
+ .size_value = sizeof(__u64),
+ .pinning = 2,
+ .max_elem = 1024,
+};
+
+static unsigned int log2(unsigned int v)
+{
+ unsigned int r;
+ unsigned int shift;
+
+ r = (v > 0xFFFF) << 4; v >>= r;
+ shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
+ shift = (v > 0xF) << 2; v >>= shift; r |= shift;
+ shift = (v > 0x3) << 1; v >>= shift; r |= shift;
+ r |= (v >> 1);
+ return r;
+}
+
+static unsigned int log2l(unsigned long v)
+{
+ unsigned int hi = v >> 32;
+ if (hi)
+ return log2(hi) + 32;
+ else
+ return log2(v);
+}
+
+SEC("len_hist")
+int do_len_hist(struct __sk_buff *skb)
+{
+ __u64 *value, key, init_val = 1;
+
+ key = log2l(skb->len);
+
+ value = bpf_map_lookup_elem(&lwt_len_hist_map, &key);
+ if (value)
+ __sync_fetch_and_add(value, 1);
+ else
+ bpf_map_update_elem(&lwt_len_hist_map, &key, &init_val, BPF_ANY);
+
+ return BPF_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/lwt_len_hist_user.c b/samples/bpf/lwt_len_hist_user.c
new file mode 100644
index 0000000..05d783f
--- /dev/null
+++ b/samples/bpf/lwt_len_hist_user.c
@@ -0,0 +1,76 @@
+#include <linux/unistd.h>
+#include <linux/bpf.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <arpa/inet.h>
+
+#include "libbpf.h"
+#include "bpf_util.h"
+
+#define MAX_INDEX 64
+#define MAX_STARS 38
+
+static void stars(char *str, long val, long max, int width)
+{
+ int i;
+
+ for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++)
+ str[i] = '*';
+ if (val > max)
+ str[i - 1] = '+';
+ str[i] = '\0';
+}
+
+int main(int argc, char **argv)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ const char *map_filename = "/sys/fs/bpf/tc/globals/lwt_len_hist_map";
+ uint64_t values[nr_cpus], sum, max_value = 0, data[MAX_INDEX] = {};
+ uint64_t key = 0, next_key, max_key = 0;
+ char starstr[MAX_STARS];
+ int i, map_fd;
+
+ map_fd = bpf_obj_get(map_filename);
+ if (map_fd < 0) {
+ fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n",
+ map_filename, strerror(errno), errno);
+ return -1;
+ }
+
+ while (bpf_get_next_key(map_fd, &key, &next_key) == 0) {
+ if (next_key >= MAX_INDEX) {
+ fprintf(stderr, "Key %lu out of bounds\n", next_key);
+ continue;
+ }
+
+ bpf_lookup_elem(map_fd, &next_key, values);
+
+ sum = 0;
+ for (i = 0; i < nr_cpus; i++)
+ sum += values[i];
+
+ data[next_key] = sum;
+ if (sum && next_key > max_key)
+ max_key = next_key;
+
+ if (sum > max_value)
+ max_value = sum;
+
+ key = next_key;
+ }
+
+ for (i = 1; i <= max_key + 1; i++) {
+ stars(starstr, data[i - 1], max_value, MAX_STARS);
+ printf("%8ld -> %-8ld : %-8ld |%-*s|\n",
+ (1l << i) >> 1, (1l << i) - 1, data[i - 1],
+ MAX_STARS, starstr);
+ }
+
+ close(map_fd);
+
+ return 0;
+}
diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
index 311538e..7ee1574 100644
--- a/samples/bpf/map_perf_test_kern.c
+++ b/samples/bpf/map_perf_test_kern.c
@@ -19,6 +19,21 @@ struct bpf_map_def SEC("maps") hash_map = {
.max_entries = MAX_ENTRIES,
};
+struct bpf_map_def SEC("maps") lru_hash_map = {
+ .type = BPF_MAP_TYPE_LRU_HASH,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") percpu_lru_hash_map = {
+ .type = BPF_MAP_TYPE_LRU_HASH,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = 10000,
+ .map_flags = BPF_F_NO_COMMON_LRU,
+};
+
struct bpf_map_def SEC("maps") percpu_hash_map = {
.type = BPF_MAP_TYPE_PERCPU_HASH,
.key_size = sizeof(u32),
@@ -53,6 +68,7 @@ int stress_hmap(struct pt_regs *ctx)
value = bpf_map_lookup_elem(&hash_map, &key);
if (value)
bpf_map_delete_elem(&hash_map, &key);
+
return 0;
}
@@ -96,5 +112,28 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx)
bpf_map_delete_elem(&percpu_hash_map_alloc, &key);
return 0;
}
+
+SEC("kprobe/sys_getpid")
+int stress_lru_hmap_alloc(struct pt_regs *ctx)
+{
+ u32 key = bpf_get_prandom_u32();
+ long val = 1;
+
+ bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY);
+
+ return 0;
+}
+
+SEC("kprobe/sys_getppid")
+int stress_percpu_lru_hmap_alloc(struct pt_regs *ctx)
+{
+ u32 key = bpf_get_prandom_u32();
+ long val = 1;
+
+ bpf_map_update_elem(&percpu_lru_hash_map, &key, &val, BPF_ANY);
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 3147377..9505b4d 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -35,6 +35,8 @@ static __u64 time_get_ns(void)
#define PERCPU_HASH_PREALLOC (1 << 1)
#define HASH_KMALLOC (1 << 2)
#define PERCPU_HASH_KMALLOC (1 << 3)
+#define LRU_HASH_PREALLOC (1 << 4)
+#define PERCPU_LRU_HASH_PREALLOC (1 << 5)
static int test_flags = ~0;
@@ -50,6 +52,30 @@ static void test_hash_prealloc(int cpu)
cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
}
+static void test_lru_hash_prealloc(int cpu)
+{
+ __u64 start_time;
+ int i;
+
+ start_time = time_get_ns();
+ for (i = 0; i < MAX_CNT; i++)
+ syscall(__NR_getpid);
+ printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n",
+ cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+}
+
+static void test_percpu_lru_hash_prealloc(int cpu)
+{
+ __u64 start_time;
+ int i;
+
+ start_time = time_get_ns();
+ for (i = 0; i < MAX_CNT; i++)
+ syscall(__NR_getppid);
+ printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n",
+ cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+}
+
static void test_percpu_hash_prealloc(int cpu)
{
__u64 start_time;
@@ -105,6 +131,12 @@ static void loop(int cpu)
if (test_flags & PERCPU_HASH_KMALLOC)
test_percpu_hash_kmalloc(cpu);
+
+ if (test_flags & LRU_HASH_PREALLOC)
+ test_lru_hash_prealloc(cpu);
+
+ if (test_flags & PERCPU_LRU_HASH_PREALLOC)
+ test_percpu_lru_hash_prealloc(cpu);
}
static void run_perf_test(int tasks)
diff --git a/samples/bpf/parse_ldabs.c b/samples/bpf/parse_ldabs.c
index d175501..6db6b21 100644
--- a/samples/bpf/parse_ldabs.c
+++ b/samples/bpf/parse_ldabs.c
@@ -4,6 +4,7 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
+#define KBUILD_MODNAME "foo"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
diff --git a/samples/bpf/parse_simple.c b/samples/bpf/parse_simple.c
index cf2511c..10af53d 100644
--- a/samples/bpf/parse_simple.c
+++ b/samples/bpf/parse_simple.c
@@ -4,6 +4,7 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
+#define KBUILD_MODNAME "foo"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
diff --git a/samples/bpf/parse_varlen.c b/samples/bpf/parse_varlen.c
index edab34d..95c1632 100644
--- a/samples/bpf/parse_varlen.c
+++ b/samples/bpf/parse_varlen.c
@@ -4,6 +4,7 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
+#define KBUILD_MODNAME "foo"
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
diff --git a/samples/bpf/sampleip_kern.c b/samples/bpf/sampleip_kern.c
index 774a681..ceabf31 100644
--- a/samples/bpf/sampleip_kern.c
+++ b/samples/bpf/sampleip_kern.c
@@ -25,7 +25,7 @@ int do_sample(struct bpf_perf_event_data *ctx)
u64 ip;
u32 *value, init_val = 1;
- ip = ctx->regs.ip;
+ ip = PT_REGS_IP(&ctx->regs);
value = bpf_map_lookup_elem(&ip_map, &ip);
if (value)
*value += 1;
diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c
new file mode 100644
index 0000000..533dd11a6
--- /dev/null
+++ b/samples/bpf/sock_flags_kern.c
@@ -0,0 +1,44 @@
+#include <uapi/linux/bpf.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <uapi/linux/in.h>
+#include <uapi/linux/in6.h>
+#include "bpf_helpers.h"
+
+SEC("cgroup/sock1")
+int bpf_prog1(struct bpf_sock *sk)
+{
+ char fmt[] = "socket: family %d type %d protocol %d\n";
+
+ bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
+
+ /* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets
+ * ie., make ping6 fail
+ */
+ if (sk->family == PF_INET6 &&
+ sk->type == SOCK_RAW &&
+ sk->protocol == IPPROTO_ICMPV6)
+ return 0;
+
+ return 1;
+}
+
+SEC("cgroup/sock2")
+int bpf_prog2(struct bpf_sock *sk)
+{
+ char fmt[] = "socket: family %d type %d protocol %d\n";
+
+ bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
+
+ /* block PF_INET, SOCK_RAW, IPPROTO_ICMP sockets
+ * ie., make ping fail
+ */
+ if (sk->family == PF_INET &&
+ sk->type == SOCK_RAW &&
+ sk->protocol == IPPROTO_ICMP)
+ return 0;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c
index 44e5846..f58acfc 100644
--- a/samples/bpf/sockex2_kern.c
+++ b/samples/bpf/sockex2_kern.c
@@ -198,7 +198,7 @@ struct bpf_map_def SEC("maps") hash_map = {
SEC("socket2")
int bpf_prog2(struct __sk_buff *skb)
{
- struct bpf_flow_keys flow;
+ struct bpf_flow_keys flow = {};
struct pair *value;
u32 key;
diff --git a/samples/bpf/tc_l2_redirect.sh b/samples/bpf/tc_l2_redirect.sh
new file mode 100755
index 0000000..80a05591
--- /dev/null
+++ b/samples/bpf/tc_l2_redirect.sh
@@ -0,0 +1,173 @@
+#!/bin/bash
+
+[[ -z $TC ]] && TC='tc'
+[[ -z $IP ]] && IP='ip'
+
+REDIRECT_USER='./tc_l2_redirect'
+REDIRECT_BPF='./tc_l2_redirect_kern.o'
+
+RP_FILTER=$(< /proc/sys/net/ipv4/conf/all/rp_filter)
+IPV6_FORWARDING=$(< /proc/sys/net/ipv6/conf/all/forwarding)
+
+function config_common {
+ local tun_type=$1
+
+ $IP netns add ns1
+ $IP netns add ns2
+ $IP link add ve1 type veth peer name vens1
+ $IP link add ve2 type veth peer name vens2
+ $IP link set dev ve1 up
+ $IP link set dev ve2 up
+ $IP link set dev ve1 mtu 1500
+ $IP link set dev ve2 mtu 1500
+ $IP link set dev vens1 netns ns1
+ $IP link set dev vens2 netns ns2
+
+ $IP -n ns1 link set dev lo up
+ $IP -n ns1 link set dev vens1 up
+ $IP -n ns1 addr add 10.1.1.101/24 dev vens1
+ $IP -n ns1 addr add 2401:db01::65/64 dev vens1 nodad
+ $IP -n ns1 route add default via 10.1.1.1 dev vens1
+ $IP -n ns1 route add default via 2401:db01::1 dev vens1
+
+ $IP -n ns2 link set dev lo up
+ $IP -n ns2 link set dev vens2 up
+ $IP -n ns2 addr add 10.2.1.102/24 dev vens2
+ $IP -n ns2 addr add 2401:db02::66/64 dev vens2 nodad
+ $IP -n ns2 addr add 10.10.1.102 dev lo
+ $IP -n ns2 addr add 2401:face::66/64 dev lo nodad
+ $IP -n ns2 link add ipt2 type ipip local 10.2.1.102 remote 10.2.1.1
+ $IP -n ns2 link add ip6t2 type ip6tnl mode any local 2401:db02::66 remote 2401:db02::1
+ $IP -n ns2 link set dev ipt2 up
+ $IP -n ns2 link set dev ip6t2 up
+ $IP netns exec ns2 $TC qdisc add dev vens2 clsact
+ $IP netns exec ns2 $TC filter add dev vens2 ingress bpf da obj $REDIRECT_BPF sec drop_non_tun_vip
+ if [[ $tun_type == "ipip" ]]; then
+ $IP -n ns2 route add 10.1.1.0/24 dev ipt2
+ $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ipt2.rp_filter=0
+ else
+ $IP -n ns2 route add 10.1.1.0/24 dev ip6t2
+ $IP -n ns2 route add 2401:db01::/64 dev ip6t2
+ $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ip6t2.rp_filter=0
+ fi
+
+ $IP addr add 10.1.1.1/24 dev ve1
+ $IP addr add 2401:db01::1/64 dev ve1 nodad
+ $IP addr add 10.2.1.1/24 dev ve2
+ $IP addr add 2401:db02::1/64 dev ve2 nodad
+
+ $TC qdisc add dev ve2 clsact
+ $TC filter add dev ve2 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_forward
+
+ sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ sysctl -q -w net.ipv6.conf.all.forwarding=1
+}
+
+function cleanup {
+ set +e
+ [[ -z $DEBUG ]] || set +x
+ $IP netns delete ns1 >& /dev/null
+ $IP netns delete ns2 >& /dev/null
+ $IP link del ve1 >& /dev/null
+ $IP link del ve2 >& /dev/null
+ $IP link del ipt >& /dev/null
+ $IP link del ip6t >& /dev/null
+ sysctl -q -w net.ipv4.conf.all.rp_filter=$RP_FILTER
+ sysctl -q -w net.ipv6.conf.all.forwarding=$IPV6_FORWARDING
+ rm -f /sys/fs/bpf/tc/globals/tun_iface
+ [[ -z $DEBUG ]] || set -x
+ set -e
+}
+
+function l2_to_ipip {
+ echo -n "l2_to_ipip $1: "
+
+ local dir=$1
+
+ config_common ipip
+
+ $IP link add ipt type ipip external
+ $IP link set dev ipt up
+ sysctl -q -w net.ipv4.conf.ipt.rp_filter=0
+ sysctl -q -w net.ipv4.conf.ipt.forwarding=1
+
+ if [[ $dir == "egress" ]]; then
+ $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2
+ $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect
+ sysctl -q -w net.ipv4.conf.ve1.forwarding=1
+ else
+ $TC qdisc add dev ve1 clsact
+ $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect
+ fi
+
+ $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ipt/ifindex)
+
+ $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null
+
+ if [[ $dir == "egress" ]]; then
+ # test direct egress to ve2 (i.e. not forwarding from
+ # ve1 to ve2).
+ ping -c1 10.10.1.102 >& /dev/null
+ fi
+
+ cleanup
+
+ echo "OK"
+}
+
+function l2_to_ip6tnl {
+ echo -n "l2_to_ip6tnl $1: "
+
+ local dir=$1
+
+ config_common ip6tnl
+
+ $IP link add ip6t type ip6tnl mode any external
+ $IP link set dev ip6t up
+ sysctl -q -w net.ipv4.conf.ip6t.rp_filter=0
+ sysctl -q -w net.ipv4.conf.ip6t.forwarding=1
+
+ if [[ $dir == "egress" ]]; then
+ $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2
+ $IP route add 2401:face::/64 via 2401:db02::66 dev ve2
+ $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect
+ sysctl -q -w net.ipv4.conf.ve1.forwarding=1
+ else
+ $TC qdisc add dev ve1 clsact
+ $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect
+ fi
+
+ $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ip6t/ifindex)
+
+ $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null
+ $IP netns exec ns1 ping -6 -c1 2401:face::66 >& /dev/null
+
+ if [[ $dir == "egress" ]]; then
+ # test direct egress to ve2 (i.e. not forwarding from
+ # ve1 to ve2).
+ ping -c1 10.10.1.102 >& /dev/null
+ ping -6 -c1 2401:face::66 >& /dev/null
+ fi
+
+ cleanup
+
+ echo "OK"
+}
+
+cleanup
+test_names="l2_to_ipip l2_to_ip6tnl"
+test_dirs="ingress egress"
+if [[ $# -ge 2 ]]; then
+ test_names=$1
+ test_dirs=$2
+elif [[ $# -ge 1 ]]; then
+ test_names=$1
+fi
+
+for t in $test_names; do
+ for d in $test_dirs; do
+ $t $d
+ done
+done
diff --git a/samples/bpf/tc_l2_redirect_kern.c b/samples/bpf/tc_l2_redirect_kern.c
new file mode 100644
index 0000000..92a4472
--- /dev/null
+++ b/samples/bpf/tc_l2_redirect_kern.c
@@ -0,0 +1,236 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/in.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/filter.h>
+#include <uapi/linux/pkt_cls.h>
+#include <net/ipv6.h>
+#include "bpf_helpers.h"
+
+#define _htonl __builtin_bswap32
+
+#define PIN_GLOBAL_NS 2
+struct bpf_elf_map {
+ __u32 type;
+ __u32 size_key;
+ __u32 size_value;
+ __u32 max_elem;
+ __u32 flags;
+ __u32 id;
+ __u32 pinning;
+};
+
+/* copy of 'struct ethhdr' without __packed */
+struct eth_hdr {
+ unsigned char h_dest[ETH_ALEN];
+ unsigned char h_source[ETH_ALEN];
+ unsigned short h_proto;
+};
+
+struct bpf_elf_map SEC("maps") tun_iface = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .size_key = sizeof(int),
+ .size_value = sizeof(int),
+ .pinning = PIN_GLOBAL_NS,
+ .max_elem = 1,
+};
+
+static __always_inline bool is_vip_addr(__be16 eth_proto, __be32 daddr)
+{
+ if (eth_proto == htons(ETH_P_IP))
+ return (_htonl(0xffffff00) & daddr) == _htonl(0x0a0a0100);
+ else if (eth_proto == htons(ETH_P_IPV6))
+ return (daddr == _htonl(0x2401face));
+
+ return false;
+}
+
+SEC("l2_to_iptun_ingress_forward")
+int _l2_to_iptun_ingress_forward(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key tkey = {};
+ void *data = (void *)(long)skb->data;
+ struct eth_hdr *eth = data;
+ void *data_end = (void *)(long)skb->data_end;
+ int key = 0, *ifindex;
+
+ int ret;
+
+ if (data + sizeof(*eth) > data_end)
+ return TC_ACT_OK;
+
+ ifindex = bpf_map_lookup_elem(&tun_iface, &key);
+ if (!ifindex)
+ return TC_ACT_OK;
+
+ if (eth->h_proto == htons(ETH_P_IP)) {
+ char fmt4[] = "ingress forward to ifindex:%d daddr4:%x\n";
+ struct iphdr *iph = data + sizeof(*eth);
+
+ if (data + sizeof(*eth) + sizeof(*iph) > data_end)
+ return TC_ACT_OK;
+
+ if (iph->protocol != IPPROTO_IPIP)
+ return TC_ACT_OK;
+
+ bpf_trace_printk(fmt4, sizeof(fmt4), *ifindex,
+ _htonl(iph->daddr));
+ return bpf_redirect(*ifindex, BPF_F_INGRESS);
+ } else if (eth->h_proto == htons(ETH_P_IPV6)) {
+ char fmt6[] = "ingress forward to ifindex:%d daddr6:%x::%x\n";
+ struct ipv6hdr *ip6h = data + sizeof(*eth);
+
+ if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
+ return TC_ACT_OK;
+
+ if (ip6h->nexthdr != IPPROTO_IPIP &&
+ ip6h->nexthdr != IPPROTO_IPV6)
+ return TC_ACT_OK;
+
+ bpf_trace_printk(fmt6, sizeof(fmt6), *ifindex,
+ _htonl(ip6h->daddr.s6_addr32[0]),
+ _htonl(ip6h->daddr.s6_addr32[3]));
+ return bpf_redirect(*ifindex, BPF_F_INGRESS);
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("l2_to_iptun_ingress_redirect")
+int _l2_to_iptun_ingress_redirect(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key tkey = {};
+ void *data = (void *)(long)skb->data;
+ struct eth_hdr *eth = data;
+ void *data_end = (void *)(long)skb->data_end;
+ int key = 0, *ifindex;
+
+ int ret;
+
+ if (data + sizeof(*eth) > data_end)
+ return TC_ACT_OK;
+
+ ifindex = bpf_map_lookup_elem(&tun_iface, &key);
+ if (!ifindex)
+ return TC_ACT_OK;
+
+ if (eth->h_proto == htons(ETH_P_IP)) {
+ char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n";
+ struct iphdr *iph = data + sizeof(*eth);
+ __be32 daddr = iph->daddr;
+
+ if (data + sizeof(*eth) + sizeof(*iph) > data_end)
+ return TC_ACT_OK;
+
+ if (!is_vip_addr(eth->h_proto, daddr))
+ return TC_ACT_OK;
+
+ bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(daddr), *ifindex);
+ } else {
+ return TC_ACT_OK;
+ }
+
+ tkey.tunnel_id = 10000;
+ tkey.tunnel_ttl = 64;
+ tkey.remote_ipv4 = 0x0a020166; /* 10.2.1.102 */
+ bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), 0);
+ return bpf_redirect(*ifindex, 0);
+}
+
+SEC("l2_to_ip6tun_ingress_redirect")
+int _l2_to_ip6tun_ingress_redirect(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key tkey = {};
+ void *data = (void *)(long)skb->data;
+ struct eth_hdr *eth = data;
+ void *data_end = (void *)(long)skb->data_end;
+ int key = 0, *ifindex;
+
+ if (data + sizeof(*eth) > data_end)
+ return TC_ACT_OK;
+
+ ifindex = bpf_map_lookup_elem(&tun_iface, &key);
+ if (!ifindex)
+ return TC_ACT_OK;
+
+ if (eth->h_proto == htons(ETH_P_IP)) {
+ char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n";
+ struct iphdr *iph = data + sizeof(*eth);
+
+ if (data + sizeof(*eth) + sizeof(*iph) > data_end)
+ return TC_ACT_OK;
+
+ if (!is_vip_addr(eth->h_proto, iph->daddr))
+ return TC_ACT_OK;
+
+ bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(iph->daddr),
+ *ifindex);
+ } else if (eth->h_proto == htons(ETH_P_IPV6)) {
+ char fmt6[] = "e/ingress redirect daddr6:%x to ifindex:%d\n";
+ struct ipv6hdr *ip6h = data + sizeof(*eth);
+
+ if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
+ return TC_ACT_OK;
+
+ if (!is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0]))
+ return TC_ACT_OK;
+
+ bpf_trace_printk(fmt6, sizeof(fmt6),
+ _htonl(ip6h->daddr.s6_addr32[0]), *ifindex);
+ } else {
+ return TC_ACT_OK;
+ }
+
+ tkey.tunnel_id = 10000;
+ tkey.tunnel_ttl = 64;
+ /* 2401:db02:0:0:0:0:0:66 */
+ tkey.remote_ipv6[0] = _htonl(0x2401db02);
+ tkey.remote_ipv6[1] = 0;
+ tkey.remote_ipv6[2] = 0;
+ tkey.remote_ipv6[3] = _htonl(0x00000066);
+ bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), BPF_F_TUNINFO_IPV6);
+ return bpf_redirect(*ifindex, 0);
+}
+
+SEC("drop_non_tun_vip")
+int _drop_non_tun_vip(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key tkey = {};
+ void *data = (void *)(long)skb->data;
+ struct eth_hdr *eth = data;
+ void *data_end = (void *)(long)skb->data_end;
+
+ if (data + sizeof(*eth) > data_end)
+ return TC_ACT_OK;
+
+ if (eth->h_proto == htons(ETH_P_IP)) {
+ struct iphdr *iph = data + sizeof(*eth);
+
+ if (data + sizeof(*eth) + sizeof(*iph) > data_end)
+ return TC_ACT_OK;
+
+ if (is_vip_addr(eth->h_proto, iph->daddr))
+ return TC_ACT_SHOT;
+ } else if (eth->h_proto == htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ip6h = data + sizeof(*eth);
+
+ if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
+ return TC_ACT_OK;
+
+ if (is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0]))
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tc_l2_redirect_user.c b/samples/bpf/tc_l2_redirect_user.c
new file mode 100644
index 0000000..4013c53
--- /dev/null
+++ b/samples/bpf/tc_l2_redirect_user.c
@@ -0,0 +1,73 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/unistd.h>
+#include <linux/bpf.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+
+#include "libbpf.h"
+
+static void usage(void)
+{
+ printf("Usage: tc_l2_ipip_redirect [...]\n");
+ printf(" -U <file> Update an already pinned BPF array\n");
+ printf(" -i <ifindex> Interface index\n");
+ printf(" -h Display this help\n");
+}
+
+int main(int argc, char **argv)
+{
+ const char *pinned_file = NULL;
+ int ifindex = -1;
+ int array_key = 0;
+ int array_fd = -1;
+ int ret = -1;
+ int opt;
+
+ while ((opt = getopt(argc, argv, "F:U:i:")) != -1) {
+ switch (opt) {
+ /* General args */
+ case 'U':
+ pinned_file = optarg;
+ break;
+ case 'i':
+ ifindex = atoi(optarg);
+ break;
+ default:
+ usage();
+ goto out;
+ }
+ }
+
+ if (ifindex < 0 || !pinned_file) {
+ usage();
+ goto out;
+ }
+
+ array_fd = bpf_obj_get(pinned_file);
+ if (array_fd < 0) {
+ fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n",
+ pinned_file, strerror(errno), errno);
+ goto out;
+ }
+
+ /* bpf_tunnel_key.remote_ipv4 expects host byte orders */
+ ret = bpf_update_elem(array_fd, &array_key, &ifindex, 0);
+ if (ret) {
+ perror("bpf_update_elem");
+ goto out;
+ }
+
+out:
+ if (array_fd != -1)
+ close(array_fd);
+ return ret;
+}
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c
index fa051b3..274c884 100644
--- a/samples/bpf/tcbpf1_kern.c
+++ b/samples/bpf/tcbpf1_kern.c
@@ -1,3 +1,4 @@
+#define KBUILD_MODNAME "foo"
#include <uapi/linux/bpf.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
index 3303bb8..9c823a6 100644
--- a/samples/bpf/tcbpf2_kern.c
+++ b/samples/bpf/tcbpf2_kern.c
@@ -5,6 +5,7 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
+#define KBUILD_MODNAME "foo"
#include <uapi/linux/bpf.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c
new file mode 100644
index 0000000..a19484c
--- /dev/null
+++ b/samples/bpf/test_cgrp2_attach.c
@@ -0,0 +1,167 @@
+/* eBPF example program:
+ *
+ * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
+ *
+ * - Loads eBPF program
+ *
+ * The eBPF program accesses the map passed in to store two pieces of
+ * information. The number of invocations of the program, which maps
+ * to the number of packets received, is stored to key 0. Key 1 is
+ * incremented on each iteration by the number of bytes stored in
+ * the skb.
+ *
+ * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
+ *
+ * - Every second, reads map[0] and map[1] to see how many bytes and
+ * packets were seen on any socket of tasks in the given cgroup.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <linux/bpf.h>
+
+#include "libbpf.h"
+
+enum {
+ MAP_KEY_PACKETS,
+ MAP_KEY_BYTES,
+};
+
+static int prog_load(int map_fd, int verdict)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), /* save r6 so it's not clobbered by BPF_CALL */
+
+ /* Count packets */
+ BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd), /* load map fd to r1 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ /* Count bytes */
+ BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB,
+ prog, sizeof(prog), "GPL", 0);
+}
+
+static int usage(const char *argv0)
+{
+ printf("Usage: %s [-d] [-D] <cg-path> <egress|ingress>\n", argv0);
+ printf(" -d Drop Traffic\n");
+ printf(" -D Detach filter, and exit\n");
+ return EXIT_FAILURE;
+}
+
+static int attach_filter(int cg_fd, int type, int verdict)
+{
+ int prog_fd, map_fd, ret, key;
+ long long pkt_cnt, byte_cnt;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY,
+ sizeof(key), sizeof(byte_cnt),
+ 256, 0);
+ if (map_fd < 0) {
+ printf("Failed to create map: '%s'\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ prog_fd = prog_load(map_fd, verdict);
+ printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
+
+ if (prog_fd < 0) {
+ printf("Failed to load prog: '%s'\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ ret = bpf_prog_attach(prog_fd, cg_fd, type);
+ if (ret < 0) {
+ printf("Failed to attach prog to cgroup: '%s'\n",
+ strerror(errno));
+ return EXIT_FAILURE;
+ }
+ while (1) {
+ key = MAP_KEY_PACKETS;
+ assert(bpf_lookup_elem(map_fd, &key, &pkt_cnt) == 0);
+
+ key = MAP_KEY_BYTES;
+ assert(bpf_lookup_elem(map_fd, &key, &byte_cnt) == 0);
+
+ printf("cgroup received %lld packets, %lld bytes\n",
+ pkt_cnt, byte_cnt);
+ sleep(1);
+ }
+
+ return EXIT_SUCCESS;
+}
+
+int main(int argc, char **argv)
+{
+ int detach_only = 0, verdict = 1;
+ enum bpf_attach_type type;
+ int opt, cg_fd, ret;
+
+ while ((opt = getopt(argc, argv, "Dd")) != -1) {
+ switch (opt) {
+ case 'd':
+ verdict = 0;
+ break;
+ case 'D':
+ detach_only = 1;
+ break;
+ default:
+ return usage(argv[0]);
+ }
+ }
+
+ if (argc - optind < 2)
+ return usage(argv[0]);
+
+ if (strcmp(argv[optind + 1], "ingress") == 0)
+ type = BPF_CGROUP_INET_INGRESS;
+ else if (strcmp(argv[optind + 1], "egress") == 0)
+ type = BPF_CGROUP_INET_EGRESS;
+ else
+ return usage(argv[0]);
+
+ cg_fd = open(argv[optind], O_DIRECTORY | O_RDONLY);
+ if (cg_fd < 0) {
+ printf("Failed to open cgroup path: '%s'\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ if (detach_only) {
+ ret = bpf_prog_detach(cg_fd, type);
+ printf("bpf_prog_detach() returned '%s' (%d)\n",
+ strerror(errno), errno);
+ } else
+ ret = attach_filter(cg_fd, type, verdict);
+
+ return ret;
+}
diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c
new file mode 100644
index 0000000..ddfac42
--- /dev/null
+++ b/samples/bpf/test_cgrp2_attach2.c
@@ -0,0 +1,132 @@
+/* eBPF example program:
+ *
+ * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
+ *
+ * - Loads eBPF program
+ *
+ * The eBPF program accesses the map passed in to store two pieces of
+ * information. The number of invocations of the program, which maps
+ * to the number of packets received, is stored to key 0. Key 1 is
+ * incremented on each iteration by the number of bytes stored in
+ * the skb.
+ *
+ * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
+ *
+ * - Every second, reads map[0] and map[1] to see how many bytes and
+ * packets were seen on any socket of tasks in the given cgroup.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+
+#include <linux/bpf.h>
+
+#include "libbpf.h"
+#include "cgroup_helpers.h"
+
+#define FOO "/foo"
+#define BAR "/foo/bar/"
+#define PING_CMD "ping -c1 -w1 127.0.0.1"
+
+static int prog_load(int verdict)
+{
+ int ret;
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
+ BPF_EXIT_INSN(),
+ };
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB,
+ prog, sizeof(prog), "GPL", 0);
+
+ if (ret < 0) {
+ log_err("Loading program");
+ printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
+ return 0;
+ }
+ return ret;
+}
+
+
+int main(int argc, char **argv)
+{
+ int drop_prog, allow_prog, foo = 0, bar = 0, rc = 0;
+
+ allow_prog = prog_load(1);
+ if (!allow_prog)
+ goto err;
+
+ drop_prog = prog_load(0);
+ if (!drop_prog)
+ goto err;
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ /* Create cgroup /foo, get fd, and join it */
+ foo = create_and_get_cgroup(FOO);
+ if (!foo)
+ goto err;
+
+ if (join_cgroup(FOO))
+ goto err;
+
+ if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS)) {
+ log_err("Attaching prog to /foo");
+ goto err;
+ }
+
+ assert(system(PING_CMD) != 0);
+
+ /* Create cgroup /foo/bar, get fd, and join it */
+ bar = create_and_get_cgroup(BAR);
+ if (!bar)
+ goto err;
+
+ if (join_cgroup(BAR))
+ goto err;
+
+ assert(system(PING_CMD) != 0);
+
+ if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) {
+ log_err("Attaching prog to /foo/bar");
+ goto err;
+ }
+
+ assert(system(PING_CMD) == 0);
+
+
+ if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
+ log_err("Detaching program from /foo/bar");
+ goto err;
+ }
+
+ assert(system(PING_CMD) != 0);
+
+ if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) {
+ log_err("Attaching prog to /foo/bar");
+ goto err;
+ }
+
+ if (bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
+ log_err("Detaching program from /foo");
+ goto err;
+ }
+
+ assert(system(PING_CMD) == 0);
+
+ goto out;
+
+err:
+ rc = 1;
+
+out:
+ close(foo);
+ close(bar);
+ cleanup_cgroup_environment();
+ return rc;
+}
diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c
new file mode 100644
index 0000000..d467b3c
--- /dev/null
+++ b/samples/bpf/test_cgrp2_sock.c
@@ -0,0 +1,83 @@
+/* eBPF example program:
+ *
+ * - Loads eBPF program
+ *
+ * The eBPF program sets the sk_bound_dev_if index in new AF_INET{6}
+ * sockets opened by processes in the cgroup.
+ *
+ * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <linux/bpf.h>
+
+#include "libbpf.h"
+
+static int prog_load(int idx)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_3, idx),
+ BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)),
+ BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog),
+ "GPL", 0);
+}
+
+static int usage(const char *argv0)
+{
+ printf("Usage: %s cg-path device-index\n", argv0);
+ return EXIT_FAILURE;
+}
+
+int main(int argc, char **argv)
+{
+ int cg_fd, prog_fd, ret;
+ unsigned int idx;
+
+ if (argc < 2)
+ return usage(argv[0]);
+
+ idx = if_nametoindex(argv[2]);
+ if (!idx) {
+ printf("Invalid device name\n");
+ return EXIT_FAILURE;
+ }
+
+ cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
+ if (cg_fd < 0) {
+ printf("Failed to open cgroup path: '%s'\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ prog_fd = prog_load(idx);
+ printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
+
+ if (prog_fd < 0) {
+ printf("Failed to load prog: '%s'\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE);
+ if (ret < 0) {
+ printf("Failed to attach prog to cgroup: '%s'\n",
+ strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh
new file mode 100755
index 0000000..925fd46
--- /dev/null
+++ b/samples/bpf/test_cgrp2_sock.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+function config_device {
+ ip netns add at_ns0
+ ip link add veth0 type veth peer name veth0b
+ ip link set veth0b up
+ ip link set veth0 netns at_ns0
+ ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+ ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip link add foo type vrf table 1234
+ ip link set foo up
+ ip addr add 172.16.1.101/24 dev veth0b
+ ip addr add 2401:db00::2/64 dev veth0b nodad
+ ip link set veth0b master foo
+}
+
+function attach_bpf {
+ rm -rf /tmp/cgroupv2
+ mkdir -p /tmp/cgroupv2
+ mount -t cgroup2 none /tmp/cgroupv2
+ mkdir -p /tmp/cgroupv2/foo
+ test_cgrp2_sock /tmp/cgroupv2/foo foo
+ echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
+}
+
+function cleanup {
+ set +ex
+ ip netns delete at_ns0
+ ip link del veth0
+ ip link del foo
+ umount /tmp/cgroupv2
+ rm -rf /tmp/cgroupv2
+ set -ex
+}
+
+function do_test {
+ ping -c1 -w1 172.16.1.100
+ ping6 -c1 -w1 2401:db00::1
+}
+
+cleanup 2>/dev/null
+config_device
+attach_bpf
+do_test
+cleanup
+echo "*** PASS ***"
diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c
new file mode 100644
index 0000000..455ef0d
--- /dev/null
+++ b/samples/bpf/test_cgrp2_sock2.c
@@ -0,0 +1,66 @@
+/* eBPF example program:
+ *
+ * - Loads eBPF program
+ *
+ * The eBPF program loads a filter from file and attaches the
+ * program to a cgroup using BPF_PROG_ATTACH
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <linux/bpf.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+
+static int usage(const char *argv0)
+{
+ printf("Usage: %s cg-path filter-path [filter-id]\n", argv0);
+ return EXIT_FAILURE;
+}
+
+int main(int argc, char **argv)
+{
+ int cg_fd, ret, filter_id = 0;
+
+ if (argc < 3)
+ return usage(argv[0]);
+
+ cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
+ if (cg_fd < 0) {
+ printf("Failed to open cgroup path: '%s'\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ if (load_bpf_file(argv[2]))
+ return EXIT_FAILURE;
+
+ printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
+
+ if (argc > 3)
+ filter_id = atoi(argv[3]);
+
+ if (filter_id > prog_cnt) {
+ printf("Invalid program id; program not found in file\n");
+ return EXIT_FAILURE;
+ }
+
+ ret = bpf_prog_attach(prog_fd[filter_id], cg_fd,
+ BPF_CGROUP_INET_SOCK_CREATE);
+ if (ret < 0) {
+ printf("Failed to attach prog to cgroup: '%s'\n",
+ strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh
new file mode 100755
index 0000000..891f12a
--- /dev/null
+++ b/samples/bpf/test_cgrp2_sock2.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+function config_device {
+ ip netns add at_ns0
+ ip link add veth0 type veth peer name veth0b
+ ip link set veth0b up
+ ip link set veth0 netns at_ns0
+ ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+ ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add 172.16.1.101/24 dev veth0b
+ ip addr add 2401:db00::2/64 dev veth0b nodad
+}
+
+function config_cgroup {
+ rm -rf /tmp/cgroupv2
+ mkdir -p /tmp/cgroupv2
+ mount -t cgroup2 none /tmp/cgroupv2
+ mkdir -p /tmp/cgroupv2/foo
+ echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
+}
+
+
+function attach_bpf {
+ test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1
+ [ $? -ne 0 ] && exit 1
+}
+
+function cleanup {
+ ip link del veth0b
+ ip netns delete at_ns0
+ umount /tmp/cgroupv2
+ rm -rf /tmp/cgroupv2
+}
+
+cleanup 2>/dev/null
+
+set -e
+config_device
+config_cgroup
+set +e
+
+#
+# Test 1 - fail ping6
+#
+attach_bpf 0
+ping -c1 -w1 172.16.1.100
+if [ $? -ne 0 ]; then
+ echo "ping failed when it should succeed"
+ cleanup
+ exit 1
+fi
+
+ping6 -c1 -w1 2401:db00::1
+if [ $? -eq 0 ]; then
+ echo "ping6 succeeded when it should not"
+ cleanup
+ exit 1
+fi
+
+#
+# Test 2 - fail ping
+#
+attach_bpf 1
+ping6 -c1 -w1 2401:db00::1
+if [ $? -ne 0 ]; then
+ echo "ping6 failed when it should succeed"
+ cleanup
+ exit 1
+fi
+
+ping -c1 -w1 172.16.1.100
+if [ $? -eq 0 ]; then
+ echo "ping succeeded when it should not"
+ cleanup
+ exit 1
+fi
+
+cleanup
+echo
+echo "*** PASS ***"
diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c
index 10ff734..1547b36 100644
--- a/samples/bpf/test_cgrp2_tc_kern.c
+++ b/samples/bpf/test_cgrp2_tc_kern.c
@@ -4,6 +4,7 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
+#define KBUILD_MODNAME "foo"
#include <uapi/linux/if_ether.h>
#include <uapi/linux/in6.h>
#include <uapi/linux/ipv6.h>
diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c
index 30b0bce..95aaaa8 100644
--- a/samples/bpf/test_current_task_under_cgroup_user.c
+++ b/samples/bpf/test_current_task_under_cgroup_user.c
@@ -11,50 +11,16 @@
#include <unistd.h>
#include "libbpf.h"
#include "bpf_load.h"
-#include <string.h>
-#include <fcntl.h>
-#include <errno.h>
#include <linux/bpf.h>
-#include <sched.h>
-#include <sys/mount.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <linux/limits.h>
+#include "cgroup_helpers.h"
-#define CGROUP_MOUNT_PATH "/mnt"
-#define CGROUP_PATH "/mnt/my-cgroup"
-
-#define clean_errno() (errno == 0 ? "None" : strerror(errno))
-#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
- __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
-
-static int join_cgroup(char *path)
-{
- int fd, rc = 0;
- pid_t pid = getpid();
- char cgroup_path[PATH_MAX + 1];
-
- snprintf(cgroup_path, sizeof(cgroup_path), "%s/cgroup.procs", path);
-
- fd = open(cgroup_path, O_WRONLY);
- if (fd < 0) {
- log_err("Opening Cgroup");
- return 1;
- }
-
- if (dprintf(fd, "%d\n", pid) < 0) {
- log_err("Joining Cgroup");
- rc = 1;
- }
- close(fd);
- return rc;
-}
+#define CGROUP_PATH "/my-cgroup"
int main(int argc, char **argv)
{
- char filename[256];
- int cg2, idx = 0;
pid_t remote_pid, local_pid = getpid();
+ int cg2, idx = 0, rc = 0;
+ char filename[256];
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (load_bpf_file(filename)) {
@@ -62,47 +28,22 @@ int main(int argc, char **argv)
return 1;
}
- /*
- * This is to avoid interfering with existing cgroups. Unfortunately,
- * most people don't have cgroupv2 enabled at this point in time.
- * It's easier to create our own mount namespace and manage it
- * ourselves.
- */
- if (unshare(CLONE_NEWNS)) {
- log_err("unshare");
- return 1;
- }
-
- if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
- log_err("mount fakeroot");
- return 1;
- }
-
- if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) {
- log_err("mount cgroup2");
- return 1;
- }
+ if (setup_cgroup_environment())
+ goto err;
- if (mkdir(CGROUP_PATH, 0777) && errno != EEXIST) {
- log_err("mkdir cgroup");
- return 1;
- }
+ cg2 = create_and_get_cgroup(CGROUP_PATH);
- cg2 = open(CGROUP_PATH, O_RDONLY);
- if (cg2 < 0) {
- log_err("opening target cgroup");
- goto cleanup_cgroup_err;
- }
+ if (!cg2)
+ goto err;
if (bpf_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) {
log_err("Adding target cgroup to map");
- goto cleanup_cgroup_err;
- }
- if (join_cgroup("/mnt/my-cgroup")) {
- log_err("Leaving target cgroup");
- goto cleanup_cgroup_err;
+ goto err;
}
+ if (join_cgroup(CGROUP_PATH))
+ goto err;
+
/*
* The installed helper program catched the sync call, and should
* write it to the map.
@@ -115,12 +56,12 @@ int main(int argc, char **argv)
fprintf(stderr,
"BPF Helper didn't write correct PID to map, but: %d\n",
remote_pid);
- goto leave_cgroup_err;
+ goto err;
}
/* Verify the negative scenario; leave the cgroup */
- if (join_cgroup(CGROUP_MOUNT_PATH))
- goto leave_cgroup_err;
+ if (join_cgroup("/"))
+ goto err;
remote_pid = 0;
bpf_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY);
@@ -130,16 +71,15 @@ int main(int argc, char **argv)
if (local_pid == remote_pid) {
fprintf(stderr, "BPF cgroup negative test did not work\n");
- goto cleanup_cgroup_err;
+ goto err;
}
- rmdir(CGROUP_PATH);
- return 0;
+ goto out;
+err:
+ rc = 1;
- /* Error condition, cleanup */
-leave_cgroup_err:
- join_cgroup(CGROUP_MOUNT_PATH);
-cleanup_cgroup_err:
- rmdir(CGROUP_PATH);
- return 1;
+out:
+ close(cg2);
+ cleanup_cgroup_environment();
+ return rc;
}
diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c
new file mode 100644
index 0000000..316230a
--- /dev/null
+++ b/samples/bpf/test_lru_dist.c
@@ -0,0 +1,541 @@
+/*
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#define _GNU_SOURCE
+#include <linux/types.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <sched.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "libbpf.h"
+#include "bpf_util.h"
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+static int nr_cpus;
+static unsigned long long *dist_keys;
+static unsigned int dist_key_counts;
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+ list->next = list;
+ list->prev = list;
+}
+
+static inline int list_empty(const struct list_head *head)
+{
+ return head->next == head;
+}
+
+static inline void __list_add(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+static inline void __list_del(struct list_head *prev, struct list_head *next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+static inline void __list_del_entry(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+}
+
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+ __list_del_entry(list);
+ list_add(list, head);
+}
+
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+#define list_last_entry(ptr, type, member) \
+ list_entry((ptr)->prev, type, member)
+
+struct pfect_lru_node {
+ struct list_head list;
+ unsigned long long key;
+};
+
+struct pfect_lru {
+ struct list_head list;
+ struct pfect_lru_node *free_nodes;
+ unsigned int cur_size;
+ unsigned int lru_size;
+ unsigned int nr_unique;
+ unsigned int nr_misses;
+ unsigned int total;
+ int map_fd;
+};
+
+static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size,
+ unsigned int nr_possible_elems)
+{
+ lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
+ sizeof(unsigned long long),
+ sizeof(struct pfect_lru_node *),
+ nr_possible_elems, 0);
+ assert(lru->map_fd != -1);
+
+ lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node));
+ assert(lru->free_nodes);
+
+ INIT_LIST_HEAD(&lru->list);
+ lru->cur_size = 0;
+ lru->lru_size = lru_size;
+ lru->nr_unique = lru->nr_misses = lru->total = 0;
+}
+
+static void pfect_lru_destroy(struct pfect_lru *lru)
+{
+ close(lru->map_fd);
+ free(lru->free_nodes);
+}
+
+static int pfect_lru_lookup_or_insert(struct pfect_lru *lru,
+ unsigned long long key)
+{
+ struct pfect_lru_node *node = NULL;
+ int seen = 0;
+
+ lru->total++;
+ if (!bpf_lookup_elem(lru->map_fd, &key, &node)) {
+ if (node) {
+ list_move(&node->list, &lru->list);
+ return 1;
+ }
+ seen = 1;
+ }
+
+ if (lru->cur_size < lru->lru_size) {
+ node = &lru->free_nodes[lru->cur_size++];
+ INIT_LIST_HEAD(&node->list);
+ } else {
+ struct pfect_lru_node *null_node = NULL;
+
+ node = list_last_entry(&lru->list,
+ struct pfect_lru_node,
+ list);
+ bpf_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST);
+ }
+
+ node->key = key;
+ list_move(&node->list, &lru->list);
+
+ lru->nr_misses++;
+ if (seen) {
+ assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_EXIST));
+ } else {
+ lru->nr_unique++;
+ assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST));
+ }
+
+ return seen;
+}
+
+static unsigned int read_keys(const char *dist_file,
+ unsigned long long **keys)
+{
+ struct stat fst;
+ unsigned long long *retkeys;
+ unsigned int counts = 0;
+ int dist_fd;
+ char *b, *l;
+ int i;
+
+ dist_fd = open(dist_file, 0);
+ assert(dist_fd != -1);
+
+ assert(fstat(dist_fd, &fst) == 0);
+ b = malloc(fst.st_size);
+ assert(b);
+
+ assert(read(dist_fd, b, fst.st_size) == fst.st_size);
+ close(dist_fd);
+ for (i = 0; i < fst.st_size; i++) {
+ if (b[i] == '\n')
+ counts++;
+ }
+ counts++; /* in case the last line has no \n */
+
+ retkeys = malloc(counts * sizeof(unsigned long long));
+ assert(retkeys);
+
+ counts = 0;
+ for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n"))
+ retkeys[counts++] = strtoull(l, NULL, 10);
+ free(b);
+
+ *keys = retkeys;
+
+ return counts;
+}
+
+static int create_map(int map_type, int map_flags, unsigned int size)
+{
+ int map_fd;
+
+ map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
+ sizeof(unsigned long long), size, map_flags);
+
+ if (map_fd == -1)
+ perror("bpf_create_map");
+
+ return map_fd;
+}
+
+static int sched_next_online(int pid, int next_to_try)
+{
+ cpu_set_t cpuset;
+
+ if (next_to_try == nr_cpus)
+ return -1;
+
+ while (next_to_try < nr_cpus) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(next_to_try++, &cpuset);
+ if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset))
+ break;
+ }
+
+ return next_to_try;
+}
+
+static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data),
+ void *data)
+{
+ int next_sched_cpu = 0;
+ pid_t pid[tasks];
+ int i;
+
+ for (i = 0; i < tasks; i++) {
+ pid[i] = fork();
+ if (pid[i] == 0) {
+ next_sched_cpu = sched_next_online(0, next_sched_cpu);
+ fn(i, data);
+ exit(0);
+ } else if (pid[i] == -1) {
+ printf("couldn't spawn #%d process\n", i);
+ exit(1);
+ }
+ /* It is mostly redundant and just allow the parent
+ * process to update next_shced_cpu for the next child
+ * process
+ */
+ next_sched_cpu = sched_next_online(pid[i], next_sched_cpu);
+ }
+ for (i = 0; i < tasks; i++) {
+ int status;
+
+ assert(waitpid(pid[i], &status, 0) == pid[i]);
+ assert(status == 0);
+ }
+}
+
+static void do_test_lru_dist(int task, void *data)
+{
+ unsigned int nr_misses = 0;
+ struct pfect_lru pfect_lru;
+ unsigned long long key, value = 1234;
+ unsigned int i;
+
+ unsigned int lru_map_fd = ((unsigned int *)data)[0];
+ unsigned int lru_size = ((unsigned int *)data)[1];
+ unsigned long long key_offset = task * dist_key_counts;
+
+ pfect_lru_init(&pfect_lru, lru_size, dist_key_counts);
+
+ for (i = 0; i < dist_key_counts; i++) {
+ key = dist_keys[i] + key_offset;
+
+ pfect_lru_lookup_or_insert(&pfect_lru, key);
+
+ if (!bpf_lookup_elem(lru_map_fd, &key, &value))
+ continue;
+
+ if (bpf_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) {
+ printf("bpf_update_elem(lru_map_fd, %llu): errno:%d\n",
+ key, errno);
+ assert(0);
+ }
+
+ nr_misses++;
+ }
+
+ printf(" task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
+ task, pfect_lru.nr_unique, dist_key_counts, nr_misses,
+ dist_key_counts);
+ printf(" task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
+ task, pfect_lru.nr_unique, pfect_lru.total,
+ pfect_lru.nr_misses, pfect_lru.total);
+
+ pfect_lru_destroy(&pfect_lru);
+ close(lru_map_fd);
+}
+
+static void test_parallel_lru_dist(int map_type, int map_flags,
+ int nr_tasks, unsigned int lru_size)
+{
+ int child_data[2];
+ int lru_map_fd;
+
+ printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
+ map_flags);
+
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ lru_map_fd = create_map(map_type, map_flags,
+ nr_cpus * lru_size);
+ else
+ lru_map_fd = create_map(map_type, map_flags,
+ nr_tasks * lru_size);
+ assert(lru_map_fd != -1);
+
+ child_data[0] = lru_map_fd;
+ child_data[1] = lru_size;
+
+ run_parallel(nr_tasks, do_test_lru_dist, child_data);
+
+ close(lru_map_fd);
+}
+
+static void test_lru_loss0(int map_type, int map_flags)
+{
+ unsigned long long key, value[nr_cpus];
+ unsigned int old_unused_losses = 0;
+ unsigned int new_unused_losses = 0;
+ unsigned int used_losses = 0;
+ int map_fd;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ assert(sched_next_online(0, 0) != -1);
+
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ map_fd = create_map(map_type, map_flags, 900 * nr_cpus);
+ else
+ map_fd = create_map(map_type, map_flags, 900);
+
+ assert(map_fd != -1);
+
+ value[0] = 1234;
+
+ for (key = 1; key <= 1000; key++) {
+ int start_key, end_key;
+
+ assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);
+
+ start_key = 101;
+ end_key = min(key, 900);
+
+ while (start_key <= end_key) {
+ bpf_lookup_elem(map_fd, &start_key, value);
+ start_key++;
+ }
+ }
+
+ for (key = 1; key <= 1000; key++) {
+ if (bpf_lookup_elem(map_fd, &key, value)) {
+ if (key <= 100)
+ old_unused_losses++;
+ else if (key <= 900)
+ used_losses++;
+ else
+ new_unused_losses++;
+ }
+ }
+
+ close(map_fd);
+
+ printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) "
+ "newer-elem-losses:%d(/100)\n",
+ old_unused_losses, used_losses, new_unused_losses);
+}
+
+static void test_lru_loss1(int map_type, int map_flags)
+{
+ unsigned long long key, value[nr_cpus];
+ int map_fd;
+ unsigned int nr_losses = 0;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ assert(sched_next_online(0, 0) != -1);
+
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ map_fd = create_map(map_type, map_flags, 1000 * nr_cpus);
+ else
+ map_fd = create_map(map_type, map_flags, 1000);
+
+ assert(map_fd != -1);
+
+ value[0] = 1234;
+
+ for (key = 1; key <= 1000; key++)
+ assert(!bpf_update_elem(map_fd, &key, value, BPF_NOEXIST));
+
+ for (key = 1; key <= 1000; key++) {
+ if (bpf_lookup_elem(map_fd, &key, value))
+ nr_losses++;
+ }
+
+ close(map_fd);
+
+ printf("nr_losses:%d(/1000)\n", nr_losses);
+}
+
+static void do_test_parallel_lru_loss(int task, void *data)
+{
+ const unsigned int nr_stable_elems = 1000;
+ const unsigned int nr_repeats = 100000;
+
+ int map_fd = *(int *)data;
+ unsigned long long stable_base;
+ unsigned long long key, value[nr_cpus];
+ unsigned long long next_ins_key;
+ unsigned int nr_losses = 0;
+ unsigned int i;
+
+ stable_base = task * nr_repeats * 2 + 1;
+ next_ins_key = stable_base;
+ value[0] = 1234;
+ for (i = 0; i < nr_stable_elems; i++) {
+ assert(bpf_update_elem(map_fd, &next_ins_key, value,
+ BPF_NOEXIST) == 0);
+ next_ins_key++;
+ }
+
+ for (i = 0; i < nr_repeats; i++) {
+ int rn;
+
+ rn = rand();
+
+ if (rn % 10) {
+ key = rn % nr_stable_elems + stable_base;
+ bpf_lookup_elem(map_fd, &key, value);
+ } else {
+ bpf_update_elem(map_fd, &next_ins_key, value,
+ BPF_NOEXIST);
+ next_ins_key++;
+ }
+ }
+
+ key = stable_base;
+ for (i = 0; i < nr_stable_elems; i++) {
+ if (bpf_lookup_elem(map_fd, &key, value))
+ nr_losses++;
+ key++;
+ }
+
+ printf(" task:%d nr_losses:%u\n", task, nr_losses);
+}
+
+static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks)
+{
+ int map_fd;
+
+ printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
+ map_flags);
+
+ /* Give 20% more than the active working set */
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ map_fd = create_map(map_type, map_flags,
+ nr_cpus * (1000 + 200));
+ else
+ map_fd = create_map(map_type, map_flags,
+ nr_tasks * (1000 + 200));
+
+ assert(map_fd != -1);
+
+ run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd);
+
+ close(map_fd);
+}
+
+int main(int argc, char **argv)
+{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
+ const char *dist_file;
+ int nr_tasks = 1;
+ int lru_size;
+ int f;
+
+ if (argc < 4) {
+ printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n",
+ argv[0]);
+ return -1;
+ }
+
+ dist_file = argv[1];
+ lru_size = atoi(argv[2]);
+ nr_tasks = atoi(argv[3]);
+
+ setbuf(stdout, NULL);
+
+ assert(!setrlimit(RLIMIT_MEMLOCK, &r));
+
+ srand(time(NULL));
+
+ nr_cpus = bpf_num_possible_cpus();
+ assert(nr_cpus != -1);
+ printf("nr_cpus:%d\n\n", nr_cpus);
+
+ nr_tasks = min(nr_tasks, nr_cpus);
+
+ dist_key_counts = read_keys(dist_file, &dist_keys);
+ if (!dist_key_counts) {
+ printf("%s has no key\n", dist_file);
+ return -1;
+ }
+
+ for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
+ test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
+ test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
+ test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
+ nr_tasks);
+ test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
+ nr_tasks, lru_size);
+ printf("\n");
+ }
+
+ free(dist_keys);
+
+ return 0;
+}
diff --git a/samples/bpf/test_lwt_bpf.c b/samples/bpf/test_lwt_bpf.c
new file mode 100644
index 0000000..bacc801
--- /dev/null
+++ b/samples/bpf/test_lwt_bpf.c
@@ -0,0 +1,253 @@
+/* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmpv6.h>
+#include <linux/if_ether.h>
+#include "bpf_helpers.h"
+#include <string.h>
+
+# define printk(fmt, ...) \
+ ({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+ })
+
+#define CB_MAGIC 1234
+
+/* Test: Pass all packets through */
+SEC("nop")
+int do_nop(struct __sk_buff *skb)
+{
+ return BPF_OK;
+}
+
+/* Test: Verify context information can be accessed */
+SEC("test_ctx")
+int do_test_ctx(struct __sk_buff *skb)
+{
+ skb->cb[0] = CB_MAGIC;
+ printk("len %d hash %d protocol %d\n", skb->len, skb->hash,
+ skb->protocol);
+ printk("cb %d ingress_ifindex %d ifindex %d\n", skb->cb[0],
+ skb->ingress_ifindex, skb->ifindex);
+
+ return BPF_OK;
+}
+
+/* Test: Ensure skb->cb[] buffer is cleared */
+SEC("test_cb")
+int do_test_cb(struct __sk_buff *skb)
+{
+ printk("cb0: %x cb1: %x cb2: %x\n", skb->cb[0], skb->cb[1],
+ skb->cb[2]);
+ printk("cb3: %x cb4: %x\n", skb->cb[3], skb->cb[4]);
+
+ return BPF_OK;
+}
+
+/* Test: Verify skb data can be read */
+SEC("test_data")
+int do_test_data(struct __sk_buff *skb)
+{
+ void *data = (void *)(long)skb->data;
+ void *data_end = (void *)(long)skb->data_end;
+ struct iphdr *iph = data;
+
+ if (data + sizeof(*iph) > data_end) {
+ printk("packet truncated\n");
+ return BPF_DROP;
+ }
+
+ printk("src: %x dst: %x\n", iph->saddr, iph->daddr);
+
+ return BPF_OK;
+}
+
+#define IP_CSUM_OFF offsetof(struct iphdr, check)
+#define IP_DST_OFF offsetof(struct iphdr, daddr)
+#define IP_SRC_OFF offsetof(struct iphdr, saddr)
+#define IP_PROTO_OFF offsetof(struct iphdr, protocol)
+#define TCP_CSUM_OFF offsetof(struct tcphdr, check)
+#define UDP_CSUM_OFF offsetof(struct udphdr, check)
+#define IS_PSEUDO 0x10
+
+static inline int rewrite(struct __sk_buff *skb, uint32_t old_ip,
+ uint32_t new_ip, int rw_daddr)
+{
+ int ret, off = 0, flags = IS_PSEUDO;
+ uint8_t proto;
+
+ ret = bpf_skb_load_bytes(skb, IP_PROTO_OFF, &proto, 1);
+ if (ret < 0) {
+ printk("bpf_l4_csum_replace failed: %d\n", ret);
+ return BPF_DROP;
+ }
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ off = TCP_CSUM_OFF;
+ break;
+
+ case IPPROTO_UDP:
+ off = UDP_CSUM_OFF;
+ flags |= BPF_F_MARK_MANGLED_0;
+ break;
+
+ case IPPROTO_ICMPV6:
+ off = offsetof(struct icmp6hdr, icmp6_cksum);
+ break;
+ }
+
+ if (off) {
+ ret = bpf_l4_csum_replace(skb, off, old_ip, new_ip,
+ flags | sizeof(new_ip));
+ if (ret < 0) {
+ printk("bpf_l4_csum_replace failed: %d\n");
+ return BPF_DROP;
+ }
+ }
+
+ ret = bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
+ if (ret < 0) {
+ printk("bpf_l3_csum_replace failed: %d\n", ret);
+ return BPF_DROP;
+ }
+
+ if (rw_daddr)
+ ret = bpf_skb_store_bytes(skb, IP_DST_OFF, &new_ip, sizeof(new_ip), 0);
+ else
+ ret = bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0);
+
+ if (ret < 0) {
+ printk("bpf_skb_store_bytes() failed: %d\n", ret);
+ return BPF_DROP;
+ }
+
+ return BPF_OK;
+}
+
+/* Test: Verify skb data can be modified */
+SEC("test_rewrite")
+int do_test_rewrite(struct __sk_buff *skb)
+{
+ uint32_t old_ip, new_ip = 0x3fea8c0;
+ int ret;
+
+ ret = bpf_skb_load_bytes(skb, IP_DST_OFF, &old_ip, 4);
+ if (ret < 0) {
+ printk("bpf_skb_load_bytes failed: %d\n", ret);
+ return BPF_DROP;
+ }
+
+ if (old_ip == 0x2fea8c0) {
+ printk("out: rewriting from %x to %x\n", old_ip, new_ip);
+ return rewrite(skb, old_ip, new_ip, 1);
+ }
+
+ return BPF_OK;
+}
+
+static inline int __do_push_ll_and_redirect(struct __sk_buff *skb)
+{
+ uint64_t smac = SRC_MAC, dmac = DST_MAC;
+ int ret, ifindex = DST_IFINDEX;
+ struct ethhdr ehdr;
+
+ ret = bpf_skb_change_head(skb, 14, 0);
+ if (ret < 0) {
+ printk("skb_change_head() failed: %d\n", ret);
+ }
+
+ ehdr.h_proto = __constant_htons(ETH_P_IP);
+ memcpy(&ehdr.h_source, &smac, 6);
+ memcpy(&ehdr.h_dest, &dmac, 6);
+
+ ret = bpf_skb_store_bytes(skb, 0, &ehdr, sizeof(ehdr), 0);
+ if (ret < 0) {
+ printk("skb_store_bytes() failed: %d\n", ret);
+ return BPF_DROP;
+ }
+
+ return bpf_redirect(ifindex, 0);
+}
+
+SEC("push_ll_and_redirect_silent")
+int do_push_ll_and_redirect_silent(struct __sk_buff *skb)
+{
+ return __do_push_ll_and_redirect(skb);
+}
+
+SEC("push_ll_and_redirect")
+int do_push_ll_and_redirect(struct __sk_buff *skb)
+{
+ int ret, ifindex = DST_IFINDEX;
+
+ ret = __do_push_ll_and_redirect(skb);
+ if (ret >= 0)
+ printk("redirected to %d\n", ifindex);
+
+ return ret;
+}
+
+static inline void __fill_garbage(struct __sk_buff *skb)
+{
+ uint64_t f = 0xFFFFFFFFFFFFFFFF;
+
+ bpf_skb_store_bytes(skb, 0, &f, sizeof(f), 0);
+ bpf_skb_store_bytes(skb, 8, &f, sizeof(f), 0);
+ bpf_skb_store_bytes(skb, 16, &f, sizeof(f), 0);
+ bpf_skb_store_bytes(skb, 24, &f, sizeof(f), 0);
+ bpf_skb_store_bytes(skb, 32, &f, sizeof(f), 0);
+ bpf_skb_store_bytes(skb, 40, &f, sizeof(f), 0);
+ bpf_skb_store_bytes(skb, 48, &f, sizeof(f), 0);
+ bpf_skb_store_bytes(skb, 56, &f, sizeof(f), 0);
+ bpf_skb_store_bytes(skb, 64, &f, sizeof(f), 0);
+ bpf_skb_store_bytes(skb, 72, &f, sizeof(f), 0);
+ bpf_skb_store_bytes(skb, 80, &f, sizeof(f), 0);
+ bpf_skb_store_bytes(skb, 88, &f, sizeof(f), 0);
+}
+
+SEC("fill_garbage")
+int do_fill_garbage(struct __sk_buff *skb)
+{
+ __fill_garbage(skb);
+ printk("Set initial 96 bytes of header to FF\n");
+ return BPF_OK;
+}
+
+SEC("fill_garbage_and_redirect")
+int do_fill_garbage_and_redirect(struct __sk_buff *skb)
+{
+ int ifindex = DST_IFINDEX;
+ __fill_garbage(skb);
+ printk("redirected to %d\n", ifindex);
+ return bpf_redirect(ifindex, 0);
+}
+
+/* Drop all packets */
+SEC("drop_all")
+int do_drop_all(struct __sk_buff *skb)
+{
+ printk("dropping with: %d\n", BPF_DROP);
+ return BPF_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_lwt_bpf.sh b/samples/bpf/test_lwt_bpf.sh
new file mode 100644
index 0000000..a695ae2
--- /dev/null
+++ b/samples/bpf/test_lwt_bpf.sh
@@ -0,0 +1,399 @@
+#!/bin/bash
+
+# Uncomment to see generated bytecode
+#VERBOSE=verbose
+
+NS1=lwt_ns1
+NS2=lwt_ns2
+VETH0=tst_lwt1a
+VETH1=tst_lwt1b
+VETH2=tst_lwt2a
+VETH3=tst_lwt2b
+IPVETH0="192.168.254.1"
+IPVETH1="192.168.254.2"
+IPVETH1b="192.168.254.3"
+
+IPVETH2="192.168.111.1"
+IPVETH3="192.168.111.2"
+
+IP_LOCAL="192.168.99.1"
+
+TRACE_ROOT=/sys/kernel/debug/tracing
+
+function lookup_mac()
+{
+ set +x
+ if [ ! -z "$2" ]; then
+ MAC=$(ip netns exec $2 ip link show $1 | grep ether | awk '{print $2}')
+ else
+ MAC=$(ip link show $1 | grep ether | awk '{print $2}')
+ fi
+ MAC="${MAC//:/}"
+ echo "0x${MAC:10:2}${MAC:8:2}${MAC:6:2}${MAC:4:2}${MAC:2:2}${MAC:0:2}"
+ set -x
+}
+
+function cleanup {
+ set +ex
+ rm test_lwt_bpf.o 2> /dev/null
+ ip link del $VETH0 2> /dev/null
+ ip link del $VETH1 2> /dev/null
+ ip link del $VETH2 2> /dev/null
+ ip link del $VETH3 2> /dev/null
+ ip netns exec $NS1 killall netserver
+ ip netns delete $NS1 2> /dev/null
+ ip netns delete $NS2 2> /dev/null
+ set -ex
+}
+
+function setup_one_veth {
+ ip netns add $1
+ ip link add $2 type veth peer name $3
+ ip link set dev $2 up
+ ip addr add $4/24 dev $2
+ ip link set $3 netns $1
+ ip netns exec $1 ip link set dev $3 up
+ ip netns exec $1 ip addr add $5/24 dev $3
+
+ if [ "$6" ]; then
+ ip netns exec $1 ip addr add $6/32 dev $3
+ fi
+}
+
+function get_trace {
+ set +x
+ cat ${TRACE_ROOT}/trace | grep -v '^#'
+ set -x
+}
+
+function cleanup_routes {
+ ip route del ${IPVETH1}/32 dev $VETH0 2> /dev/null || true
+ ip route del table local local ${IP_LOCAL}/32 dev lo 2> /dev/null || true
+}
+
+function install_test {
+ cleanup_routes
+ cp /dev/null ${TRACE_ROOT}/trace
+
+ OPTS="encap bpf headroom 14 $1 obj test_lwt_bpf.o section $2 $VERBOSE"
+
+ if [ "$1" == "in" ]; then
+ ip route add table local local ${IP_LOCAL}/32 $OPTS dev lo
+ else
+ ip route add ${IPVETH1}/32 $OPTS dev $VETH0
+ fi
+}
+
+function remove_prog {
+ if [ "$1" == "in" ]; then
+ ip route del table local local ${IP_LOCAL}/32 dev lo
+ else
+ ip route del ${IPVETH1}/32 dev $VETH0
+ fi
+}
+
+function filter_trace {
+ # Add newline to allow starting EXPECT= variables on newline
+ NL=$'\n'
+ echo "${NL}$*" | sed -e 's/^.*: : //g'
+}
+
+function expect_fail {
+ set +x
+ echo "FAIL:"
+ echo "Expected: $1"
+ echo "Got: $2"
+ set -x
+ exit 1
+}
+
+function match_trace {
+ set +x
+ RET=0
+ TRACE=$1
+ EXPECT=$2
+ GOT="$(filter_trace "$TRACE")"
+
+ [ "$GOT" != "$EXPECT" ] && {
+ expect_fail "$EXPECT" "$GOT"
+ RET=1
+ }
+ set -x
+ return $RET
+}
+
+function test_start {
+ set +x
+ echo "----------------------------------------------------------------"
+ echo "Starting test: $*"
+ echo "----------------------------------------------------------------"
+ set -x
+}
+
+function failure {
+ get_trace
+ echo "FAIL: $*"
+ exit 1
+}
+
+function test_ctx_xmit {
+ test_start "test_ctx on lwt xmit"
+ install_test xmit test_ctx
+ ping -c 3 $IPVETH1 || {
+ failure "test_ctx xmit: packets are dropped"
+ }
+ match_trace "$(get_trace)" "
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 0 ifindex $DST_IFINDEX
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 0 ifindex $DST_IFINDEX
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 0 ifindex $DST_IFINDEX" || exit 1
+ remove_prog xmit
+}
+
+function test_ctx_out {
+ test_start "test_ctx on lwt out"
+ install_test out test_ctx
+ ping -c 3 $IPVETH1 || {
+ failure "test_ctx out: packets are dropped"
+ }
+ match_trace "$(get_trace)" "
+len 84 hash 0 protocol 0
+cb 1234 ingress_ifindex 0 ifindex 0
+len 84 hash 0 protocol 0
+cb 1234 ingress_ifindex 0 ifindex 0
+len 84 hash 0 protocol 0
+cb 1234 ingress_ifindex 0 ifindex 0" || exit 1
+ remove_prog out
+}
+
+function test_ctx_in {
+ test_start "test_ctx on lwt in"
+ install_test in test_ctx
+ ping -c 3 $IP_LOCAL || {
+ failure "test_ctx out: packets are dropped"
+ }
+ # We will both request & reply packets as the packets will
+ # be from $IP_LOCAL => $IP_LOCAL
+ match_trace "$(get_trace)" "
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 1 ifindex 1
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 1 ifindex 1
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 1 ifindex 1
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 1 ifindex 1
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 1 ifindex 1
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 1 ifindex 1" || exit 1
+ remove_prog in
+}
+
+function test_data {
+ test_start "test_data on lwt $1"
+ install_test $1 test_data
+ ping -c 3 $IPVETH1 || {
+ failure "test_data ${1}: packets are dropped"
+ }
+ match_trace "$(get_trace)" "
+src: 1fea8c0 dst: 2fea8c0
+src: 1fea8c0 dst: 2fea8c0
+src: 1fea8c0 dst: 2fea8c0" || exit 1
+ remove_prog $1
+}
+
+function test_data_in {
+ test_start "test_data on lwt in"
+ install_test in test_data
+ ping -c 3 $IP_LOCAL || {
+ failure "test_data in: packets are dropped"
+ }
+ # We will both request & reply packets as the packets will
+ # be from $IP_LOCAL => $IP_LOCAL
+ match_trace "$(get_trace)" "
+src: 163a8c0 dst: 163a8c0
+src: 163a8c0 dst: 163a8c0
+src: 163a8c0 dst: 163a8c0
+src: 163a8c0 dst: 163a8c0
+src: 163a8c0 dst: 163a8c0
+src: 163a8c0 dst: 163a8c0" || exit 1
+ remove_prog in
+}
+
+function test_cb {
+ test_start "test_cb on lwt $1"
+ install_test $1 test_cb
+ ping -c 3 $IPVETH1 || {
+ failure "test_cb ${1}: packets are dropped"
+ }
+ match_trace "$(get_trace)" "
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0" || exit 1
+ remove_prog $1
+}
+
+function test_cb_in {
+ test_start "test_cb on lwt in"
+ install_test in test_cb
+ ping -c 3 $IP_LOCAL || {
+ failure "test_cb in: packets are dropped"
+ }
+ # We will both request & reply packets as the packets will
+ # be from $IP_LOCAL => $IP_LOCAL
+ match_trace "$(get_trace)" "
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0" || exit 1
+ remove_prog in
+}
+
+function test_drop_all {
+ test_start "test_drop_all on lwt $1"
+ install_test $1 drop_all
+ ping -c 3 $IPVETH1 && {
+ failure "test_drop_all ${1}: Unexpected success of ping"
+ }
+ match_trace "$(get_trace)" "
+dropping with: 2
+dropping with: 2
+dropping with: 2" || exit 1
+ remove_prog $1
+}
+
+function test_drop_all_in {
+ test_start "test_drop_all on lwt in"
+ install_test in drop_all
+ ping -c 3 $IP_LOCAL && {
+ failure "test_drop_all in: Unexpected success of ping"
+ }
+ match_trace "$(get_trace)" "
+dropping with: 2
+dropping with: 2
+dropping with: 2" || exit 1
+ remove_prog in
+}
+
+function test_push_ll_and_redirect {
+ test_start "test_push_ll_and_redirect on lwt xmit"
+ install_test xmit push_ll_and_redirect
+ ping -c 3 $IPVETH1 || {
+ failure "Redirected packets appear to be dropped"
+ }
+ match_trace "$(get_trace)" "
+redirected to $DST_IFINDEX
+redirected to $DST_IFINDEX
+redirected to $DST_IFINDEX" || exit 1
+ remove_prog xmit
+}
+
+function test_no_l2_and_redirect {
+ test_start "test_no_l2_and_redirect on lwt xmit"
+ install_test xmit fill_garbage_and_redirect
+ ping -c 3 $IPVETH1 && {
+ failure "Unexpected success despite lack of L2 header"
+ }
+ match_trace "$(get_trace)" "
+redirected to $DST_IFINDEX
+redirected to $DST_IFINDEX
+redirected to $DST_IFINDEX" || exit 1
+ remove_prog xmit
+}
+
+function test_rewrite {
+ test_start "test_rewrite on lwt xmit"
+ install_test xmit test_rewrite
+ ping -c 3 $IPVETH1 || {
+ failure "Rewritten packets appear to be dropped"
+ }
+ match_trace "$(get_trace)" "
+out: rewriting from 2fea8c0 to 3fea8c0
+out: rewriting from 2fea8c0 to 3fea8c0
+out: rewriting from 2fea8c0 to 3fea8c0" || exit 1
+ remove_prog out
+}
+
+function test_fill_garbage {
+ test_start "test_fill_garbage on lwt xmit"
+ install_test xmit fill_garbage
+ ping -c 3 $IPVETH1 && {
+ failure "test_drop_all ${1}: Unexpected success of ping"
+ }
+ match_trace "$(get_trace)" "
+Set initial 96 bytes of header to FF
+Set initial 96 bytes of header to FF
+Set initial 96 bytes of header to FF" || exit 1
+ remove_prog xmit
+}
+
+function test_netperf_nop {
+ test_start "test_netperf_nop on lwt xmit"
+ install_test xmit nop
+ netperf -H $IPVETH1 -t TCP_STREAM || {
+ failure "packets appear to be dropped"
+ }
+ match_trace "$(get_trace)" ""|| exit 1
+ remove_prog xmit
+}
+
+function test_netperf_redirect {
+ test_start "test_netperf_redirect on lwt xmit"
+ install_test xmit push_ll_and_redirect_silent
+ netperf -H $IPVETH1 -t TCP_STREAM || {
+ failure "Rewritten packets appear to be dropped"
+ }
+ match_trace "$(get_trace)" ""|| exit 1
+ remove_prog xmit
+}
+
+cleanup
+setup_one_veth $NS1 $VETH0 $VETH1 $IPVETH0 $IPVETH1 $IPVETH1b
+setup_one_veth $NS2 $VETH2 $VETH3 $IPVETH2 $IPVETH3
+ip netns exec $NS1 netserver
+echo 1 > ${TRACE_ROOT}/tracing_on
+
+DST_MAC=$(lookup_mac $VETH1 $NS1)
+SRC_MAC=$(lookup_mac $VETH0)
+DST_IFINDEX=$(cat /sys/class/net/$VETH0/ifindex)
+
+CLANG_OPTS="-O2 -target bpf -I ../include/"
+CLANG_OPTS+=" -DSRC_MAC=$SRC_MAC -DDST_MAC=$DST_MAC -DDST_IFINDEX=$DST_IFINDEX"
+clang $CLANG_OPTS -c test_lwt_bpf.c -o test_lwt_bpf.o
+
+test_ctx_xmit
+test_ctx_out
+test_ctx_in
+test_data "xmit"
+test_data "out"
+test_data_in
+test_cb "xmit"
+test_cb "out"
+test_cb_in
+test_drop_all "xmit"
+test_drop_all "out"
+test_drop_all_in
+test_rewrite
+test_push_ll_and_redirect
+test_no_l2_and_redirect
+test_fill_garbage
+test_netperf_nop
+test_netperf_redirect
+
+cleanup
+echo 0 > ${TRACE_ROOT}/tracing_on
+exit 0
diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c
deleted file mode 100644
index cce2b59..0000000
--- a/samples/bpf/test_maps.c
+++ /dev/null
@@ -1,503 +0,0 @@
-/*
- * Testsuite for eBPF maps
- *
- * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
- * Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <stdio.h>
-#include <unistd.h>
-#include <linux/bpf.h>
-#include <errno.h>
-#include <string.h>
-#include <assert.h>
-#include <sys/wait.h>
-#include <stdlib.h>
-#include "libbpf.h"
-
-static int map_flags;
-
-/* sanity tests for map API */
-static void test_hashmap_sanity(int i, void *data)
-{
- long long key, next_key, value;
- int map_fd;
-
- map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- 2, map_flags);
- if (map_fd < 0) {
- printf("failed to create hashmap '%s'\n", strerror(errno));
- exit(1);
- }
-
- key = 1;
- value = 1234;
- /* insert key=1 element */
- assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
-
- value = 0;
- /* BPF_NOEXIST means: add new element if it doesn't exist */
- assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
- /* key=1 already exists */
- errno == EEXIST);
-
- assert(bpf_update_elem(map_fd, &key, &value, -1) == -1 && errno == EINVAL);
-
- /* check that key=1 can be found */
- assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 1234);
-
- key = 2;
- /* check that key=2 is not found */
- assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT);
-
- /* BPF_EXIST means: update existing element */
- assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == -1 &&
- /* key=2 is not there */
- errno == ENOENT);
-
- /* insert key=2 element */
- assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0);
-
- /* key=1 and key=2 were inserted, check that key=0 cannot be inserted
- * due to max_entries limit
- */
- key = 0;
- assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
- errno == E2BIG);
-
- /* update existing element, thought the map is full */
- key = 1;
- assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0);
- key = 2;
- assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
- key = 1;
- assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
-
- /* check that key = 0 doesn't exist */
- key = 0;
- assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
-
- /* iterate over two elements */
- assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 &&
- (next_key == 1 || next_key == 2));
- assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 &&
- (next_key == 1 || next_key == 2));
- assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 &&
- errno == ENOENT);
-
- /* delete both elements */
- key = 1;
- assert(bpf_delete_elem(map_fd, &key) == 0);
- key = 2;
- assert(bpf_delete_elem(map_fd, &key) == 0);
- assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
-
- key = 0;
- /* check that map is empty */
- assert(bpf_get_next_key(map_fd, &key, &next_key) == -1 &&
- errno == ENOENT);
- close(map_fd);
-}
-
-/* sanity tests for percpu map API */
-static void test_percpu_hashmap_sanity(int task, void *data)
-{
- long long key, next_key;
- int expected_key_mask = 0;
- unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
- long long value[nr_cpus];
- int map_fd, i;
-
- map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
- sizeof(value[0]), 2, map_flags);
- if (map_fd < 0) {
- printf("failed to create hashmap '%s'\n", strerror(errno));
- exit(1);
- }
-
- for (i = 0; i < nr_cpus; i++)
- value[i] = i + 100;
- key = 1;
- /* insert key=1 element */
- assert(!(expected_key_mask & key));
- assert(bpf_update_elem(map_fd, &key, value, BPF_ANY) == 0);
- expected_key_mask |= key;
-
- /* BPF_NOEXIST means: add new element if it doesn't exist */
- assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == -1 &&
- /* key=1 already exists */
- errno == EEXIST);
-
- /* -1 is an invalid flag */
- assert(bpf_update_elem(map_fd, &key, value, -1) == -1 &&
- errno == EINVAL);
-
- /* check that key=1 can be found. value could be 0 if the lookup
- * was run from a different cpu.
- */
- value[0] = 1;
- assert(bpf_lookup_elem(map_fd, &key, value) == 0 && value[0] == 100);
-
- key = 2;
- /* check that key=2 is not found */
- assert(bpf_lookup_elem(map_fd, &key, value) == -1 && errno == ENOENT);
-
- /* BPF_EXIST means: update existing element */
- assert(bpf_update_elem(map_fd, &key, value, BPF_EXIST) == -1 &&
- /* key=2 is not there */
- errno == ENOENT);
-
- /* insert key=2 element */
- assert(!(expected_key_mask & key));
- assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);
- expected_key_mask |= key;
-
- /* key=1 and key=2 were inserted, check that key=0 cannot be inserted
- * due to max_entries limit
- */
- key = 0;
- assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == -1 &&
- errno == E2BIG);
-
- /* check that key = 0 doesn't exist */
- assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
-
- /* iterate over two elements */
- while (!bpf_get_next_key(map_fd, &key, &next_key)) {
- assert((expected_key_mask & next_key) == next_key);
- expected_key_mask &= ~next_key;
-
- assert(bpf_lookup_elem(map_fd, &next_key, value) == 0);
- for (i = 0; i < nr_cpus; i++)
- assert(value[i] == i + 100);
-
- key = next_key;
- }
- assert(errno == ENOENT);
-
- /* Update with BPF_EXIST */
- key = 1;
- assert(bpf_update_elem(map_fd, &key, value, BPF_EXIST) == 0);
-
- /* delete both elements */
- key = 1;
- assert(bpf_delete_elem(map_fd, &key) == 0);
- key = 2;
- assert(bpf_delete_elem(map_fd, &key) == 0);
- assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
-
- key = 0;
- /* check that map is empty */
- assert(bpf_get_next_key(map_fd, &key, &next_key) == -1 &&
- errno == ENOENT);
- close(map_fd);
-}
-
-static void test_arraymap_sanity(int i, void *data)
-{
- int key, next_key, map_fd;
- long long value;
-
- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
- 2, 0);
- if (map_fd < 0) {
- printf("failed to create arraymap '%s'\n", strerror(errno));
- exit(1);
- }
-
- key = 1;
- value = 1234;
- /* insert key=1 element */
- assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
-
- value = 0;
- assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
- errno == EEXIST);
-
- /* check that key=1 can be found */
- assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 1234);
-
- key = 0;
- /* check that key=0 is also found and zero initialized */
- assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 0);
-
-
- /* key=0 and key=1 were inserted, check that key=2 cannot be inserted
- * due to max_entries limit
- */
- key = 2;
- assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == -1 &&
- errno == E2BIG);
-
- /* check that key = 2 doesn't exist */
- assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT);
-
- /* iterate over two elements */
- assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 &&
- next_key == 0);
- assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 &&
- next_key == 1);
- assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 &&
- errno == ENOENT);
-
- /* delete shouldn't succeed */
- key = 1;
- assert(bpf_delete_elem(map_fd, &key) == -1 && errno == EINVAL);
-
- close(map_fd);
-}
-
-static void test_percpu_arraymap_many_keys(void)
-{
- unsigned nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
- unsigned nr_keys = 20000;
- long values[nr_cpus];
- int key, map_fd, i;
-
- map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
- sizeof(values[0]), nr_keys, 0);
- if (map_fd < 0) {
- printf("failed to create per-cpu arraymap '%s'\n",
- strerror(errno));
- exit(1);
- }
-
- for (i = 0; i < nr_cpus; i++)
- values[i] = i + 10;
-
- for (key = 0; key < nr_keys; key++)
- assert(bpf_update_elem(map_fd, &key, values, BPF_ANY) == 0);
-
- for (key = 0; key < nr_keys; key++) {
- for (i = 0; i < nr_cpus; i++)
- values[i] = 0;
- assert(bpf_lookup_elem(map_fd, &key, values) == 0);
- for (i = 0; i < nr_cpus; i++)
- assert(values[i] == i + 10);
- }
-
- close(map_fd);
-}
-
-static void test_percpu_arraymap_sanity(int i, void *data)
-{
- unsigned nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
- long values[nr_cpus];
- int key, next_key, map_fd;
-
- map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
- sizeof(values[0]), 2, 0);
- if (map_fd < 0) {
- printf("failed to create arraymap '%s'\n", strerror(errno));
- exit(1);
- }
-
- for (i = 0; i < nr_cpus; i++)
- values[i] = i + 100;
-
- key = 1;
- /* insert key=1 element */
- assert(bpf_update_elem(map_fd, &key, values, BPF_ANY) == 0);
-
- values[0] = 0;
- assert(bpf_update_elem(map_fd, &key, values, BPF_NOEXIST) == -1 &&
- errno == EEXIST);
-
- /* check that key=1 can be found */
- assert(bpf_lookup_elem(map_fd, &key, values) == 0 && values[0] == 100);
-
- key = 0;
- /* check that key=0 is also found and zero initialized */
- assert(bpf_lookup_elem(map_fd, &key, values) == 0 &&
- values[0] == 0 && values[nr_cpus - 1] == 0);
-
-
- /* check that key=2 cannot be inserted due to max_entries limit */
- key = 2;
- assert(bpf_update_elem(map_fd, &key, values, BPF_EXIST) == -1 &&
- errno == E2BIG);
-
- /* check that key = 2 doesn't exist */
- assert(bpf_lookup_elem(map_fd, &key, values) == -1 && errno == ENOENT);
-
- /* iterate over two elements */
- assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 &&
- next_key == 0);
- assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 &&
- next_key == 1);
- assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 &&
- errno == ENOENT);
-
- /* delete shouldn't succeed */
- key = 1;
- assert(bpf_delete_elem(map_fd, &key) == -1 && errno == EINVAL);
-
- close(map_fd);
-}
-
-#define MAP_SIZE (32 * 1024)
-static void test_map_large(void)
-{
- struct bigkey {
- int a;
- char b[116];
- long long c;
- } key;
- int map_fd, i, value;
-
- /* allocate 4Mbyte of memory */
- map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE, map_flags);
- if (map_fd < 0) {
- printf("failed to create large map '%s'\n", strerror(errno));
- exit(1);
- }
-
- for (i = 0; i < MAP_SIZE; i++) {
- key = (struct bigkey) {.c = i};
- value = i;
- assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0);
- }
- key.c = -1;
- assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
- errno == E2BIG);
-
- /* iterate through all elements */
- for (i = 0; i < MAP_SIZE; i++)
- assert(bpf_get_next_key(map_fd, &key, &key) == 0);
- assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT);
-
- key.c = 0;
- assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 0);
- key.a = 1;
- assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT);
-
- close(map_fd);
-}
-
-/* fork N children and wait for them to complete */
-static void run_parallel(int tasks, void (*fn)(int i, void *data), void *data)
-{
- pid_t pid[tasks];
- int i;
-
- for (i = 0; i < tasks; i++) {
- pid[i] = fork();
- if (pid[i] == 0) {
- fn(i, data);
- exit(0);
- } else if (pid[i] == -1) {
- printf("couldn't spawn #%d process\n", i);
- exit(1);
- }
- }
- for (i = 0; i < tasks; i++) {
- int status;
-
- assert(waitpid(pid[i], &status, 0) == pid[i]);
- assert(status == 0);
- }
-}
-
-static void test_map_stress(void)
-{
- run_parallel(100, test_hashmap_sanity, NULL);
- run_parallel(100, test_percpu_hashmap_sanity, NULL);
- run_parallel(100, test_arraymap_sanity, NULL);
- run_parallel(100, test_percpu_arraymap_sanity, NULL);
-}
-
-#define TASKS 1024
-#define DO_UPDATE 1
-#define DO_DELETE 0
-static void do_work(int fn, void *data)
-{
- int map_fd = ((int *)data)[0];
- int do_update = ((int *)data)[1];
- int i;
- int key, value;
-
- for (i = fn; i < MAP_SIZE; i += TASKS) {
- key = value = i;
- if (do_update) {
- assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0);
- assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0);
- } else {
- assert(bpf_delete_elem(map_fd, &key) == 0);
- }
- }
-}
-
-static void test_map_parallel(void)
-{
- int i, map_fd, key = 0, value = 0;
- int data[2];
-
- map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE, map_flags);
- if (map_fd < 0) {
- printf("failed to create map for parallel test '%s'\n",
- strerror(errno));
- exit(1);
- }
-
- data[0] = map_fd;
- data[1] = DO_UPDATE;
- /* use the same map_fd in children to add elements to this map
- * child_0 adds key=0, key=1024, key=2048, ...
- * child_1 adds key=1, key=1025, key=2049, ...
- * child_1023 adds key=1023, ...
- */
- run_parallel(TASKS, do_work, data);
-
- /* check that key=0 is already there */
- assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
- errno == EEXIST);
-
- /* check that all elements were inserted */
- key = -1;
- for (i = 0; i < MAP_SIZE; i++)
- assert(bpf_get_next_key(map_fd, &key, &key) == 0);
- assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT);
-
- /* another check for all elements */
- for (i = 0; i < MAP_SIZE; i++) {
- key = MAP_SIZE - i - 1;
- assert(bpf_lookup_elem(map_fd, &key, &value) == 0 &&
- value == key);
- }
-
- /* now let's delete all elemenets in parallel */
- data[1] = DO_DELETE;
- run_parallel(TASKS, do_work, data);
-
- /* nothing should be left */
- key = -1;
- assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT);
-}
-
-static void run_all_tests(void)
-{
- test_hashmap_sanity(0, NULL);
- test_percpu_hashmap_sanity(0, NULL);
- test_arraymap_sanity(0, NULL);
- test_percpu_arraymap_sanity(0, NULL);
- test_percpu_arraymap_many_keys();
-
- test_map_large();
- test_map_parallel();
- test_map_stress();
-}
-
-int main(void)
-{
- map_flags = 0;
- run_all_tests();
- map_flags = BPF_F_NO_PREALLOC;
- run_all_tests();
- printf("test_maps: OK\n");
- return 0;
-}
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
deleted file mode 100644
index 369ffaa..0000000
--- a/samples/bpf/test_verifier.c
+++ /dev/null
@@ -1,2529 +0,0 @@
-/*
- * Testsuite for eBPF verifier
- *
- * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <stdio.h>
-#include <unistd.h>
-#include <linux/bpf.h>
-#include <errno.h>
-#include <linux/unistd.h>
-#include <string.h>
-#include <linux/filter.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <sys/resource.h>
-#include "libbpf.h"
-
-#define MAX_INSNS 512
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
-
-#define MAX_FIXUPS 8
-
-struct bpf_test {
- const char *descr;
- struct bpf_insn insns[MAX_INSNS];
- int fixup[MAX_FIXUPS];
- int prog_array_fixup[MAX_FIXUPS];
- int test_val_map_fixup[MAX_FIXUPS];
- const char *errstr;
- const char *errstr_unpriv;
- enum {
- UNDEF,
- ACCEPT,
- REJECT
- } result, result_unpriv;
- enum bpf_prog_type prog_type;
-};
-
-/* Note we want this to be 64 bit aligned so that the end of our array is
- * actually the end of the structure.
- */
-#define MAX_ENTRIES 11
-struct test_val {
- unsigned index;
- int foo[MAX_ENTRIES];
-};
-
-struct other_val {
- unsigned int action[32];
-};
-
-static struct bpf_test tests[] = {
- {
- "add+sub+mul",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_2, 3),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 3),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- },
- {
- "unreachable",
- .insns = {
- BPF_EXIT_INSN(),
- BPF_EXIT_INSN(),
- },
- .errstr = "unreachable",
- .result = REJECT,
- },
- {
- "unreachable2",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "unreachable",
- .result = REJECT,
- },
- {
- "out of range jump",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_EXIT_INSN(),
- },
- .errstr = "jump out of range",
- .result = REJECT,
- },
- {
- "out of range jump2",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, -2),
- BPF_EXIT_INSN(),
- },
- .errstr = "jump out of range",
- .result = REJECT,
- },
- {
- "test1 ld_imm64",
- .insns = {
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_LD_IMM64(BPF_REG_0, 0),
- BPF_LD_IMM64(BPF_REG_0, 0),
- BPF_LD_IMM64(BPF_REG_0, 1),
- BPF_LD_IMM64(BPF_REG_0, 1),
- BPF_MOV64_IMM(BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid BPF_LD_IMM insn",
- .errstr_unpriv = "R1 pointer comparison",
- .result = REJECT,
- },
- {
- "test2 ld_imm64",
- .insns = {
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_LD_IMM64(BPF_REG_0, 0),
- BPF_LD_IMM64(BPF_REG_0, 0),
- BPF_LD_IMM64(BPF_REG_0, 1),
- BPF_LD_IMM64(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid BPF_LD_IMM insn",
- .errstr_unpriv = "R1 pointer comparison",
- .result = REJECT,
- },
- {
- "test3 ld_imm64",
- .insns = {
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
- BPF_LD_IMM64(BPF_REG_0, 0),
- BPF_LD_IMM64(BPF_REG_0, 0),
- BPF_LD_IMM64(BPF_REG_0, 1),
- BPF_LD_IMM64(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_ld_imm64 insn",
- .result = REJECT,
- },
- {
- "test4 ld_imm64",
- .insns = {
- BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_ld_imm64 insn",
- .result = REJECT,
- },
- {
- "test5 ld_imm64",
- .insns = {
- BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
- },
- .errstr = "invalid bpf_ld_imm64 insn",
- .result = REJECT,
- },
- {
- "no bpf_exit",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2),
- },
- .errstr = "jump out of range",
- .result = REJECT,
- },
- {
- "loop (back-edge)",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, -1),
- BPF_EXIT_INSN(),
- },
- .errstr = "back-edge",
- .result = REJECT,
- },
- {
- "loop2 (back-edge)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_JMP_IMM(BPF_JA, 0, 0, -4),
- BPF_EXIT_INSN(),
- },
- .errstr = "back-edge",
- .result = REJECT,
- },
- {
- "conditional loop",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
- BPF_EXIT_INSN(),
- },
- .errstr = "back-edge",
- .result = REJECT,
- },
- {
- "read uninitialized register",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R2 !read_ok",
- .result = REJECT,
- },
- {
- "read invalid register",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, -1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R15 is invalid",
- .result = REJECT,
- },
- {
- "program doesn't init R0 before exit",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 !read_ok",
- .result = REJECT,
- },
- {
- "program doesn't init R0 before exit in all branches",
- .insns = {
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 !read_ok",
- .errstr_unpriv = "R1 pointer comparison",
- .result = REJECT,
- },
- {
- "stack out of bounds",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid stack",
- .result = REJECT,
- },
- {
- "invalid call insn1",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "BPF_CALL uses reserved",
- .result = REJECT,
- },
- {
- "invalid call insn2",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 1, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "BPF_CALL uses reserved",
- .result = REJECT,
- },
- {
- "invalid function call",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 1234567),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid func 1234567",
- .result = REJECT,
- },
- {
- "uninitialized stack1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup = {2},
- .errstr = "invalid indirect read from stack",
- .result = REJECT,
- },
- {
- "uninitialized stack2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -8),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid read from stack",
- .result = REJECT,
- },
- {
- "invalid argument register",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 !read_ok",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "non-invalid argument register",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_6),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "check valid spill/fill",
- .insns = {
- /* spill R1(ctx) into stack */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
-
- /* fill it back into R2 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
-
- /* should be able to access R0 = *(R2 + 8) */
- /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
- },
- {
- "check valid spill/fill, skb mark",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = ACCEPT,
- },
- {
- "check corrupted spill/fill",
- .insns = {
- /* spill R1(ctx) into stack */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
-
- /* mess up with R1 pointer on stack */
- BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23),
-
- /* fill back into R0 should fail */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
-
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .errstr = "corrupted spill",
- .result = REJECT,
- },
- {
- "invalid src register in STX",
- .insns = {
- BPF_STX_MEM(BPF_B, BPF_REG_10, -1, -1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R15 is invalid",
- .result = REJECT,
- },
- {
- "invalid dst register in STX",
- .insns = {
- BPF_STX_MEM(BPF_B, 14, BPF_REG_10, -1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R14 is invalid",
- .result = REJECT,
- },
- {
- "invalid dst register in ST",
- .insns = {
- BPF_ST_MEM(BPF_B, 14, -1, -1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R14 is invalid",
- .result = REJECT,
- },
- {
- "invalid src register in LDX",
- .insns = {
- BPF_LDX_MEM(BPF_B, BPF_REG_0, 12, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R12 is invalid",
- .result = REJECT,
- },
- {
- "invalid dst register in LDX",
- .insns = {
- BPF_LDX_MEM(BPF_B, 11, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R11 is invalid",
- .result = REJECT,
- },
- {
- "junk insn",
- .insns = {
- BPF_RAW_INSN(0, 0, 0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid BPF_LD_IMM",
- .result = REJECT,
- },
- {
- "junk insn2",
- .insns = {
- BPF_RAW_INSN(1, 0, 0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "BPF_LDX uses reserved fields",
- .result = REJECT,
- },
- {
- "junk insn3",
- .insns = {
- BPF_RAW_INSN(-1, 0, 0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid BPF_ALU opcode f0",
- .result = REJECT,
- },
- {
- "junk insn4",
- .insns = {
- BPF_RAW_INSN(-1, -1, -1, -1, -1),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid BPF_ALU opcode f0",
- .result = REJECT,
- },
- {
- "junk insn5",
- .insns = {
- BPF_RAW_INSN(0x7f, -1, -1, -1, -1),
- BPF_EXIT_INSN(),
- },
- .errstr = "BPF_ALU uses reserved fields",
- .result = REJECT,
- },
- {
- "misaligned read from stack",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4),
- BPF_EXIT_INSN(),
- },
- .errstr = "misaligned access",
- .result = REJECT,
- },
- {
- "invalid map_fd for function call",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem),
- BPF_EXIT_INSN(),
- },
- .errstr = "fd 0 is not pointing to valid bpf_map",
- .result = REJECT,
- },
- {
- "don't check return value before access",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {3},
- .errstr = "R0 invalid mem access 'map_value_or_null'",
- .result = REJECT,
- },
- {
- "access memory with incorrect alignment",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {3},
- .errstr = "misaligned access",
- .result = REJECT,
- },
- {
- "sometimes access memory with incorrect alignment",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
- BPF_EXIT_INSN(),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup = {3},
- .errstr = "R0 invalid mem access",
- .errstr_unpriv = "R0 leaks addr",
- .result = REJECT,
- },
- {
- "jump test 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -8),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 3),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 4),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 pointer comparison",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "jump test 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 14),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 11),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 2),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 8),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 5),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 2),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 pointer comparison",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "jump test 3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_JMP_IMM(BPF_JA, 0, 0, 19),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 3),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
- BPF_JMP_IMM(BPF_JA, 0, 0, 15),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 3),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32),
- BPF_JMP_IMM(BPF_JA, 0, 0, 11),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 3),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -40),
- BPF_JMP_IMM(BPF_JA, 0, 0, 7),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 3),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48),
- BPF_JMP_IMM(BPF_JA, 0, 0, 3),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -56),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem),
- BPF_EXIT_INSN(),
- },
- .fixup = {24},
- .errstr_unpriv = "R1 pointer comparison",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "jump test 4",
- .insns = {
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 pointer comparison",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "jump test 5",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
- BPF_JMP_IMM(BPF_JA, 0, 0, 2),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
- BPF_JMP_IMM(BPF_JA, 0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
- BPF_JMP_IMM(BPF_JA, 0, 0, 2),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
- BPF_JMP_IMM(BPF_JA, 0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
- BPF_JMP_IMM(BPF_JA, 0, 0, 2),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
- BPF_JMP_IMM(BPF_JA, 0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
- BPF_JMP_IMM(BPF_JA, 0, 0, 2),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
- BPF_JMP_IMM(BPF_JA, 0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
- BPF_JMP_IMM(BPF_JA, 0, 0, 2),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
- BPF_JMP_IMM(BPF_JA, 0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 pointer comparison",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "access skb fields ok",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, len)),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, pkt_type)),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, queue_mapping)),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, protocol)),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, vlan_present)),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, vlan_tci)),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- },
- {
- "access skb fields bad1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -4),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access",
- .result = REJECT,
- },
- {
- "access skb fields bad2",
- .insns = {
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 9),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, pkt_type)),
- BPF_EXIT_INSN(),
- },
- .fixup = {4},
- .errstr = "different pointers",
- .errstr_unpriv = "R1 pointer comparison",
- .result = REJECT,
- },
- {
- "access skb fields bad3",
- .insns = {
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, pkt_type)),
- BPF_EXIT_INSN(),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JA, 0, 0, -12),
- },
- .fixup = {6},
- .errstr = "different pointers",
- .errstr_unpriv = "R1 pointer comparison",
- .result = REJECT,
- },
- {
- "access skb fields bad4",
- .insns = {
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 3),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
- offsetof(struct __sk_buff, len)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JA, 0, 0, -13),
- },
- .fixup = {7},
- .errstr = "different pointers",
- .errstr_unpriv = "R1 pointer comparison",
- .result = REJECT,
- },
- {
- "check skb->mark is not writeable by sockets",
- .insns = {
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access",
- .errstr_unpriv = "R1 leaks addr",
- .result = REJECT,
- },
- {
- "check skb->tc_index is not writeable by sockets",
- .insns = {
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
- offsetof(struct __sk_buff, tc_index)),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access",
- .errstr_unpriv = "R1 leaks addr",
- .result = REJECT,
- },
- {
- "check non-u32 access to cb",
- .insns = {
- BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access",
- .errstr_unpriv = "R1 leaks addr",
- .result = REJECT,
- },
- {
- "check out of range skb->cb access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0]) + 256),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access",
- .errstr_unpriv = "",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_ACT,
- },
- {
- "write skb fields from socket prog",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[4])),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tc_index)),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
- offsetof(struct __sk_buff, cb[2])),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .errstr_unpriv = "R1 leaks addr",
- .result_unpriv = REJECT,
- },
- {
- "write skb fields from tc_cls_act prog",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tc_index)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, tc_index)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[3])),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "PTR_TO_STACK store/load",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- },
- {
- "PTR_TO_STACK store/load - bad alignment on off",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "misaligned access off -6 size 8",
- },
- {
- "PTR_TO_STACK store/load - bad alignment on reg",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "misaligned access off -2 size 8",
- },
- {
- "PTR_TO_STACK store/load - out of bounds low",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -80000),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack off=-79992 size=8",
- },
- {
- "PTR_TO_STACK store/load - out of bounds high",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack off=0 size=8",
- },
- {
- "unpriv: return pointer",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 leaks addr",
- },
- {
- "unpriv: add const to pointer",
- .insns = {
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 pointer arithmetic",
- },
- {
- "unpriv: add pointer to pointer",
- .insns = {
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 pointer arithmetic",
- },
- {
- "unpriv: neg pointer",
- .insns = {
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 pointer arithmetic",
- },
- {
- "unpriv: cmp pointer with const",
- .insns = {
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 pointer comparison",
- },
- {
- "unpriv: cmp pointer with pointer",
- .insns = {
- BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R10 pointer comparison",
- },
- {
- "unpriv: check that printk is disallowed",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_trace_printk),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "unknown func 6",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "unpriv: pass pointer to helper function",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {3},
- .errstr_unpriv = "R4 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "unpriv: indirectly pass pointer on stack to helper function",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {3},
- .errstr = "invalid indirect read from stack off -8+0 size 8",
- .result = REJECT,
- },
- {
- "unpriv: mangle pointer on stack 1",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
- BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "unpriv: mangle pointer on stack 2",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
- BPF_ST_MEM(BPF_B, BPF_REG_10, -1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "unpriv: read pointer from stack in small chunks",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid size",
- .result = REJECT,
- },
- {
- "unpriv: write pointer into ctx",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 leaks addr",
- .result_unpriv = REJECT,
- .errstr = "invalid bpf_context access",
- .result = REJECT,
- },
- {
- "unpriv: write pointer into map elem value",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {3},
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "unpriv: partial copy of pointer",
- .insns = {
- BPF_MOV32_REG(BPF_REG_1, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R10 partial copy",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "unpriv: pass pointer to tail_call",
- .insns = {
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_array_fixup = {1},
- .errstr_unpriv = "R3 leaks addr into helper",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "unpriv: cmp map pointer with zero",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {1},
- .errstr_unpriv = "R1 pointer comparison",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "unpriv: write into frame pointer",
- .insns = {
- BPF_MOV64_REG(BPF_REG_10, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "frame pointer is read only",
- .result = REJECT,
- },
- {
- "unpriv: cmp of frame pointer",
- .insns = {
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_10, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R10 pointer comparison",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "unpriv: cmp of stack pointer",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R2 pointer comparison",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "unpriv: obfuscate stack pointer",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R2 pointer arithmetic",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "raw_stack: no skb_load_bytes",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- /* Call to skb_load_bytes() omitted. */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid read from stack off -8+0 size 8",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, negative len",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack type R3",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, negative len 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, ~0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack type R3",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, zero len",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack type R3",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, no init",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, init",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xcafe),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, spilled regs around bounds",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), /* spill ctx from R1 */
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), /* spill ctx from R1 */
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), /* fill ctx into R0 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), /* fill ctx into R2 */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
- offsetof(struct __sk_buff, priority)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, spilled regs corruption",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), /* spill ctx from R1 */
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), /* fill ctx into R0 */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R0 invalid mem access 'inv'",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, spilled regs corruption 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), /* spill ctx from R1 */
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), /* spill ctx from R1 */
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), /* spill ctx from R1 */
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), /* fill ctx into R0 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), /* fill ctx into R2 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0), /* fill ctx into R3 */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
- offsetof(struct __sk_buff, priority)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_3,
- offsetof(struct __sk_buff, pkt_type)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R3 invalid mem access 'inv'",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, spilled regs + data",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), /* spill ctx from R1 */
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), /* spill ctx from R1 */
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), /* spill ctx from R1 */
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), /* fill ctx into R0 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), /* fill ctx into R2 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0), /* fill data into R3 */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
- offsetof(struct __sk_buff, priority)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, invalid access 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -513),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack type R3 off=-513 access_size=8",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, invalid access 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack type R3 off=-1 access_size=8",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, invalid access 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 0xffffffff),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack type R3 off=-1 access_size=-1",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, invalid access 4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack type R3 off=-1 access_size=2147483647",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, invalid access 5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack type R3 off=-512 access_size=2147483647",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, invalid access 6",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack type R3 off=-512 access_size=0",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "raw_stack: skb_load_bytes, large access",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 512),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "direct packet access: test1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "direct packet access: test2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 15),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 7),
- BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_3, 12),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 14),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 48),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 48),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_3, 4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "direct packet access: test3",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access off=76",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- },
- {
- "direct packet access: test4 (write)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "direct packet access: test5 (pkt_end >= reg, good access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "direct packet access: test6 (pkt_end >= reg, bad access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid access to packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "direct packet access: test7 (pkt_end >= reg, both accesses)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid access to packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "direct packet access: test8 (double test, variant 1)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 4),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "direct packet access: test9 (double test, variant 2)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "direct packet access: test10 (write invalid)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid access to packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "helper access to packet: test1, valid packet_ptr range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {5},
- .result_unpriv = ACCEPT,
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- },
- {
- "helper access to packet: test2, unchecked packet_ptr",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data)),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {1},
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
- },
- {
- "helper access to packet: test3, variable add",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
- BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {11},
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- },
- {
- "helper access to packet: test4, packet_ptr with bad range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {7},
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
- },
- {
- "helper access to packet: test5, packet_ptr with too short range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {6},
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
- },
- {
- "helper access to packet: test6, cls valid packet_ptr range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {5},
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "helper access to packet: test7, cls unchecked packet_ptr",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {1},
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "helper access to packet: test8, cls variable add",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
- BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {11},
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "helper access to packet: test9, cls packet_ptr with bad range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {7},
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "helper access to packet: test10, cls packet_ptr with too short range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup = {6},
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "helper access to packet: test11, cls unsuitable helper 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_7, 4),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_4, 42),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_store_bytes),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "helper access to the packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "helper access to packet: test12, cls unsuitable helper 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_4, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "helper access to the packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "helper access to packet: test13, cls helper ok",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "helper access to packet: test14, cls helper fail sub",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 4),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "type=inv expected=fp",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "helper access to packet: test15, cls helper fail range 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "helper access to packet: test16, cls helper fail range 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, -9),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "helper access to packet: test17, cls helper fail range 3",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, ~0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "helper access to packet: test18, cls helper fail range zero",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "helper access to packet: test19, pkt end as input",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R1 type=pkt_end expected=fp",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "helper access to packet: test20, wrong reg",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- },
- {
- "valid map access into an array with a constant",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .test_val_map_fixup = {3},
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "valid map access into an array with a register",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .test_val_map_fixup = {3},
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "valid map access into an array with a variable",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 3),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .test_val_map_fixup = {3},
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "valid map access into an array with a signed variable",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .test_val_map_fixup = {3},
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- },
- {
- "invalid map access into an array with a constant",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, (MAX_ENTRIES + 1) << 2,
- offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .test_val_map_fixup = {3},
- .errstr = "invalid access to map value, value_size=48 off=48 size=8",
- .result = REJECT,
- },
- {
- "invalid map access into an array with a register",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, MAX_ENTRIES + 1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .test_val_map_fixup = {3},
- .errstr = "R0 min value is outside of the array range",
- .result = REJECT,
- },
- {
- "invalid map access into an array with a variable",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .test_val_map_fixup = {3},
- .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
- .result = REJECT,
- },
- {
- "invalid map access into an array with no floor check",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .test_val_map_fixup = {3},
- .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
- .result = REJECT,
- },
- {
- "invalid map access into an array with a invalid max check",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES + 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .test_val_map_fixup = {3},
- .errstr = "invalid access to map value, value_size=48 off=44 size=8",
- .result = REJECT,
- },
- {
- "invalid map access into an array with a invalid max check",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .test_val_map_fixup = {3, 11},
- .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
- .result = REJECT,
- },
-};
-
-static int probe_filter_length(struct bpf_insn *fp)
-{
- int len = 0;
-
- for (len = MAX_INSNS - 1; len > 0; --len)
- if (fp[len].code != 0 || fp[len].imm != 0)
- break;
-
- return len + 1;
-}
-
-static int create_map(size_t val_size, int num)
-{
- int map_fd;
-
- map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
- sizeof(long long), val_size, num, 0);
- if (map_fd < 0)
- printf("failed to create map '%s'\n", strerror(errno));
-
- return map_fd;
-}
-
-static int create_prog_array(void)
-{
- int map_fd;
-
- map_fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY,
- sizeof(int), sizeof(int), 4, 0);
- if (map_fd < 0)
- printf("failed to create prog_array '%s'\n", strerror(errno));
-
- return map_fd;
-}
-
-static int test(void)
-{
- int prog_fd, i, pass_cnt = 0, err_cnt = 0;
- bool unpriv = geteuid() != 0;
-
- for (i = 0; i < ARRAY_SIZE(tests); i++) {
- struct bpf_insn *prog = tests[i].insns;
- int prog_type = tests[i].prog_type;
- int prog_len = probe_filter_length(prog);
- int *fixup = tests[i].fixup;
- int *prog_array_fixup = tests[i].prog_array_fixup;
- int *test_val_map_fixup = tests[i].test_val_map_fixup;
- int expected_result;
- const char *expected_errstr;
- int map_fd = -1, prog_array_fd = -1, test_val_map_fd = -1;
-
- if (*fixup) {
- map_fd = create_map(sizeof(long long), 1024);
-
- do {
- prog[*fixup].imm = map_fd;
- fixup++;
- } while (*fixup);
- }
- if (*prog_array_fixup) {
- prog_array_fd = create_prog_array();
-
- do {
- prog[*prog_array_fixup].imm = prog_array_fd;
- prog_array_fixup++;
- } while (*prog_array_fixup);
- }
- if (*test_val_map_fixup) {
- /* Unprivileged can't create a hash map.*/
- if (unpriv)
- continue;
- test_val_map_fd = create_map(sizeof(struct test_val),
- 256);
- do {
- prog[*test_val_map_fixup].imm = test_val_map_fd;
- test_val_map_fixup++;
- } while (*test_val_map_fixup);
- }
-
- printf("#%d %s ", i, tests[i].descr);
-
- prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER,
- prog, prog_len * sizeof(struct bpf_insn),
- "GPL", 0);
-
- if (unpriv && tests[i].result_unpriv != UNDEF)
- expected_result = tests[i].result_unpriv;
- else
- expected_result = tests[i].result;
-
- if (unpriv && tests[i].errstr_unpriv)
- expected_errstr = tests[i].errstr_unpriv;
- else
- expected_errstr = tests[i].errstr;
-
- if (expected_result == ACCEPT) {
- if (prog_fd < 0) {
- printf("FAIL\nfailed to load prog '%s'\n",
- strerror(errno));
- printf("%s", bpf_log_buf);
- err_cnt++;
- goto fail;
- }
- } else {
- if (prog_fd >= 0) {
- printf("FAIL\nunexpected success to load\n");
- printf("%s", bpf_log_buf);
- err_cnt++;
- goto fail;
- }
- if (strstr(bpf_log_buf, expected_errstr) == 0) {
- printf("FAIL\nunexpected error message: %s",
- bpf_log_buf);
- err_cnt++;
- goto fail;
- }
- }
-
- pass_cnt++;
- printf("OK\n");
-fail:
- if (map_fd >= 0)
- close(map_fd);
- if (prog_array_fd >= 0)
- close(prog_array_fd);
- if (test_val_map_fd >= 0)
- close(test_val_map_fd);
- close(prog_fd);
-
- }
- printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, err_cnt);
-
- return 0;
-}
-
-int main(void)
-{
- struct rlimit r = {1 << 20, 1 << 20};
-
- setrlimit(RLIMIT_MEMLOCK, &r);
- return test();
-}
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index 71a8ed3..41b6115 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -50,7 +50,7 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
key.userstack = bpf_get_stackid(ctx, &stackmap, USER_STACKID_FLAGS);
if ((int)key.kernstack < 0 && (int)key.userstack < 0) {
bpf_trace_printk(fmt, sizeof(fmt), cpu, ctx->sample_period,
- ctx->regs.ip);
+ PT_REGS_IP(&ctx->regs));
return 0;
}
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index ab5b19e..3e225e3 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -4,8 +4,10 @@
#include <signal.h>
#include <linux/bpf.h>
#include <string.h>
+
#include "libbpf.h"
#include "bpf_load.h"
+#include "bpf_util.h"
#define MAX_INDEX 64
#define MAX_STARS 38
@@ -36,8 +38,8 @@ struct hist_key {
static void print_hist_for_pid(int fd, void *task)
{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
struct hist_key key = {}, next_key;
- unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
long values[nr_cpus];
char starstr[MAX_STARS];
long value;
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
index 48716f7..d0851cb 100644
--- a/samples/bpf/tracex3_user.c
+++ b/samples/bpf/tracex3_user.c
@@ -11,8 +11,10 @@
#include <stdbool.h>
#include <string.h>
#include <linux/bpf.h>
+
#include "libbpf.h"
#include "bpf_load.h"
+#include "bpf_util.h"
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
@@ -20,7 +22,7 @@
static void clear_stats(int fd)
{
- unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ unsigned int nr_cpus = bpf_num_possible_cpus();
__u64 values[nr_cpus];
__u32 key;
@@ -77,7 +79,7 @@ static void print_banner(void)
static void print_hist(int fd)
{
- unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ unsigned int nr_cpus = bpf_num_possible_cpus();
__u64 total_events = 0;
long values[nr_cpus];
__u64 max_cnt = 0;
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index a5e109e..5f040a0 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -5,109 +5,18 @@
* License as published by the Free Software Foundation.
*/
#include <linux/bpf.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/socket.h>
#include <unistd.h>
+
#include "bpf_load.h"
+#include "bpf_util.h"
#include "libbpf.h"
-static int set_link_xdp_fd(int ifindex, int fd)
-{
- struct sockaddr_nl sa;
- int sock, seq = 0, len, ret = -1;
- char buf[4096];
- struct nlattr *nla, *nla_xdp;
- struct {
- struct nlmsghdr nh;
- struct ifinfomsg ifinfo;
- char attrbuf[64];
- } req;
- struct nlmsghdr *nh;
- struct nlmsgerr *err;
-
- memset(&sa, 0, sizeof(sa));
- sa.nl_family = AF_NETLINK;
-
- sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
- if (sock < 0) {
- printf("open netlink socket: %s\n", strerror(errno));
- return -1;
- }
-
- if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
- printf("bind to netlink: %s\n", strerror(errno));
- goto cleanup;
- }
-
- memset(&req, 0, sizeof(req));
- req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
- req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
- req.nh.nlmsg_type = RTM_SETLINK;
- req.nh.nlmsg_pid = 0;
- req.nh.nlmsg_seq = ++seq;
- req.ifinfo.ifi_family = AF_UNSPEC;
- req.ifinfo.ifi_index = ifindex;
- nla = (struct nlattr *)(((char *)&req)
- + NLMSG_ALIGN(req.nh.nlmsg_len));
- nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
-
- nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
- nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
- nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;
-
- req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
-
- if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
- printf("send to netlink: %s\n", strerror(errno));
- goto cleanup;
- }
-
- len = recv(sock, buf, sizeof(buf), 0);
- if (len < 0) {
- printf("recv from netlink: %s\n", strerror(errno));
- goto cleanup;
- }
-
- for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
- nh = NLMSG_NEXT(nh, len)) {
- if (nh->nlmsg_pid != getpid()) {
- printf("Wrong pid %d, expected %d\n",
- nh->nlmsg_pid, getpid());
- goto cleanup;
- }
- if (nh->nlmsg_seq != seq) {
- printf("Wrong seq %d, expected %d\n",
- nh->nlmsg_seq, seq);
- goto cleanup;
- }
- switch (nh->nlmsg_type) {
- case NLMSG_ERROR:
- err = (struct nlmsgerr *)NLMSG_DATA(nh);
- if (!err->error)
- continue;
- printf("nlmsg error %s\n", strerror(-err->error));
- goto cleanup;
- case NLMSG_DONE:
- break;
- }
- }
-
- ret = 0;
-
-cleanup:
- close(sock);
- return ret;
-}
-
static int ifindex;
static void int_exit(int sig)
@@ -120,7 +29,7 @@ static void int_exit(int sig)
*/
static void poll_stats(int interval)
{
- unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ unsigned int nr_cpus = bpf_num_possible_cpus();
const unsigned int nr_keys = 256;
__u64 values[nr_cpus], prev[nr_keys][nr_cpus];
__u32 key;
diff --git a/samples/bpf/xdp_tx_iptunnel_common.h b/samples/bpf/xdp_tx_iptunnel_common.h
new file mode 100644
index 0000000..dd12cc3
--- /dev/null
+++ b/samples/bpf/xdp_tx_iptunnel_common.h
@@ -0,0 +1,37 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H
+#define _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H
+
+#include <linux/types.h>
+
+#define MAX_IPTNL_ENTRIES 256U
+
+struct vip {
+ union {
+ __u32 v6[4];
+ __u32 v4;
+ } daddr;
+ __u16 dport;
+ __u16 family;
+ __u8 protocol;
+};
+
+struct iptnl_info {
+ union {
+ __u32 v6[4];
+ __u32 v4;
+ } saddr;
+ union {
+ __u32 v6[4];
+ __u32 v4;
+ } daddr;
+ __u16 family;
+ __u8 dmac[6];
+};
+
+#endif
diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c
new file mode 100644
index 0000000..85c38ec
--- /dev/null
+++ b/samples/bpf/xdp_tx_iptunnel_kern.c
@@ -0,0 +1,236 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program shows how to use bpf_xdp_adjust_head() by
+ * encapsulating the incoming packet in an IPv4/v6 header
+ * and then XDP_TX it out.
+ */
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "bpf_helpers.h"
+#include "xdp_tx_iptunnel_common.h"
+
+struct bpf_map_def SEC("maps") rxcnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 256,
+};
+
+struct bpf_map_def SEC("maps") vip2tnl = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip),
+ .value_size = sizeof(struct iptnl_info),
+ .max_entries = MAX_IPTNL_ENTRIES,
+};
+
+static __always_inline void count_tx(u32 protocol)
+{
+ u64 *rxcnt_count;
+
+ rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
+ if (rxcnt_count)
+ *rxcnt_count += 1;
+}
+
+static __always_inline int get_dport(void *trans_data, void *data_end,
+ u8 protocol)
+{
+ struct tcphdr *th;
+ struct udphdr *uh;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ th = (struct tcphdr *)trans_data;
+ if (th + 1 > data_end)
+ return -1;
+ return th->dest;
+ case IPPROTO_UDP:
+ uh = (struct udphdr *)trans_data;
+ if (uh + 1 > data_end)
+ return -1;
+ return uh->dest;
+ default:
+ return 0;
+ }
+}
+
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+ const struct ethhdr *old_eth,
+ const struct iptnl_info *tnl,
+ __be16 h_proto)
+{
+ memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
+ memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
+ new_eth->h_proto = h_proto;
+}
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ struct iphdr *iph = data + sizeof(struct ethhdr);
+ u16 *next_iph_u16;
+ u16 payload_len;
+ struct vip vip = {};
+ int dport;
+ u32 csum = 0;
+ int i;
+
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+
+ dport = get_dport(iph + 1, data_end, iph->protocol);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = iph->protocol;
+ vip.family = AF_INET;
+ vip.daddr.v4 = iph->daddr;
+ vip.dport = dport;
+ payload_len = ntohs(iph->tot_len);
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v4-in-v4 */
+ if (!tnl || tnl->family != AF_INET)
+ return XDP_PASS;
+
+ /* The vip key is found. Add an IP header and send it out */
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+ return XDP_DROP;
+
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+
+ new_eth = data;
+ iph = data + sizeof(*new_eth);
+ old_eth = data + sizeof(*iph);
+
+ if (new_eth + 1 > data_end ||
+ old_eth + 1 > data_end ||
+ iph + 1 > data_end)
+ return XDP_DROP;
+
+ set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IP));
+
+ iph->version = 4;
+ iph->ihl = sizeof(*iph) >> 2;
+ iph->frag_off = 0;
+ iph->protocol = IPPROTO_IPIP;
+ iph->check = 0;
+ iph->tos = 0;
+ iph->tot_len = htons(payload_len + sizeof(*iph));
+ iph->daddr = tnl->daddr.v4;
+ iph->saddr = tnl->saddr.v4;
+ iph->ttl = 8;
+
+ next_iph_u16 = (u16 *)iph;
+#pragma clang loop unroll(full)
+ for (i = 0; i < sizeof(*iph) >> 1; i++)
+ csum += *next_iph_u16++;
+
+ iph->check = ~((csum & 0xffff) + (csum >> 16));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+static __always_inline int handle_ipv6(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
+ __u16 payload_len;
+ struct vip vip = {};
+ int dport;
+
+ if (ip6h + 1 > data_end)
+ return XDP_DROP;
+
+ dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = ip6h->nexthdr;
+ vip.family = AF_INET6;
+ memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
+ vip.dport = dport;
+ payload_len = ip6h->payload_len;
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v6-in-v6 */
+ if (!tnl || tnl->family != AF_INET6)
+ return XDP_PASS;
+
+ /* The vip key is found. Add an IP header and send it out */
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+ return XDP_DROP;
+
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+
+ new_eth = data;
+ ip6h = data + sizeof(*new_eth);
+ old_eth = data + sizeof(*ip6h);
+
+ if (new_eth + 1 > data_end ||
+ old_eth + 1 > data_end ||
+ ip6h + 1 > data_end)
+ return XDP_DROP;
+
+ set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IPV6));
+
+ ip6h->version = 6;
+ ip6h->priority = 0;
+ memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+ ip6h->payload_len = htons(ntohs(payload_len) + sizeof(*ip6h));
+ ip6h->nexthdr = IPPROTO_IPV6;
+ ip6h->hop_limit = 8;
+ memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+ memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+SEC("xdp_tx_iptunnel")
+int _xdp_tx_iptunnel(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u16 h_proto;
+
+ if (eth + 1 > data_end)
+ return XDP_DROP;
+
+ h_proto = eth->h_proto;
+
+ if (h_proto == htons(ETH_P_IP))
+ return handle_ipv4(xdp);
+ else if (h_proto == htons(ETH_P_IPV6))
+
+ return handle_ipv6(xdp);
+ else
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
new file mode 100644
index 0000000..7a71f5c
--- /dev/null
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -0,0 +1,256 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <arpa/inet.h>
+#include <netinet/ether.h>
+#include <unistd.h>
+#include <time.h>
+#include "bpf_load.h"
+#include "libbpf.h"
+#include "bpf_util.h"
+#include "xdp_tx_iptunnel_common.h"
+
+#define STATS_INTERVAL_S 2U
+
+static int ifindex = -1;
+
+static void int_exit(int sig)
+{
+ if (ifindex > -1)
+ set_link_xdp_fd(ifindex, -1);
+ exit(0);
+}
+
+/* simple per-protocol drop counter
+ */
+static void poll_stats(unsigned int kill_after_s)
+{
+ const unsigned int nr_protos = 256;
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ time_t started_at = time(NULL);
+ __u64 values[nr_cpus], prev[nr_protos][nr_cpus];
+ __u32 proto;
+ int i;
+
+ memset(prev, 0, sizeof(prev));
+
+ while (!kill_after_s || time(NULL) - started_at <= kill_after_s) {
+ sleep(STATS_INTERVAL_S);
+
+ for (proto = 0; proto < nr_protos; proto++) {
+ __u64 sum = 0;
+
+ assert(bpf_lookup_elem(map_fd[0], &proto, values) == 0);
+ for (i = 0; i < nr_cpus; i++)
+ sum += (values[i] - prev[proto][i]);
+
+ if (sum)
+ printf("proto %u: sum:%10llu pkts, rate:%10llu pkts/s\n",
+ proto, sum, sum / STATS_INTERVAL_S);
+ memcpy(prev[proto], values, sizeof(values));
+ }
+ }
+}
+
+static void usage(const char *cmd)
+{
+ printf("Start a XDP prog which encapsulates incoming packets\n"
+ "in an IPv4/v6 header and XDP_TX it out. The dst <VIP:PORT>\n"
+ "is used to select packets to encapsulate\n\n");
+ printf("Usage: %s [...]\n", cmd);
+ printf(" -i <ifindex> Interface Index\n");
+ printf(" -a <vip-service-address> IPv4 or IPv6\n");
+ printf(" -p <vip-service-port> A port range (e.g. 433-444) is also allowed\n");
+ printf(" -s <source-ip> Used in the IPTunnel header\n");
+ printf(" -d <dest-ip> Used in the IPTunnel header\n");
+ printf(" -m <dest-MAC> Used in sending the IP Tunneled pkt\n");
+ printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n");
+ printf(" -P <IP-Protocol> Default is TCP\n");
+ printf(" -h Display this help\n");
+}
+
+static int parse_ipstr(const char *ipstr, unsigned int *addr)
+{
+ if (inet_pton(AF_INET6, ipstr, addr) == 1) {
+ return AF_INET6;
+ } else if (inet_pton(AF_INET, ipstr, addr) == 1) {
+ addr[1] = addr[2] = addr[3] = 0;
+ return AF_INET;
+ }
+
+ fprintf(stderr, "%s is an invalid IP\n", ipstr);
+ return AF_UNSPEC;
+}
+
+static int parse_ports(const char *port_str, int *min_port, int *max_port)
+{
+ char *end;
+ long tmp_min_port;
+ long tmp_max_port;
+
+ tmp_min_port = strtol(optarg, &end, 10);
+ if (tmp_min_port < 1 || tmp_min_port > 65535) {
+ fprintf(stderr, "Invalid port(s):%s\n", optarg);
+ return 1;
+ }
+
+ if (*end == '-') {
+ end++;
+ tmp_max_port = strtol(end, NULL, 10);
+ if (tmp_max_port < 1 || tmp_max_port > 65535) {
+ fprintf(stderr, "Invalid port(s):%s\n", optarg);
+ return 1;
+ }
+ } else {
+ tmp_max_port = tmp_min_port;
+ }
+
+ if (tmp_min_port > tmp_max_port) {
+ fprintf(stderr, "Invalid port(s):%s\n", optarg);
+ return 1;
+ }
+
+ if (tmp_max_port - tmp_min_port + 1 > MAX_IPTNL_ENTRIES) {
+ fprintf(stderr, "Port range (%s) is larger than %u\n",
+ port_str, MAX_IPTNL_ENTRIES);
+ return 1;
+ }
+ *min_port = tmp_min_port;
+ *max_port = tmp_max_port;
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ unsigned char opt_flags[256] = {};
+ unsigned int kill_after_s = 0;
+ const char *optstr = "i:a:p:s:d:m:T:P:h";
+ int min_port = 0, max_port = 0;
+ struct iptnl_info tnl = {};
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct vip vip = {};
+ char filename[256];
+ int opt;
+ int i;
+
+ tnl.family = AF_UNSPEC;
+ vip.protocol = IPPROTO_TCP;
+
+ for (i = 0; i < strlen(optstr); i++)
+ if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z')
+ opt_flags[(unsigned char)optstr[i]] = 1;
+
+ while ((opt = getopt(argc, argv, optstr)) != -1) {
+ unsigned short family;
+ unsigned int *v6;
+
+ switch (opt) {
+ case 'i':
+ ifindex = atoi(optarg);
+ break;
+ case 'a':
+ vip.family = parse_ipstr(optarg, vip.daddr.v6);
+ if (vip.family == AF_UNSPEC)
+ return 1;
+ break;
+ case 'p':
+ if (parse_ports(optarg, &min_port, &max_port))
+ return 1;
+ break;
+ case 'P':
+ vip.protocol = atoi(optarg);
+ break;
+ case 's':
+ case 'd':
+ if (opt == 's')
+ v6 = tnl.saddr.v6;
+ else
+ v6 = tnl.daddr.v6;
+
+ family = parse_ipstr(optarg, v6);
+ if (family == AF_UNSPEC)
+ return 1;
+ if (tnl.family == AF_UNSPEC) {
+ tnl.family = family;
+ } else if (tnl.family != family) {
+ fprintf(stderr,
+ "The IP version of the src and dst addresses used in the IP encapsulation does not match\n");
+ return 1;
+ }
+ break;
+ case 'm':
+ if (!ether_aton_r(optarg,
+ (struct ether_addr *)tnl.dmac)) {
+ fprintf(stderr, "Invalid mac address:%s\n",
+ optarg);
+ return 1;
+ }
+ break;
+ case 'T':
+ kill_after_s = atoi(optarg);
+ break;
+ default:
+ usage(argv[0]);
+ return 1;
+ }
+ opt_flags[opt] = 0;
+ }
+
+ for (i = 0; i < strlen(optstr); i++) {
+ if (opt_flags[(unsigned int)optstr[i]]) {
+ fprintf(stderr, "Missing argument -%c\n", optstr[i]);
+ usage(argv[0]);
+ return 1;
+ }
+ }
+
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
+ return 1;
+ }
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ if (!prog_fd[0]) {
+ printf("load_bpf_file: %s\n", strerror(errno));
+ return 1;
+ }
+
+ signal(SIGINT, int_exit);
+
+ while (min_port <= max_port) {
+ vip.dport = htons(min_port++);
+ if (bpf_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) {
+ perror("bpf_update_elem(&vip2tnl)");
+ return 1;
+ }
+ }
+
+ if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) {
+ printf("link set xdp fd failed\n");
+ return 1;
+ }
+
+ poll_stats(kill_after_s);
+
+ set_link_xdp_fd(ifindex, -1);
+
+ return 0;
+}
diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile
index ae7ff6f..bf7cc6b 100644
--- a/samples/seccomp/Makefile
+++ b/samples/seccomp/Makefile
@@ -36,13 +36,13 @@ HOSTLOADLIBES_bpf-direct += $(MFLAG)
HOSTLOADLIBES_bpf-fancy += $(MFLAG)
HOSTLOADLIBES_dropper += $(MFLAG)
endif
-always := $(hostprogs-y)
+always := $(hostprogs-m)
else
# MIPS system calls are defined based on the -mabi that is passed
# to the toolchain which may or may not be a valid option
# for the host toolchain. So disable tests if target architecture
# is MIPS but the host isn't.
ifndef CONFIG_MIPS
-always := $(hostprogs-y)
+always := $(hostprogs-m)
endif
endif
diff --git a/samples/seccomp/bpf-helper.c b/samples/seccomp/bpf-helper.c
index 05cb4d5..1ef0f4d 100644
--- a/samples/seccomp/bpf-helper.c
+++ b/samples/seccomp/bpf-helper.c
@@ -18,41 +18,41 @@
int bpf_resolve_jumps(struct bpf_labels *labels,
struct sock_filter *filter, size_t count)
{
- struct sock_filter *begin = filter;
- __u8 insn = count - 1;
+ size_t i;
- if (count < 1)
+ if (count < 1 || count > BPF_MAXINSNS)
return -1;
/*
* Walk it once, backwards, to build the label table and do fixups.
* Since backward jumps are disallowed by BPF, this is easy.
*/
- filter += insn;
- for (; filter >= begin; --insn, --filter) {
- if (filter->code != (BPF_JMP+BPF_JA))
+ for (i = 0; i < count; ++i) {
+ size_t offset = count - i - 1;
+ struct sock_filter *instr = &filter[offset];
+ if (instr->code != (BPF_JMP+BPF_JA))
continue;
- switch ((filter->jt<<8)|filter->jf) {
+ switch ((instr->jt<<8)|instr->jf) {
case (JUMP_JT<<8)|JUMP_JF:
- if (labels->labels[filter->k].location == 0xffffffff) {
+ if (labels->labels[instr->k].location == 0xffffffff) {
fprintf(stderr, "Unresolved label: '%s'\n",
- labels->labels[filter->k].label);
+ labels->labels[instr->k].label);
return 1;
}
- filter->k = labels->labels[filter->k].location -
- (insn + 1);
- filter->jt = 0;
- filter->jf = 0;
+ instr->k = labels->labels[instr->k].location -
+ (offset + 1);
+ instr->jt = 0;
+ instr->jf = 0;
continue;
case (LABEL_JT<<8)|LABEL_JF:
- if (labels->labels[filter->k].location != 0xffffffff) {
+ if (labels->labels[instr->k].location != 0xffffffff) {
fprintf(stderr, "Duplicate label use: '%s'\n",
- labels->labels[filter->k].label);
+ labels->labels[instr->k].label);
return 1;
}
- labels->labels[filter->k].location = insn;
- filter->k = 0; /* fall through */
- filter->jt = 0;
- filter->jf = 0;
+ labels->labels[instr->k].location = offset;
+ instr->k = 0; /* fall through */
+ instr->jt = 0;
+ instr->jf = 0;
continue;
}
}
diff --git a/samples/seccomp/dropper.c b/samples/seccomp/dropper.c
index c69c347..68325ca 100644
--- a/samples/seccomp/dropper.c
+++ b/samples/seccomp/dropper.c
@@ -11,7 +11,6 @@
* When run, returns the specified errno for the specified
* system call number against the given architecture.
*
- * Run this one as root as PR_SET_NO_NEW_PRIVS is not called.
*/
#include <errno.h>
@@ -42,8 +41,12 @@ static int install_filter(int nr, int arch, int error)
.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
.filter = filter,
};
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ perror("prctl(NO_NEW_PRIVS)");
+ return 1;
+ }
if (prctl(PR_SET_SECCOMP, 2, &prog)) {
- perror("prctl");
+ perror("prctl(PR_SET_SECCOMP)");
return 1;
}
return 0;
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
index 880a7d1..30e282d 100644
--- a/samples/trace_events/trace-events-sample.c
+++ b/samples/trace_events/trace-events-sample.c
@@ -79,7 +79,7 @@ static int simple_thread_fn(void *arg)
static DEFINE_MUTEX(thread_mutex);
-void foo_bar_reg(void)
+int foo_bar_reg(void)
{
pr_info("Starting thread for foo_bar_fn\n");
/*
@@ -90,6 +90,7 @@ void foo_bar_reg(void)
mutex_lock(&thread_mutex);
simple_tsk_fn = kthread_run(simple_thread_fn, NULL, "event-sample-fn");
mutex_unlock(&thread_mutex);
+ return 0;
}
void foo_bar_unreg(void)
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index d6b75bb..76a75ab 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -354,7 +354,7 @@ TRACE_EVENT_CONDITION(foo_bar_with_cond,
TP_printk("foo %s %d", __get_str(foo), __entry->bar)
);
-void foo_bar_reg(void);
+int foo_bar_reg(void);
void foo_bar_unreg(void);
/*
diff --git a/samples/vfio-mdev/Makefile b/samples/vfio-mdev/Makefile
new file mode 100644
index 0000000..a932edb
--- /dev/null
+++ b/samples/vfio-mdev/Makefile
@@ -0,0 +1,13 @@
+#
+# Makefile for mtty.c file
+#
+KERNEL_DIR:=/lib/modules/$(shell uname -r)/build
+
+obj-m:=mtty.o
+
+modules clean modules_install:
+ $(MAKE) -C $(KERNEL_DIR) SUBDIRS=$(PWD) $@
+
+default: modules
+
+module: modules
diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
new file mode 100644
index 0000000..6b633a4
--- /dev/null
+++ b/samples/vfio-mdev/mtty.c
@@ -0,0 +1,1503 @@
+/*
+ * Mediated virtual PCI serial host device driver
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * Author: Neo Jia <cjia@nvidia.com>
+ * Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Sample driver that creates mdev device that simulates serial port over PCI
+ * card.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/cdev.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/uuid.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/file.h>
+#include <linux/mdev.h>
+#include <linux/pci.h>
+#include <linux/serial.h>
+#include <uapi/linux/serial_reg.h>
+#include <linux/eventfd.h>
+/*
+ * #defines
+ */
+
+#define VERSION_STRING "0.1"
+#define DRIVER_AUTHOR "NVIDIA Corporation"
+
+#define MTTY_CLASS_NAME "mtty"
+
+#define MTTY_NAME "mtty"
+
+#define MTTY_STRING_LEN 16
+
+#define MTTY_CONFIG_SPACE_SIZE 0xff
+#define MTTY_IO_BAR_SIZE 0x8
+#define MTTY_MMIO_BAR_SIZE 0x100000
+
+#define STORE_LE16(addr, val) (*(u16 *)addr = val)
+#define STORE_LE32(addr, val) (*(u32 *)addr = val)
+
+#define MAX_FIFO_SIZE 16
+
+#define CIRCULAR_BUF_INC_IDX(idx) (idx = (idx + 1) & (MAX_FIFO_SIZE - 1))
+
+#define MTTY_VFIO_PCI_OFFSET_SHIFT 40
+
+#define MTTY_VFIO_PCI_OFFSET_TO_INDEX(off) (off >> MTTY_VFIO_PCI_OFFSET_SHIFT)
+#define MTTY_VFIO_PCI_INDEX_TO_OFFSET(index) \
+ ((u64)(index) << MTTY_VFIO_PCI_OFFSET_SHIFT)
+#define MTTY_VFIO_PCI_OFFSET_MASK \
+ (((u64)(1) << MTTY_VFIO_PCI_OFFSET_SHIFT) - 1)
+#define MAX_MTTYS 24
+
+/*
+ * Global Structures
+ */
+
+struct mtty_dev {
+ dev_t vd_devt;
+ struct class *vd_class;
+ struct cdev vd_cdev;
+ struct idr vd_idr;
+ struct device dev;
+} mtty_dev;
+
+struct mdev_region_info {
+ u64 start;
+ u64 phys_start;
+ u32 size;
+ u64 vfio_offset;
+};
+
+#if defined(DEBUG_REGS)
+const char *wr_reg[] = {
+ "TX",
+ "IER",
+ "FCR",
+ "LCR",
+ "MCR",
+ "LSR",
+ "MSR",
+ "SCR"
+};
+
+const char *rd_reg[] = {
+ "RX",
+ "IER",
+ "IIR",
+ "LCR",
+ "MCR",
+ "LSR",
+ "MSR",
+ "SCR"
+};
+#endif
+
+/* loop back buffer */
+struct rxtx {
+ u8 fifo[MAX_FIFO_SIZE];
+ u8 head, tail;
+ u8 count;
+};
+
+struct serial_port {
+ u8 uart_reg[8]; /* 8 registers */
+ struct rxtx rxtx; /* loop back buffer */
+ bool dlab;
+ bool overrun;
+ u16 divisor;
+ u8 fcr; /* FIFO control register */
+ u8 max_fifo_size;
+ u8 intr_trigger_level; /* interrupt trigger level */
+};
+
+/* State of each mdev device */
+struct mdev_state {
+ int irq_fd;
+ struct eventfd_ctx *intx_evtfd;
+ struct eventfd_ctx *msi_evtfd;
+ int irq_index;
+ u8 *vconfig;
+ struct mutex ops_lock;
+ struct mdev_device *mdev;
+ struct mdev_region_info region_info[VFIO_PCI_NUM_REGIONS];
+ u32 bar_mask[VFIO_PCI_NUM_REGIONS];
+ struct list_head next;
+ struct serial_port s[2];
+ struct mutex rxtx_lock;
+ struct vfio_device_info dev_info;
+ int nr_ports;
+};
+
+struct mutex mdev_list_lock;
+struct list_head mdev_devices_list;
+
+static const struct file_operations vd_fops = {
+ .owner = THIS_MODULE,
+};
+
+/* function prototypes */
+
+static int mtty_trigger_interrupt(uuid_le uuid);
+
+/* Helper functions */
+static struct mdev_state *find_mdev_state_by_uuid(uuid_le uuid)
+{
+ struct mdev_state *mds;
+
+ list_for_each_entry(mds, &mdev_devices_list, next) {
+ if (uuid_le_cmp(mds->mdev->uuid, uuid) == 0)
+ return mds;
+ }
+
+ return NULL;
+}
+
+void dump_buffer(char *buf, uint32_t count)
+{
+#if defined(DEBUG)
+ int i;
+
+ pr_info("Buffer:\n");
+ for (i = 0; i < count; i++) {
+ pr_info("%2x ", *(buf + i));
+ if ((i + 1) % 16 == 0)
+ pr_info("\n");
+ }
+#endif
+}
+
+static void mtty_create_config_space(struct mdev_state *mdev_state)
+{
+ /* PCI dev ID */
+ STORE_LE32((u32 *) &mdev_state->vconfig[0x0], 0x32534348);
+
+ /* Control: I/O+, Mem-, BusMaster- */
+ STORE_LE16((u16 *) &mdev_state->vconfig[0x4], 0x0001);
+
+ /* Status: capabilities list absent */
+ STORE_LE16((u16 *) &mdev_state->vconfig[0x6], 0x0200);
+
+ /* Rev ID */
+ mdev_state->vconfig[0x8] = 0x10;
+
+ /* programming interface class : 16550-compatible serial controller */
+ mdev_state->vconfig[0x9] = 0x02;
+
+ /* Sub class : 00 */
+ mdev_state->vconfig[0xa] = 0x00;
+
+ /* Base class : Simple Communication controllers */
+ mdev_state->vconfig[0xb] = 0x07;
+
+ /* base address registers */
+ /* BAR0: IO space */
+ STORE_LE32((u32 *) &mdev_state->vconfig[0x10], 0x000001);
+ mdev_state->bar_mask[0] = ~(MTTY_IO_BAR_SIZE) + 1;
+
+ if (mdev_state->nr_ports == 2) {
+ /* BAR1: IO space */
+ STORE_LE32((u32 *) &mdev_state->vconfig[0x14], 0x000001);
+ mdev_state->bar_mask[1] = ~(MTTY_IO_BAR_SIZE) + 1;
+ }
+
+ /* Subsystem ID */
+ STORE_LE32((u32 *) &mdev_state->vconfig[0x2c], 0x32534348);
+
+ mdev_state->vconfig[0x34] = 0x00; /* Cap Ptr */
+ mdev_state->vconfig[0x3d] = 0x01; /* interrupt pin (INTA#) */
+
+ /* Vendor specific data */
+ mdev_state->vconfig[0x40] = 0x23;
+ mdev_state->vconfig[0x43] = 0x80;
+ mdev_state->vconfig[0x44] = 0x23;
+ mdev_state->vconfig[0x48] = 0x23;
+ mdev_state->vconfig[0x4c] = 0x23;
+
+ mdev_state->vconfig[0x60] = 0x50;
+ mdev_state->vconfig[0x61] = 0x43;
+ mdev_state->vconfig[0x62] = 0x49;
+ mdev_state->vconfig[0x63] = 0x20;
+ mdev_state->vconfig[0x64] = 0x53;
+ mdev_state->vconfig[0x65] = 0x65;
+ mdev_state->vconfig[0x66] = 0x72;
+ mdev_state->vconfig[0x67] = 0x69;
+ mdev_state->vconfig[0x68] = 0x61;
+ mdev_state->vconfig[0x69] = 0x6c;
+ mdev_state->vconfig[0x6a] = 0x2f;
+ mdev_state->vconfig[0x6b] = 0x55;
+ mdev_state->vconfig[0x6c] = 0x41;
+ mdev_state->vconfig[0x6d] = 0x52;
+ mdev_state->vconfig[0x6e] = 0x54;
+}
+
+static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset,
+ char *buf, u32 count)
+{
+ u32 cfg_addr, bar_mask, bar_index = 0;
+
+ switch (offset) {
+ case 0x04: /* device control */
+ case 0x06: /* device status */
+ /* do nothing */
+ break;
+ case 0x3c: /* interrupt line */
+ mdev_state->vconfig[0x3c] = buf[0];
+ break;
+ case 0x3d:
+ /*
+ * Interrupt Pin is hardwired to INTA.
+ * This field is write protected by hardware
+ */
+ break;
+ case 0x10: /* BAR0 */
+ case 0x14: /* BAR1 */
+ if (offset == 0x10)
+ bar_index = 0;
+ else if (offset == 0x14)
+ bar_index = 1;
+
+ if ((mdev_state->nr_ports == 1) && (bar_index == 1)) {
+ STORE_LE32(&mdev_state->vconfig[offset], 0);
+ break;
+ }
+
+ cfg_addr = *(u32 *)buf;
+ pr_info("BAR%d addr 0x%x\n", bar_index, cfg_addr);
+
+ if (cfg_addr == 0xffffffff) {
+ bar_mask = mdev_state->bar_mask[bar_index];
+ cfg_addr = (cfg_addr & bar_mask);
+ }
+
+ cfg_addr |= (mdev_state->vconfig[offset] & 0x3ul);
+ STORE_LE32(&mdev_state->vconfig[offset], cfg_addr);
+ break;
+ case 0x18: /* BAR2 */
+ case 0x1c: /* BAR3 */
+ case 0x20: /* BAR4 */
+ STORE_LE32(&mdev_state->vconfig[offset], 0);
+ break;
+ default:
+ pr_info("PCI config write @0x%x of %d bytes not handled\n",
+ offset, count);
+ break;
+ }
+}
+
+static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
+ u16 offset, char *buf, u32 count)
+{
+ u8 data = *buf;
+
+ /* Handle data written by guest */
+ switch (offset) {
+ case UART_TX:
+ /* if DLAB set, data is LSB of divisor */
+ if (mdev_state->s[index].dlab) {
+ mdev_state->s[index].divisor |= data;
+ break;
+ }
+
+ mutex_lock(&mdev_state->rxtx_lock);
+
+ /* save in TX buffer */
+ if (mdev_state->s[index].rxtx.count <
+ mdev_state->s[index].max_fifo_size) {
+ mdev_state->s[index].rxtx.fifo[
+ mdev_state->s[index].rxtx.head] = data;
+ mdev_state->s[index].rxtx.count++;
+ CIRCULAR_BUF_INC_IDX(mdev_state->s[index].rxtx.head);
+ mdev_state->s[index].overrun = false;
+
+ /*
+ * Trigger interrupt if receive data interrupt is
+ * enabled and fifo reached trigger level
+ */
+ if ((mdev_state->s[index].uart_reg[UART_IER] &
+ UART_IER_RDI) &&
+ (mdev_state->s[index].rxtx.count ==
+ mdev_state->s[index].intr_trigger_level)) {
+ /* trigger interrupt */
+#if defined(DEBUG_INTR)
+ pr_err("Serial port %d: Fifo level trigger\n",
+ index);
+#endif
+ mtty_trigger_interrupt(mdev_state->mdev->uuid);
+ }
+ } else {
+#if defined(DEBUG_INTR)
+ pr_err("Serial port %d: Buffer Overflow\n", index);
+#endif
+ mdev_state->s[index].overrun = true;
+
+ /*
+ * Trigger interrupt if receiver line status interrupt
+ * is enabled
+ */
+ if (mdev_state->s[index].uart_reg[UART_IER] &
+ UART_IER_RLSI)
+ mtty_trigger_interrupt(mdev_state->mdev->uuid);
+ }
+ mutex_unlock(&mdev_state->rxtx_lock);
+ break;
+
+ case UART_IER:
+ /* if DLAB set, data is MSB of divisor */
+ if (mdev_state->s[index].dlab)
+ mdev_state->s[index].divisor |= (u16)data << 8;
+ else {
+ mdev_state->s[index].uart_reg[offset] = data;
+ mutex_lock(&mdev_state->rxtx_lock);
+ if ((data & UART_IER_THRI) &&
+ (mdev_state->s[index].rxtx.head ==
+ mdev_state->s[index].rxtx.tail)) {
+#if defined(DEBUG_INTR)
+ pr_err("Serial port %d: IER_THRI write\n",
+ index);
+#endif
+ mtty_trigger_interrupt(mdev_state->mdev->uuid);
+ }
+
+ mutex_unlock(&mdev_state->rxtx_lock);
+ }
+
+ break;
+
+ case UART_FCR:
+ mdev_state->s[index].fcr = data;
+
+ mutex_lock(&mdev_state->rxtx_lock);
+ if (data & (UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT)) {
+ /* clear loop back FIFO */
+ mdev_state->s[index].rxtx.count = 0;
+ mdev_state->s[index].rxtx.head = 0;
+ mdev_state->s[index].rxtx.tail = 0;
+ }
+ mutex_unlock(&mdev_state->rxtx_lock);
+
+ switch (data & UART_FCR_TRIGGER_MASK) {
+ case UART_FCR_TRIGGER_1:
+ mdev_state->s[index].intr_trigger_level = 1;
+ break;
+
+ case UART_FCR_TRIGGER_4:
+ mdev_state->s[index].intr_trigger_level = 4;
+ break;
+
+ case UART_FCR_TRIGGER_8:
+ mdev_state->s[index].intr_trigger_level = 8;
+ break;
+
+ case UART_FCR_TRIGGER_14:
+ mdev_state->s[index].intr_trigger_level = 14;
+ break;
+ }
+
+ /*
+ * Set trigger level to 1 otherwise or implement timer with
+ * timeout of 4 characters and on expiring that timer set
+ * Recevice data timeout in IIR register
+ */
+ mdev_state->s[index].intr_trigger_level = 1;
+ if (data & UART_FCR_ENABLE_FIFO)
+ mdev_state->s[index].max_fifo_size = MAX_FIFO_SIZE;
+ else {
+ mdev_state->s[index].max_fifo_size = 1;
+ mdev_state->s[index].intr_trigger_level = 1;
+ }
+
+ break;
+
+ case UART_LCR:
+ if (data & UART_LCR_DLAB) {
+ mdev_state->s[index].dlab = true;
+ mdev_state->s[index].divisor = 0;
+ } else
+ mdev_state->s[index].dlab = false;
+
+ mdev_state->s[index].uart_reg[offset] = data;
+ break;
+
+ case UART_MCR:
+ mdev_state->s[index].uart_reg[offset] = data;
+
+ if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) &&
+ (data & UART_MCR_OUT2)) {
+#if defined(DEBUG_INTR)
+ pr_err("Serial port %d: MCR_OUT2 write\n", index);
+#endif
+ mtty_trigger_interrupt(mdev_state->mdev->uuid);
+ }
+
+ if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) &&
+ (data & (UART_MCR_RTS | UART_MCR_DTR))) {
+#if defined(DEBUG_INTR)
+ pr_err("Serial port %d: MCR RTS/DTR write\n", index);
+#endif
+ mtty_trigger_interrupt(mdev_state->mdev->uuid);
+ }
+ break;
+
+ case UART_LSR:
+ case UART_MSR:
+ /* do nothing */
+ break;
+
+ case UART_SCR:
+ mdev_state->s[index].uart_reg[offset] = data;
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state,
+ u16 offset, char *buf, u32 count)
+{
+ /* Handle read requests by guest */
+ switch (offset) {
+ case UART_RX:
+ /* if DLAB set, data is LSB of divisor */
+ if (mdev_state->s[index].dlab) {
+ *buf = (u8)mdev_state->s[index].divisor;
+ break;
+ }
+
+ mutex_lock(&mdev_state->rxtx_lock);
+ /* return data in tx buffer */
+ if (mdev_state->s[index].rxtx.head !=
+ mdev_state->s[index].rxtx.tail) {
+ *buf = mdev_state->s[index].rxtx.fifo[
+ mdev_state->s[index].rxtx.tail];
+ mdev_state->s[index].rxtx.count--;
+ CIRCULAR_BUF_INC_IDX(mdev_state->s[index].rxtx.tail);
+ }
+
+ if (mdev_state->s[index].rxtx.head ==
+ mdev_state->s[index].rxtx.tail) {
+ /*
+ * Trigger interrupt if tx buffer empty interrupt is
+ * enabled and fifo is empty
+ */
+#if defined(DEBUG_INTR)
+ pr_err("Serial port %d: Buffer Empty\n", index);
+#endif
+ if (mdev_state->s[index].uart_reg[UART_IER] &
+ UART_IER_THRI)
+ mtty_trigger_interrupt(mdev_state->mdev->uuid);
+ }
+ mutex_unlock(&mdev_state->rxtx_lock);
+
+ break;
+
+ case UART_IER:
+ if (mdev_state->s[index].dlab) {
+ *buf = (u8)(mdev_state->s[index].divisor >> 8);
+ break;
+ }
+ *buf = mdev_state->s[index].uart_reg[offset] & 0x0f;
+ break;
+
+ case UART_IIR:
+ {
+ u8 ier = mdev_state->s[index].uart_reg[UART_IER];
+ *buf = 0;
+
+ mutex_lock(&mdev_state->rxtx_lock);
+ /* Interrupt priority 1: Parity, overrun, framing or break */
+ if ((ier & UART_IER_RLSI) && mdev_state->s[index].overrun)
+ *buf |= UART_IIR_RLSI;
+
+ /* Interrupt priority 2: Fifo trigger level reached */
+ if ((ier & UART_IER_RDI) &&
+ (mdev_state->s[index].rxtx.count ==
+ mdev_state->s[index].intr_trigger_level))
+ *buf |= UART_IIR_RDI;
+
+ /* Interrupt priotiry 3: transmitter holding register empty */
+ if ((ier & UART_IER_THRI) &&
+ (mdev_state->s[index].rxtx.head ==
+ mdev_state->s[index].rxtx.tail))
+ *buf |= UART_IIR_THRI;
+
+ /* Interrupt priotiry 4: Modem status: CTS, DSR, RI or DCD */
+ if ((ier & UART_IER_MSI) &&
+ (mdev_state->s[index].uart_reg[UART_MCR] &
+ (UART_MCR_RTS | UART_MCR_DTR)))
+ *buf |= UART_IIR_MSI;
+
+ /* bit0: 0=> interrupt pending, 1=> no interrupt is pending */
+ if (*buf == 0)
+ *buf = UART_IIR_NO_INT;
+
+ /* set bit 6 & 7 to be 16550 compatible */
+ *buf |= 0xC0;
+ mutex_unlock(&mdev_state->rxtx_lock);
+ }
+ break;
+
+ case UART_LCR:
+ case UART_MCR:
+ *buf = mdev_state->s[index].uart_reg[offset];
+ break;
+
+ case UART_LSR:
+ {
+ u8 lsr = 0;
+
+ mutex_lock(&mdev_state->rxtx_lock);
+ /* atleast one char in FIFO */
+ if (mdev_state->s[index].rxtx.head !=
+ mdev_state->s[index].rxtx.tail)
+ lsr |= UART_LSR_DR;
+
+ /* if FIFO overrun */
+ if (mdev_state->s[index].overrun)
+ lsr |= UART_LSR_OE;
+
+ /* transmit FIFO empty and tramsitter empty */
+ if (mdev_state->s[index].rxtx.head ==
+ mdev_state->s[index].rxtx.tail)
+ lsr |= UART_LSR_TEMT | UART_LSR_THRE;
+
+ mutex_unlock(&mdev_state->rxtx_lock);
+ *buf = lsr;
+ break;
+ }
+ case UART_MSR:
+ *buf = UART_MSR_DSR | UART_MSR_DDSR | UART_MSR_DCD;
+
+ mutex_lock(&mdev_state->rxtx_lock);
+ /* if AFE is 1 and FIFO have space, set CTS bit */
+ if (mdev_state->s[index].uart_reg[UART_MCR] &
+ UART_MCR_AFE) {
+ if (mdev_state->s[index].rxtx.count <
+ mdev_state->s[index].max_fifo_size)
+ *buf |= UART_MSR_CTS | UART_MSR_DCTS;
+ } else
+ *buf |= UART_MSR_CTS | UART_MSR_DCTS;
+ mutex_unlock(&mdev_state->rxtx_lock);
+
+ break;
+
+ case UART_SCR:
+ *buf = mdev_state->s[index].uart_reg[offset];
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void mdev_read_base(struct mdev_state *mdev_state)
+{
+ int index, pos;
+ u32 start_lo, start_hi;
+ u32 mem_type;
+
+ pos = PCI_BASE_ADDRESS_0;
+
+ for (index = 0; index <= VFIO_PCI_BAR5_REGION_INDEX; index++) {
+
+ if (!mdev_state->region_info[index].size)
+ continue;
+
+ start_lo = (*(u32 *)(mdev_state->vconfig + pos)) &
+ PCI_BASE_ADDRESS_MEM_MASK;
+ mem_type = (*(u32 *)(mdev_state->vconfig + pos)) &
+ PCI_BASE_ADDRESS_MEM_TYPE_MASK;
+
+ switch (mem_type) {
+ case PCI_BASE_ADDRESS_MEM_TYPE_64:
+ start_hi = (*(u32 *)(mdev_state->vconfig + pos + 4));
+ pos += 4;
+ break;
+ case PCI_BASE_ADDRESS_MEM_TYPE_32:
+ case PCI_BASE_ADDRESS_MEM_TYPE_1M:
+ /* 1M mem BAR treated as 32-bit BAR */
+ default:
+ /* mem unknown type treated as 32-bit BAR */
+ start_hi = 0;
+ break;
+ }
+ pos += 4;
+ mdev_state->region_info[index].start = ((u64)start_hi << 32) |
+ start_lo;
+ }
+}
+
+static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count,
+ loff_t pos, bool is_write)
+{
+ struct mdev_state *mdev_state;
+ unsigned int index;
+ loff_t offset;
+ int ret = 0;
+
+ if (!mdev || !buf)
+ return -EINVAL;
+
+ mdev_state = mdev_get_drvdata(mdev);
+ if (!mdev_state) {
+ pr_err("%s mdev_state not found\n", __func__);
+ return -EINVAL;
+ }
+
+ mutex_lock(&mdev_state->ops_lock);
+
+ index = MTTY_VFIO_PCI_OFFSET_TO_INDEX(pos);
+ offset = pos & MTTY_VFIO_PCI_OFFSET_MASK;
+ switch (index) {
+ case VFIO_PCI_CONFIG_REGION_INDEX:
+
+#if defined(DEBUG)
+ pr_info("%s: PCI config space %s at offset 0x%llx\n",
+ __func__, is_write ? "write" : "read", offset);
+#endif
+ if (is_write) {
+ dump_buffer(buf, count);
+ handle_pci_cfg_write(mdev_state, offset, buf, count);
+ } else {
+ memcpy(buf, (mdev_state->vconfig + offset), count);
+ dump_buffer(buf, count);
+ }
+
+ break;
+
+ case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
+ if (!mdev_state->region_info[index].start)
+ mdev_read_base(mdev_state);
+
+ if (is_write) {
+ dump_buffer(buf, count);
+
+#if defined(DEBUG_REGS)
+ pr_info("%s: BAR%d WR @0x%llx %s val:0x%02x dlab:%d\n",
+ __func__, index, offset, wr_reg[offset],
+ (u8)*buf, mdev_state->s[index].dlab);
+#endif
+ handle_bar_write(index, mdev_state, offset, buf, count);
+ } else {
+ handle_bar_read(index, mdev_state, offset, buf, count);
+ dump_buffer(buf, count);
+
+#if defined(DEBUG_REGS)
+ pr_info("%s: BAR%d RD @0x%llx %s val:0x%02x dlab:%d\n",
+ __func__, index, offset, rd_reg[offset],
+ (u8)*buf, mdev_state->s[index].dlab);
+#endif
+ }
+ break;
+
+ default:
+ ret = -1;
+ goto accessfailed;
+ }
+
+ ret = count;
+
+
+accessfailed:
+ mutex_unlock(&mdev_state->ops_lock);
+
+ return ret;
+}
+
+int mtty_create(struct kobject *kobj, struct mdev_device *mdev)
+{
+ struct mdev_state *mdev_state;
+ char name[MTTY_STRING_LEN];
+ int nr_ports = 0, i;
+
+ if (!mdev)
+ return -EINVAL;
+
+ for (i = 0; i < 2; i++) {
+ snprintf(name, MTTY_STRING_LEN, "%s-%d",
+ dev_driver_string(mdev->parent->dev), i + 1);
+ if (!strcmp(kobj->name, name)) {
+ nr_ports = i + 1;
+ break;
+ }
+ }
+
+ if (!nr_ports)
+ return -EINVAL;
+
+ mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL);
+ if (mdev_state == NULL)
+ return -ENOMEM;
+
+ mdev_state->nr_ports = nr_ports;
+ mdev_state->irq_index = -1;
+ mdev_state->s[0].max_fifo_size = MAX_FIFO_SIZE;
+ mdev_state->s[1].max_fifo_size = MAX_FIFO_SIZE;
+ mutex_init(&mdev_state->rxtx_lock);
+ mdev_state->vconfig = kzalloc(MTTY_CONFIG_SPACE_SIZE, GFP_KERNEL);
+
+ if (mdev_state->vconfig == NULL) {
+ kfree(mdev_state);
+ return -ENOMEM;
+ }
+
+ mutex_init(&mdev_state->ops_lock);
+ mdev_state->mdev = mdev;
+ mdev_set_drvdata(mdev, mdev_state);
+
+ mtty_create_config_space(mdev_state);
+
+ mutex_lock(&mdev_list_lock);
+ list_add(&mdev_state->next, &mdev_devices_list);
+ mutex_unlock(&mdev_list_lock);
+
+ return 0;
+}
+
+int mtty_remove(struct mdev_device *mdev)
+{
+ struct mdev_state *mds, *tmp_mds;
+ struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+ int ret = -EINVAL;
+
+ mutex_lock(&mdev_list_lock);
+ list_for_each_entry_safe(mds, tmp_mds, &mdev_devices_list, next) {
+ if (mdev_state == mds) {
+ list_del(&mdev_state->next);
+ mdev_set_drvdata(mdev, NULL);
+ kfree(mdev_state->vconfig);
+ kfree(mdev_state);
+ ret = 0;
+ break;
+ }
+ }
+ mutex_unlock(&mdev_list_lock);
+
+ return ret;
+}
+
+int mtty_reset(struct mdev_device *mdev)
+{
+ struct mdev_state *mdev_state;
+
+ if (!mdev)
+ return -EINVAL;
+
+ mdev_state = mdev_get_drvdata(mdev);
+ if (!mdev_state)
+ return -EINVAL;
+
+ pr_info("%s: called\n", __func__);
+
+ return 0;
+}
+
+ssize_t mtty_read(struct mdev_device *mdev, char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ unsigned int done = 0;
+ int ret;
+
+ while (count) {
+ size_t filled;
+
+ if (count >= 4 && !(*ppos % 4)) {
+ u32 val;
+
+ ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ *ppos, false);
+ if (ret <= 0)
+ goto read_err;
+
+ if (copy_to_user(buf, &val, sizeof(val)))
+ goto read_err;
+
+ filled = 4;
+ } else if (count >= 2 && !(*ppos % 2)) {
+ u16 val;
+
+ ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ *ppos, false);
+ if (ret <= 0)
+ goto read_err;
+
+ if (copy_to_user(buf, &val, sizeof(val)))
+ goto read_err;
+
+ filled = 2;
+ } else {
+ u8 val;
+
+ ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ *ppos, false);
+ if (ret <= 0)
+ goto read_err;
+
+ if (copy_to_user(buf, &val, sizeof(val)))
+ goto read_err;
+
+ filled = 1;
+ }
+
+ count -= filled;
+ done += filled;
+ *ppos += filled;
+ buf += filled;
+ }
+
+ return done;
+
+read_err:
+ return -EFAULT;
+}
+
+ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ unsigned int done = 0;
+ int ret;
+
+ while (count) {
+ size_t filled;
+
+ if (count >= 4 && !(*ppos % 4)) {
+ u32 val;
+
+ if (copy_from_user(&val, buf, sizeof(val)))
+ goto write_err;
+
+ ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ *ppos, true);
+ if (ret <= 0)
+ goto write_err;
+
+ filled = 4;
+ } else if (count >= 2 && !(*ppos % 2)) {
+ u16 val;
+
+ if (copy_from_user(&val, buf, sizeof(val)))
+ goto write_err;
+
+ ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ *ppos, true);
+ if (ret <= 0)
+ goto write_err;
+
+ filled = 2;
+ } else {
+ u8 val;
+
+ if (copy_from_user(&val, buf, sizeof(val)))
+ goto write_err;
+
+ ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ *ppos, true);
+ if (ret <= 0)
+ goto write_err;
+
+ filled = 1;
+ }
+ count -= filled;
+ done += filled;
+ *ppos += filled;
+ buf += filled;
+ }
+
+ return done;
+write_err:
+ return -EFAULT;
+}
+
+static int mtty_set_irqs(struct mdev_device *mdev, uint32_t flags,
+ unsigned int index, unsigned int start,
+ unsigned int count, void *data)
+{
+ int ret = 0;
+ struct mdev_state *mdev_state;
+
+ if (!mdev)
+ return -EINVAL;
+
+ mdev_state = mdev_get_drvdata(mdev);
+ if (!mdev_state)
+ return -EINVAL;
+
+ mutex_lock(&mdev_state->ops_lock);
+ switch (index) {
+ case VFIO_PCI_INTX_IRQ_INDEX:
+ switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
+ case VFIO_IRQ_SET_ACTION_MASK:
+ case VFIO_IRQ_SET_ACTION_UNMASK:
+ break;
+ case VFIO_IRQ_SET_ACTION_TRIGGER:
+ {
+ if (flags & VFIO_IRQ_SET_DATA_NONE) {
+ pr_info("%s: disable INTx\n", __func__);
+ if (mdev_state->intx_evtfd)
+ eventfd_ctx_put(mdev_state->intx_evtfd);
+ break;
+ }
+
+ if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ int fd = *(int *)data;
+
+ if (fd > 0) {
+ struct eventfd_ctx *evt;
+
+ evt = eventfd_ctx_fdget(fd);
+ if (IS_ERR(evt)) {
+ ret = PTR_ERR(evt);
+ break;
+ }
+ mdev_state->intx_evtfd = evt;
+ mdev_state->irq_fd = fd;
+ mdev_state->irq_index = index;
+ break;
+ }
+ }
+ break;
+ }
+ }
+ break;
+ case VFIO_PCI_MSI_IRQ_INDEX:
+ switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
+ case VFIO_IRQ_SET_ACTION_MASK:
+ case VFIO_IRQ_SET_ACTION_UNMASK:
+ break;
+ case VFIO_IRQ_SET_ACTION_TRIGGER:
+ if (flags & VFIO_IRQ_SET_DATA_NONE) {
+ if (mdev_state->msi_evtfd)
+ eventfd_ctx_put(mdev_state->msi_evtfd);
+ pr_info("%s: disable MSI\n", __func__);
+ mdev_state->irq_index = VFIO_PCI_INTX_IRQ_INDEX;
+ break;
+ }
+ if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ int fd = *(int *)data;
+ struct eventfd_ctx *evt;
+
+ if (fd <= 0)
+ break;
+
+ if (mdev_state->msi_evtfd)
+ break;
+
+ evt = eventfd_ctx_fdget(fd);
+ if (IS_ERR(evt)) {
+ ret = PTR_ERR(evt);
+ break;
+ }
+ mdev_state->msi_evtfd = evt;
+ mdev_state->irq_fd = fd;
+ mdev_state->irq_index = index;
+ }
+ break;
+ }
+ break;
+ case VFIO_PCI_MSIX_IRQ_INDEX:
+ pr_info("%s: MSIX_IRQ\n", __func__);
+ break;
+ case VFIO_PCI_ERR_IRQ_INDEX:
+ pr_info("%s: ERR_IRQ\n", __func__);
+ break;
+ case VFIO_PCI_REQ_IRQ_INDEX:
+ pr_info("%s: REQ_IRQ\n", __func__);
+ break;
+ }
+
+ mutex_unlock(&mdev_state->ops_lock);
+ return ret;
+}
+
+static int mtty_trigger_interrupt(uuid_le uuid)
+{
+ int ret = -1;
+ struct mdev_state *mdev_state;
+
+ mdev_state = find_mdev_state_by_uuid(uuid);
+
+ if (!mdev_state) {
+ pr_info("%s: mdev not found\n", __func__);
+ return -EINVAL;
+ }
+
+ if ((mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX) &&
+ (!mdev_state->msi_evtfd))
+ return -EINVAL;
+ else if ((mdev_state->irq_index == VFIO_PCI_INTX_IRQ_INDEX) &&
+ (!mdev_state->intx_evtfd)) {
+ pr_info("%s: Intr eventfd not found\n", __func__);
+ return -EINVAL;
+ }
+
+ if (mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX)
+ ret = eventfd_signal(mdev_state->msi_evtfd, 1);
+ else
+ ret = eventfd_signal(mdev_state->intx_evtfd, 1);
+
+#if defined(DEBUG_INTR)
+ pr_info("Intx triggered\n");
+#endif
+ if (ret != 1)
+ pr_err("%s: eventfd signal failed (%d)\n", __func__, ret);
+
+ return ret;
+}
+
+int mtty_get_region_info(struct mdev_device *mdev,
+ struct vfio_region_info *region_info,
+ u16 *cap_type_id, void **cap_type)
+{
+ unsigned int size = 0;
+ struct mdev_state *mdev_state;
+ int bar_index;
+
+ if (!mdev)
+ return -EINVAL;
+
+ mdev_state = mdev_get_drvdata(mdev);
+ if (!mdev_state)
+ return -EINVAL;
+
+ mutex_lock(&mdev_state->ops_lock);
+ bar_index = region_info->index;
+
+ switch (bar_index) {
+ case VFIO_PCI_CONFIG_REGION_INDEX:
+ size = MTTY_CONFIG_SPACE_SIZE;
+ break;
+ case VFIO_PCI_BAR0_REGION_INDEX:
+ size = MTTY_IO_BAR_SIZE;
+ break;
+ case VFIO_PCI_BAR1_REGION_INDEX:
+ if (mdev_state->nr_ports == 2)
+ size = MTTY_IO_BAR_SIZE;
+ break;
+ default:
+ size = 0;
+ break;
+ }
+
+ mdev_state->region_info[bar_index].size = size;
+ mdev_state->region_info[bar_index].vfio_offset =
+ MTTY_VFIO_PCI_INDEX_TO_OFFSET(bar_index);
+
+ region_info->size = size;
+ region_info->offset = MTTY_VFIO_PCI_INDEX_TO_OFFSET(bar_index);
+ region_info->flags = VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE;
+ mutex_unlock(&mdev_state->ops_lock);
+ return 0;
+}
+
+int mtty_get_irq_info(struct mdev_device *mdev, struct vfio_irq_info *irq_info)
+{
+ switch (irq_info->index) {
+ case VFIO_PCI_INTX_IRQ_INDEX:
+ case VFIO_PCI_MSI_IRQ_INDEX:
+ case VFIO_PCI_REQ_IRQ_INDEX:
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ irq_info->flags = VFIO_IRQ_INFO_EVENTFD;
+ irq_info->count = 1;
+
+ if (irq_info->index == VFIO_PCI_INTX_IRQ_INDEX)
+ irq_info->flags |= (VFIO_IRQ_INFO_MASKABLE |
+ VFIO_IRQ_INFO_AUTOMASKED);
+ else
+ irq_info->flags |= VFIO_IRQ_INFO_NORESIZE;
+
+ return 0;
+}
+
+int mtty_get_device_info(struct mdev_device *mdev,
+ struct vfio_device_info *dev_info)
+{
+ dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
+ dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
+ dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
+
+ return 0;
+}
+
+static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd,
+ unsigned long arg)
+{
+ int ret = 0;
+ unsigned long minsz;
+ struct mdev_state *mdev_state;
+
+ if (!mdev)
+ return -EINVAL;
+
+ mdev_state = mdev_get_drvdata(mdev);
+ if (!mdev_state)
+ return -ENODEV;
+
+ switch (cmd) {
+ case VFIO_DEVICE_GET_INFO:
+ {
+ struct vfio_device_info info;
+
+ minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ ret = mtty_get_device_info(mdev, &info);
+ if (ret)
+ return ret;
+
+ memcpy(&mdev_state->dev_info, &info, sizeof(info));
+
+ return copy_to_user((void __user *)arg, &info, minsz);
+ }
+ case VFIO_DEVICE_GET_REGION_INFO:
+ {
+ struct vfio_region_info info;
+ u16 cap_type_id = 0;
+ void *cap_type = NULL;
+
+ minsz = offsetofend(struct vfio_region_info, offset);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ ret = mtty_get_region_info(mdev, &info, &cap_type_id,
+ &cap_type);
+ if (ret)
+ return ret;
+
+ return copy_to_user((void __user *)arg, &info, minsz);
+ }
+
+ case VFIO_DEVICE_GET_IRQ_INFO:
+ {
+ struct vfio_irq_info info;
+
+ minsz = offsetofend(struct vfio_irq_info, count);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if ((info.argsz < minsz) ||
+ (info.index >= mdev_state->dev_info.num_irqs))
+ return -EINVAL;
+
+ ret = mtty_get_irq_info(mdev, &info);
+ if (ret)
+ return ret;
+
+ if (info.count == -1)
+ return -EINVAL;
+
+ return copy_to_user((void __user *)arg, &info, minsz);
+ }
+ case VFIO_DEVICE_SET_IRQS:
+ {
+ struct vfio_irq_set hdr;
+ u8 *data = NULL, *ptr = NULL;
+ size_t data_size = 0;
+
+ minsz = offsetofend(struct vfio_irq_set, count);
+
+ if (copy_from_user(&hdr, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ ret = vfio_set_irqs_validate_and_prepare(&hdr,
+ mdev_state->dev_info.num_irqs,
+ VFIO_PCI_NUM_IRQS,
+ &data_size);
+ if (ret)
+ return ret;
+
+ if (data_size) {
+ ptr = data = memdup_user((void __user *)(arg + minsz),
+ data_size);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+ }
+
+ ret = mtty_set_irqs(mdev, hdr.flags, hdr.index, hdr.start,
+ hdr.count, data);
+
+ kfree(ptr);
+ return ret;
+ }
+ case VFIO_DEVICE_RESET:
+ return mtty_reset(mdev);
+ }
+ return -ENOTTY;
+}
+
+int mtty_open(struct mdev_device *mdev)
+{
+ pr_info("%s\n", __func__);
+ return 0;
+}
+
+void mtty_close(struct mdev_device *mdev)
+{
+ pr_info("%s\n", __func__);
+}
+
+static ssize_t
+sample_mtty_dev_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "This is phy device\n");
+}
+
+static DEVICE_ATTR_RO(sample_mtty_dev);
+
+static struct attribute *mtty_dev_attrs[] = {
+ &dev_attr_sample_mtty_dev.attr,
+ NULL,
+};
+
+static const struct attribute_group mtty_dev_group = {
+ .name = "mtty_dev",
+ .attrs = mtty_dev_attrs,
+};
+
+const struct attribute_group *mtty_dev_groups[] = {
+ &mtty_dev_group,
+ NULL,
+};
+
+static ssize_t
+sample_mdev_dev_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct mdev_device *mdev = to_mdev_device(dev);
+
+ if (mdev)
+ return sprintf(buf, "This is MDEV %s\n", dev_name(&mdev->dev));
+
+ return sprintf(buf, "\n");
+}
+
+static DEVICE_ATTR_RO(sample_mdev_dev);
+
+static struct attribute *mdev_dev_attrs[] = {
+ &dev_attr_sample_mdev_dev.attr,
+ NULL,
+};
+
+static const struct attribute_group mdev_dev_group = {
+ .name = "vendor",
+ .attrs = mdev_dev_attrs,
+};
+
+const struct attribute_group *mdev_dev_groups[] = {
+ &mdev_dev_group,
+ NULL,
+};
+
+static ssize_t
+name_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+ char name[MTTY_STRING_LEN];
+ int i;
+ const char *name_str[2] = {"Single port serial", "Dual port serial"};
+
+ for (i = 0; i < 2; i++) {
+ snprintf(name, MTTY_STRING_LEN, "%s-%d",
+ dev_driver_string(dev), i + 1);
+ if (!strcmp(kobj->name, name))
+ return sprintf(buf, "%s\n", name_str[i]);
+ }
+
+ return -EINVAL;
+}
+
+MDEV_TYPE_ATTR_RO(name);
+
+static ssize_t
+available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+ char name[MTTY_STRING_LEN];
+ int i;
+ struct mdev_state *mds;
+ int ports = 0, used = 0;
+
+ for (i = 0; i < 2; i++) {
+ snprintf(name, MTTY_STRING_LEN, "%s-%d",
+ dev_driver_string(dev), i + 1);
+ if (!strcmp(kobj->name, name)) {
+ ports = i + 1;
+ break;
+ }
+ }
+
+ if (!ports)
+ return -EINVAL;
+
+ list_for_each_entry(mds, &mdev_devices_list, next)
+ used += mds->nr_ports;
+
+ return sprintf(buf, "%d\n", (MAX_MTTYS - used)/ports);
+}
+
+MDEV_TYPE_ATTR_RO(available_instances);
+
+
+static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
+ char *buf)
+{
+ return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
+}
+
+MDEV_TYPE_ATTR_RO(device_api);
+
+static struct attribute *mdev_types_attrs[] = {
+ &mdev_type_attr_name.attr,
+ &mdev_type_attr_device_api.attr,
+ &mdev_type_attr_available_instances.attr,
+ NULL,
+};
+
+static struct attribute_group mdev_type_group1 = {
+ .name = "1",
+ .attrs = mdev_types_attrs,
+};
+
+static struct attribute_group mdev_type_group2 = {
+ .name = "2",
+ .attrs = mdev_types_attrs,
+};
+
+struct attribute_group *mdev_type_groups[] = {
+ &mdev_type_group1,
+ &mdev_type_group2,
+ NULL,
+};
+
+struct parent_ops mdev_fops = {
+ .owner = THIS_MODULE,
+ .dev_attr_groups = mtty_dev_groups,
+ .mdev_attr_groups = mdev_dev_groups,
+ .supported_type_groups = mdev_type_groups,
+ .create = mtty_create,
+ .remove = mtty_remove,
+ .open = mtty_open,
+ .release = mtty_close,
+ .read = mtty_read,
+ .write = mtty_write,
+ .ioctl = mtty_ioctl,
+};
+
+static void mtty_device_release(struct device *dev)
+{
+ dev_dbg(dev, "mtty: released\n");
+}
+
+static int __init mtty_dev_init(void)
+{
+ int ret = 0;
+
+ pr_info("mtty_dev: %s\n", __func__);
+
+ memset(&mtty_dev, 0, sizeof(mtty_dev));
+
+ idr_init(&mtty_dev.vd_idr);
+
+ ret = alloc_chrdev_region(&mtty_dev.vd_devt, 0, MINORMASK, MTTY_NAME);
+
+ if (ret < 0) {
+ pr_err("Error: failed to register mtty_dev, err:%d\n", ret);
+ return ret;
+ }
+
+ cdev_init(&mtty_dev.vd_cdev, &vd_fops);
+ cdev_add(&mtty_dev.vd_cdev, mtty_dev.vd_devt, MINORMASK);
+
+ pr_info("major_number:%d\n", MAJOR(mtty_dev.vd_devt));
+
+ mtty_dev.vd_class = class_create(THIS_MODULE, MTTY_CLASS_NAME);
+
+ if (IS_ERR(mtty_dev.vd_class)) {
+ pr_err("Error: failed to register mtty_dev class\n");
+ goto failed1;
+ }
+
+ mtty_dev.dev.class = mtty_dev.vd_class;
+ mtty_dev.dev.release = mtty_device_release;
+ dev_set_name(&mtty_dev.dev, "%s", MTTY_NAME);
+
+ ret = device_register(&mtty_dev.dev);
+ if (ret)
+ goto failed2;
+
+ if (mdev_register_device(&mtty_dev.dev, &mdev_fops) != 0)
+ goto failed3;
+
+ mutex_init(&mdev_list_lock);
+ INIT_LIST_HEAD(&mdev_devices_list);
+
+ goto all_done;
+
+failed3:
+
+ device_unregister(&mtty_dev.dev);
+failed2:
+ class_destroy(mtty_dev.vd_class);
+
+failed1:
+ cdev_del(&mtty_dev.vd_cdev);
+ unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK);
+
+all_done:
+ return ret;
+}
+
+static void __exit mtty_dev_exit(void)
+{
+ mtty_dev.dev.bus = NULL;
+ mdev_unregister_device(&mtty_dev.dev);
+
+ device_unregister(&mtty_dev.dev);
+ idr_destroy(&mtty_dev.vd_idr);
+ cdev_del(&mtty_dev.vd_cdev);
+ unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK);
+ class_destroy(mtty_dev.vd_class);
+ mtty_dev.vd_class = NULL;
+ pr_info("mtty_dev: Unloaded!\n");
+}
+
+module_init(mtty_dev_init)
+module_exit(mtty_dev_exit)
+
+MODULE_LICENSE("GPL v2");
+MODULE_INFO(supported, "Test driver that simulate serial port over PCI");
+MODULE_VERSION(VERSION_STRING);
+MODULE_AUTHOR(DRIVER_AUTHOR);
OpenPOWER on IntegriCloud