summaryrefslogtreecommitdiffstats
path: root/samples
diff options
context:
space:
mode:
Diffstat (limited to 'samples')
-rw-r--r--samples/Kconfig16
-rw-r--r--samples/Makefile4
-rw-r--r--samples/blackfin/Makefile1
-rw-r--r--samples/blackfin/gptimers-example.c91
-rw-r--r--samples/bpf/Makefile21
-rw-r--r--samples/bpf/bpf_load.c124
-rw-r--r--samples/bpf/bpf_load.h2
-rw-r--r--samples/bpf/cookie_uid_helper_example.c2
-rw-r--r--samples/bpf/cpustat_kern.c281
-rw-r--r--samples/bpf/cpustat_user.c219
-rw-r--r--samples/bpf/tcbpf2_kern.c171
-rw-r--r--samples/bpf/test_cgrp2_attach2.c36
-rwxr-xr-xsamples/bpf/test_cgrp2_sock.sh1
-rwxr-xr-xsamples/bpf/test_cgrp2_sock2.sh3
-rw-r--r--samples/bpf/test_overhead_raw_tp_kern.c17
-rw-r--r--samples/bpf/test_overhead_user.c12
-rwxr-xr-xsamples/bpf/test_override_return.sh15
-rwxr-xr-xsamples/bpf/test_tunnel_bpf.sh131
-rw-r--r--samples/bpf/trace_event_kern.c4
-rw-r--r--samples/bpf/trace_event_user.c15
-rw-r--r--samples/bpf/tracex7_kern.c16
-rw-r--r--samples/bpf/tracex7_user.c28
-rw-r--r--samples/bpf/xdp1_user.c4
-rwxr-xr-xsamples/bpf/xdp2skb_meta.sh220
-rw-r--r--samples/bpf/xdp2skb_meta_kern.c105
-rw-r--r--samples/bpf/xdp_monitor_kern.c96
-rw-r--r--samples/bpf/xdp_monitor_user.c416
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c6
-rw-r--r--samples/bpf/xdp_redirect_map_user.c8
-rw-r--r--samples/bpf/xdp_redirect_user.c15
-rw-r--r--samples/bpf/xdp_router_ipv4_user.c10
-rw-r--r--samples/bpf/xdp_rxq_info_kern.c96
-rw-r--r--samples/bpf/xdp_rxq_info_user.c531
-rw-r--r--samples/bpf/xdp_tx_iptunnel_user.c6
-rw-r--r--samples/kobject/kobject-example.c4
-rw-r--r--samples/kobject/kset-example.c4
-rw-r--r--samples/kprobes/kprobe_example.c8
-rw-r--r--samples/qmi/Makefile1
-rw-r--r--samples/qmi/qmi_sample_client.c622
-rw-r--r--samples/seccomp/Makefile10
-rw-r--r--samples/sockmap/Makefile2
-rw-r--r--samples/sockmap/sockmap_kern.c239
-rwxr-xr-xsamples/sockmap/sockmap_test.sh488
-rw-r--r--samples/sockmap/sockmap_user.c702
-rw-r--r--samples/vfio-mdev/mtty.c2
45 files changed, 4403 insertions, 402 deletions
diff --git a/samples/Kconfig b/samples/Kconfig
index c332a3b..3db002b 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -62,6 +62,16 @@ config SAMPLE_KDB
Build an example of how to dynamically add the hello
command to the kdb shell.
+config SAMPLE_QMI_CLIENT
+ tristate "Build qmi client sample -- loadable modules only"
+ depends on m
+ depends on ARCH_QCOM
+ depends on NET
+ select QCOM_QMI_HELPERS
+ help
+ Build an QMI client sample driver, which demonstrates how to
+ communicate with a remote QRTR service, using QMI encoded messages.
+
config SAMPLE_RPMSG_CLIENT
tristate "Build rpmsg client sample -- loadable modules only"
depends on RPMSG && m
@@ -98,12 +108,6 @@ config SAMPLE_SECCOMP
Build samples of seccomp filters using various methods of
BPF filter construction.
-config SAMPLE_BLACKFIN_GPTIMERS
- tristate "Build blackfin gptimers sample code -- loadable modules only"
- depends on BLACKFIN && BFIN_GPTIMERS && m
- help
- Build samples of blackfin gptimers sample module.
-
config SAMPLE_VFIO_MDEV_MTTY
tristate "Build VFIO mtty example mediated device sample code -- loadable modules only"
depends on VFIO_MDEV_DEVICE && m
diff --git a/samples/Makefile b/samples/Makefile
index db54e76..bd601c0 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -2,5 +2,5 @@
obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \
hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
- configfs/ connector/ v4l/ trace_printk/ blackfin/ \
- vfio-mdev/ statx/
+ configfs/ connector/ v4l/ trace_printk/ \
+ vfio-mdev/ statx/ qmi/
diff --git a/samples/blackfin/Makefile b/samples/blackfin/Makefile
deleted file mode 100644
index 89b86cf..0000000
--- a/samples/blackfin/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-$(CONFIG_SAMPLE_BLACKFIN_GPTIMERS) += gptimers-example.o
diff --git a/samples/blackfin/gptimers-example.c b/samples/blackfin/gptimers-example.c
deleted file mode 100644
index 283eba9..0000000
--- a/samples/blackfin/gptimers-example.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Simple gptimers example
- * http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:drivers:gptimers
- *
- * Copyright 2007-2009 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#include <linux/interrupt.h>
-#include <linux/module.h>
-
-#include <asm/gptimers.h>
-#include <asm/portmux.h>
-
-/* ... random driver includes ... */
-
-#define DRIVER_NAME "gptimer_example"
-
-#ifdef IRQ_TIMER5
-#define SAMPLE_IRQ_TIMER IRQ_TIMER5
-#else
-#define SAMPLE_IRQ_TIMER IRQ_TIMER2
-#endif
-
-struct gptimer_data {
- uint32_t period, width;
-};
-static struct gptimer_data data;
-
-/* ... random driver state ... */
-
-static irqreturn_t gptimer_example_irq(int irq, void *dev_id)
-{
- struct gptimer_data *data = dev_id;
-
- /* make sure it was our timer which caused the interrupt */
- if (!get_gptimer_intr(TIMER5_id))
- return IRQ_NONE;
-
- /* read the width/period values that were captured for the waveform */
- data->width = get_gptimer_pwidth(TIMER5_id);
- data->period = get_gptimer_period(TIMER5_id);
-
- /* acknowledge the interrupt */
- clear_gptimer_intr(TIMER5_id);
-
- /* tell the upper layers we took care of things */
- return IRQ_HANDLED;
-}
-
-/* ... random driver code ... */
-
-static int __init gptimer_example_init(void)
-{
- int ret;
-
- /* grab the peripheral pins */
- ret = peripheral_request(P_TMR5, DRIVER_NAME);
- if (ret) {
- printk(KERN_NOTICE DRIVER_NAME ": peripheral request failed\n");
- return ret;
- }
-
- /* grab the IRQ for the timer */
- ret = request_irq(SAMPLE_IRQ_TIMER, gptimer_example_irq,
- IRQF_SHARED, DRIVER_NAME, &data);
- if (ret) {
- printk(KERN_NOTICE DRIVER_NAME ": IRQ request failed\n");
- peripheral_free(P_TMR5);
- return ret;
- }
-
- /* setup the timer and enable it */
- set_gptimer_config(TIMER5_id,
- WDTH_CAP | PULSE_HI | PERIOD_CNT | IRQ_ENA);
- enable_gptimers(TIMER5bit);
-
- return 0;
-}
-module_init(gptimer_example_init);
-
-static void __exit gptimer_example_exit(void)
-{
- disable_gptimers(TIMER5bit);
- free_irq(SAMPLE_IRQ_TIMER, &data);
- peripheral_free(P_TMR5);
-}
-module_exit(gptimer_example_exit);
-
-MODULE_LICENSE("BSD");
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index adeaa13..4d6a6ed 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -12,6 +12,7 @@ hostprogs-y += tracex3
hostprogs-y += tracex4
hostprogs-y += tracex5
hostprogs-y += tracex6
+hostprogs-y += tracex7
hostprogs-y += test_probe_write_user
hostprogs-y += trace_output
hostprogs-y += lathist
@@ -40,10 +41,12 @@ hostprogs-y += xdp_redirect
hostprogs-y += xdp_redirect_map
hostprogs-y += xdp_redirect_cpu
hostprogs-y += xdp_monitor
+hostprogs-y += xdp_rxq_info
hostprogs-y += syscall_tp
+hostprogs-y += cpustat
# Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o
+LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
test_lru_dist-objs := test_lru_dist.o $(LIBBPF)
@@ -58,6 +61,7 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o
tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o
tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o
tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o
+tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o
load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o
test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o
trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o
@@ -88,7 +92,9 @@ xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
+xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
+cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -101,6 +107,7 @@ always += tracex3_kern.o
always += tracex4_kern.o
always += tracex5_kern.o
always += tracex6_kern.o
+always += tracex7_kern.o
always += sock_flags_kern.o
always += test_probe_write_user_kern.o
always += trace_output_kern.o
@@ -112,6 +119,7 @@ always += offwaketime_kern.o
always += spintest_kern.o
always += map_perf_test_kern.o
always += test_overhead_tp_kern.o
+always += test_overhead_raw_tp_kern.o
always += test_overhead_kprobe_kern.o
always += parse_varlen.o parse_simple.o parse_ldabs.o
always += test_cgrp2_tc_kern.o
@@ -136,7 +144,10 @@ always += xdp_redirect_kern.o
always += xdp_redirect_map_kern.o
always += xdp_redirect_cpu_kern.o
always += xdp_monitor_kern.o
+always += xdp_rxq_info_kern.o
+always += xdp2skb_meta_kern.o
always += syscall_tp_kern.o
+always += cpustat_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -155,6 +166,7 @@ HOSTLOADLIBES_tracex3 += -lelf
HOSTLOADLIBES_tracex4 += -lelf -lrt
HOSTLOADLIBES_tracex5 += -lelf
HOSTLOADLIBES_tracex6 += -lelf
+HOSTLOADLIBES_tracex7 += -lelf
HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
HOSTLOADLIBES_load_sock_ops += -lelf
HOSTLOADLIBES_test_probe_write_user += -lelf
@@ -178,7 +190,9 @@ HOSTLOADLIBES_xdp_redirect += -lelf
HOSTLOADLIBES_xdp_redirect_map += -lelf
HOSTLOADLIBES_xdp_redirect_cpu += -lelf
HOSTLOADLIBES_xdp_monitor += -lelf
+HOSTLOADLIBES_xdp_rxq_info += -lelf
HOSTLOADLIBES_syscall_tp += -lelf
+HOSTLOADLIBES_cpustat += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
@@ -192,13 +206,16 @@ CLANG_ARCH_ARGS = -target $(ARCH)
endif
# Trick to allow make to be run from this directory
-all:
+all: $(LIBBPF)
$(MAKE) -C ../../ $(CURDIR)/
clean:
$(MAKE) -C ../../ M=$(CURDIR) clean
@rm -f *~
+$(LIBBPF): FORCE
+ $(MAKE) -C $(dir $@) $(notdir $@)
+
$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c
$(call if_changed_dep,cc_s_c)
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 242631a..bebe418 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -61,12 +61,14 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
+ bool is_raw_tracepoint = strncmp(event, "raw_tracepoint/", 15) == 0;
bool is_xdp = strncmp(event, "xdp", 3) == 0;
bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
bool is_sockops = strncmp(event, "sockops", 7) == 0;
bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
+ bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0;
size_t insns_cnt = size / sizeof(struct bpf_insn);
enum bpf_prog_type prog_type;
char buf[256];
@@ -84,6 +86,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_KPROBE;
} else if (is_tracepoint) {
prog_type = BPF_PROG_TYPE_TRACEPOINT;
+ } else if (is_raw_tracepoint) {
+ prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT;
} else if (is_xdp) {
prog_type = BPF_PROG_TYPE_XDP;
} else if (is_perf_event) {
@@ -96,6 +100,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_SOCK_OPS;
} else if (is_sk_skb) {
prog_type = BPF_PROG_TYPE_SK_SKB;
+ } else if (is_sk_msg) {
+ prog_type = BPF_PROG_TYPE_SK_MSG;
} else {
printf("Unknown event '%s'\n", event);
return -1;
@@ -113,7 +119,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
return 0;
- if (is_socket || is_sockops || is_sk_skb) {
+ if (is_socket || is_sockops || is_sk_skb || is_sk_msg) {
if (is_socket)
event += 6;
else
@@ -128,6 +134,16 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
return populate_prog_array(event, fd);
}
+ if (is_raw_tracepoint) {
+ efd = bpf_raw_tracepoint_open(event + 15, fd);
+ if (efd < 0) {
+ printf("tracepoint %s %s\n", event + 15, strerror(errno));
+ return -1;
+ }
+ event_fd[prog_cnt - 1] = efd;
+ return 0;
+ }
+
if (is_kprobe || is_kretprobe) {
if (is_kprobe)
event += 7;
@@ -584,12 +600,14 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
if (memcmp(shname, "kprobe/", 7) == 0 ||
memcmp(shname, "kretprobe/", 10) == 0 ||
memcmp(shname, "tracepoint/", 11) == 0 ||
+ memcmp(shname, "raw_tracepoint/", 15) == 0 ||
memcmp(shname, "xdp", 3) == 0 ||
memcmp(shname, "perf_event", 10) == 0 ||
memcmp(shname, "socket", 6) == 0 ||
memcmp(shname, "cgroup/", 7) == 0 ||
memcmp(shname, "sockops", 7) == 0 ||
- memcmp(shname, "sk_skb", 6) == 0) {
+ memcmp(shname, "sk_skb", 6) == 0 ||
+ memcmp(shname, "sk_msg", 6) == 0) {
ret = load_and_attach(shname, data->d_buf,
data->d_size);
if (ret != 0)
@@ -695,105 +713,3 @@ struct ksym *ksym_search(long key)
return &syms[0];
}
-int set_link_xdp_fd(int ifindex, int fd, __u32 flags)
-{
- struct sockaddr_nl sa;
- int sock, seq = 0, len, ret = -1;
- char buf[4096];
- struct nlattr *nla, *nla_xdp;
- struct {
- struct nlmsghdr nh;
- struct ifinfomsg ifinfo;
- char attrbuf[64];
- } req;
- struct nlmsghdr *nh;
- struct nlmsgerr *err;
-
- memset(&sa, 0, sizeof(sa));
- sa.nl_family = AF_NETLINK;
-
- sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
- if (sock < 0) {
- printf("open netlink socket: %s\n", strerror(errno));
- return -1;
- }
-
- if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
- printf("bind to netlink: %s\n", strerror(errno));
- goto cleanup;
- }
-
- memset(&req, 0, sizeof(req));
- req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
- req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
- req.nh.nlmsg_type = RTM_SETLINK;
- req.nh.nlmsg_pid = 0;
- req.nh.nlmsg_seq = ++seq;
- req.ifinfo.ifi_family = AF_UNSPEC;
- req.ifinfo.ifi_index = ifindex;
-
- /* started nested attribute for XDP */
- nla = (struct nlattr *)(((char *)&req)
- + NLMSG_ALIGN(req.nh.nlmsg_len));
- nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
- nla->nla_len = NLA_HDRLEN;
-
- /* add XDP fd */
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
- nla->nla_len += nla_xdp->nla_len;
-
- /* if user passed in any flags, add those too */
- if (flags) {
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
- nla->nla_len += nla_xdp->nla_len;
- }
-
- req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
-
- if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
- printf("send to netlink: %s\n", strerror(errno));
- goto cleanup;
- }
-
- len = recv(sock, buf, sizeof(buf), 0);
- if (len < 0) {
- printf("recv from netlink: %s\n", strerror(errno));
- goto cleanup;
- }
-
- for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
- nh = NLMSG_NEXT(nh, len)) {
- if (nh->nlmsg_pid != getpid()) {
- printf("Wrong pid %d, expected %d\n",
- nh->nlmsg_pid, getpid());
- goto cleanup;
- }
- if (nh->nlmsg_seq != seq) {
- printf("Wrong seq %d, expected %d\n",
- nh->nlmsg_seq, seq);
- goto cleanup;
- }
- switch (nh->nlmsg_type) {
- case NLMSG_ERROR:
- err = (struct nlmsgerr *)NLMSG_DATA(nh);
- if (!err->error)
- continue;
- printf("nlmsg error %s\n", strerror(-err->error));
- goto cleanup;
- case NLMSG_DONE:
- break;
- }
- }
-
- ret = 0;
-
-cleanup:
- close(sock);
- return ret;
-}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index 7d57a42..453c200 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -61,5 +61,5 @@ struct ksym {
int load_kallsyms(void);
struct ksym *ksym_search(long key);
-int set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
#endif
diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c
index 9d751e2..8eca27e 100644
--- a/samples/bpf/cookie_uid_helper_example.c
+++ b/samples/bpf/cookie_uid_helper_example.c
@@ -246,7 +246,7 @@ static void udp_client(void)
recv_len = recvfrom(s_rcv, &buf, sizeof(buf), 0,
(struct sockaddr *)&si_me, &slen);
if (recv_len < 0)
- error(1, errno, "revieve\n");
+ error(1, errno, "receive\n");
res = memcmp(&(si_other.sin_addr), &(si_me.sin_addr),
sizeof(si_me.sin_addr));
if (res != 0)
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
new file mode 100644
index 0000000..68c84da
--- /dev/null
+++ b/samples/bpf/cpustat_kern.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/*
+ * The CPU number, cstate number and pstate number are based
+ * on 96boards Hikey with octa CA53 CPUs.
+ *
+ * Every CPU have three idle states for cstate:
+ * WFI, CPU_OFF, CLUSTER_OFF
+ *
+ * Every CPU have 5 operating points:
+ * 208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
+ *
+ * This code is based on these assumption and other platforms
+ * need to adjust these definitions.
+ */
+#define MAX_CPU 8
+#define MAX_PSTATE_ENTRIES 5
+#define MAX_CSTATE_ENTRIES 3
+
+static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
+
+/*
+ * my_map structure is used to record cstate and pstate index and
+ * timestamp (Idx, Ts), when new event incoming we need to update
+ * combination for new state index and timestamp (Idx`, Ts`).
+ *
+ * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
+ * interval for the previous state: Duration(Idx) = Ts` - Ts.
+ *
+ * Every CPU has one below array for recording state index and
+ * timestamp, and record for cstate and pstate saperately:
+ *
+ * +--------------------------+
+ * | cstate timestamp |
+ * +--------------------------+
+ * | cstate index |
+ * +--------------------------+
+ * | pstate timestamp |
+ * +--------------------------+
+ * | pstate index |
+ * +--------------------------+
+ */
+#define MAP_OFF_CSTATE_TIME 0
+#define MAP_OFF_CSTATE_IDX 1
+#define MAP_OFF_PSTATE_TIME 2
+#define MAP_OFF_PSTATE_IDX 3
+#define MAP_OFF_NUM 4
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAP_OFF_NUM,
+};
+
+/* cstate_duration records duration time for every idle state per CPU */
+struct bpf_map_def SEC("maps") cstate_duration = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
+};
+
+/* pstate_duration records duration time for every operating point per CPU */
+struct bpf_map_def SEC("maps") pstate_duration = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
+};
+
+/*
+ * The trace events for cpu_idle and cpu_frequency are taken from:
+ * /sys/kernel/debug/tracing/events/power/cpu_idle/format
+ * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
+ *
+ * These two events have same format, so define one common structure.
+ */
+struct cpu_args {
+ u64 pad;
+ u32 state;
+ u32 cpu_id;
+};
+
+/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
+static u32 find_cpu_pstate_idx(u32 frequency)
+{
+ u32 i;
+
+ for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
+ if (frequency == cpu_opps[i])
+ return i;
+ }
+
+ return i;
+}
+
+SEC("tracepoint/power/cpu_idle")
+int bpf_prog1(struct cpu_args *ctx)
+{
+ u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+ u32 key, cpu, pstate_idx;
+ u64 *val;
+
+ if (ctx->cpu_id > MAX_CPU)
+ return 0;
+
+ cpu = ctx->cpu_id;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
+ cts = bpf_map_lookup_elem(&my_map, &key);
+ if (!cts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+ cstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!cstate)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+ pts = bpf_map_lookup_elem(&my_map, &key);
+ if (!pts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+ pstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!pstate)
+ return 0;
+
+ prev_state = *cstate;
+ *cstate = ctx->state;
+
+ if (!*cts) {
+ *cts = bpf_ktime_get_ns();
+ return 0;
+ }
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = cur_ts - *cts;
+ *cts = cur_ts;
+
+ /*
+ * When state doesn't equal to (u32)-1, the cpu will enter
+ * one idle state; for this case we need to record interval
+ * for the pstate.
+ *
+ * OPP2
+ * +---------------------+
+ * OPP1 | |
+ * ---------+ |
+ * | Idle state
+ * +---------------
+ *
+ * |<- pstate duration ->|
+ * ^ ^
+ * pts cur_ts
+ */
+ if (ctx->state != (u32)-1) {
+
+ /* record pstate after have first cpu_frequency event */
+ if (!*pts)
+ return 0;
+
+ delta = cur_ts - *pts;
+
+ pstate_idx = find_cpu_pstate_idx(*pstate);
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
+ return 0;
+
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+
+ /*
+ * When state equal to (u32)-1, the cpu just exits from one
+ * specific idle state; for this case we need to record
+ * interval for the pstate.
+ *
+ * OPP2
+ * -----------+
+ * | OPP1
+ * | +-----------
+ * | Idle state |
+ * +---------------------+
+ *
+ * |<- cstate duration ->|
+ * ^ ^
+ * cts cur_ts
+ */
+ } else {
+
+ key = cpu * MAX_CSTATE_ENTRIES + prev_state;
+ val = bpf_map_lookup_elem(&cstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+ }
+
+ /* Update timestamp for pstate as new start time */
+ if (*pts)
+ *pts = cur_ts;
+
+ return 0;
+}
+
+SEC("tracepoint/power/cpu_frequency")
+int bpf_prog2(struct cpu_args *ctx)
+{
+ u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+ u32 key, cpu, pstate_idx;
+ u64 *val;
+
+ cpu = ctx->cpu_id;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+ pts = bpf_map_lookup_elem(&my_map, &key);
+ if (!pts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+ pstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!pstate)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+ cstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!cstate)
+ return 0;
+
+ prev_state = *pstate;
+ *pstate = ctx->state;
+
+ if (!*pts) {
+ *pts = bpf_ktime_get_ns();
+ return 0;
+ }
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = cur_ts - *pts;
+ *pts = cur_ts;
+
+ /* When CPU is in idle, bail out to skip pstate statistics */
+ if (*cstate != (u32)(-1))
+ return 0;
+
+ /*
+ * The cpu changes to another different OPP (in below diagram
+ * change frequency from OPP3 to OPP1), need recording interval
+ * for previous frequency OPP3 and update timestamp as start
+ * time for new frequency OPP1.
+ *
+ * OPP3
+ * +---------------------+
+ * OPP2 | |
+ * ---------+ |
+ * | OPP1
+ * +---------------
+ *
+ * |<- pstate duration ->|
+ * ^ ^
+ * pts cur_ts
+ */
+ pstate_idx = find_cpu_pstate_idx(*pstate);
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
+ return 0;
+
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
new file mode 100644
index 0000000..2b4cd1a
--- /dev/null
+++ b/samples/bpf/cpustat_user.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sched.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <locale.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_CPU 8
+#define MAX_PSTATE_ENTRIES 5
+#define MAX_CSTATE_ENTRIES 3
+#define MAX_STARS 40
+
+#define CPUFREQ_MAX_SYSFS_PATH "/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"
+#define CPUFREQ_LOWEST_FREQ "208000"
+#define CPUFREQ_HIGHEST_FREQ "12000000"
+
+struct cpu_stat_data {
+ unsigned long cstate[MAX_CSTATE_ENTRIES];
+ unsigned long pstate[MAX_PSTATE_ENTRIES];
+};
+
+static struct cpu_stat_data stat_data[MAX_CPU];
+
+static void cpu_stat_print(void)
+{
+ int i, j;
+ char state_str[sizeof("cstate-9")];
+ struct cpu_stat_data *data;
+
+ /* Clear screen */
+ printf("\033[2J");
+
+ /* Header */
+ printf("\nCPU states statistics:\n");
+ printf("%-10s ", "state(ms)");
+
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+ sprintf(state_str, "cstate-%d", i);
+ printf("%-11s ", state_str);
+ }
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+ sprintf(state_str, "pstate-%d", i);
+ printf("%-11s ", state_str);
+ }
+
+ printf("\n");
+
+ for (j = 0; j < MAX_CPU; j++) {
+ data = &stat_data[j];
+
+ printf("CPU-%-6d ", j);
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++)
+ printf("%-11ld ", data->cstate[i] / 1000000);
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++)
+ printf("%-11ld ", data->pstate[i] / 1000000);
+
+ printf("\n");
+ }
+}
+
+static void cpu_stat_update(int cstate_fd, int pstate_fd)
+{
+ unsigned long key, value;
+ int c, i;
+
+ for (c = 0; c < MAX_CPU; c++) {
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+ key = c * MAX_CSTATE_ENTRIES + i;
+ bpf_map_lookup_elem(cstate_fd, &key, &value);
+ stat_data[c].cstate[i] = value;
+ }
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+ key = c * MAX_PSTATE_ENTRIES + i;
+ bpf_map_lookup_elem(pstate_fd, &key, &value);
+ stat_data[c].pstate[i] = value;
+ }
+ }
+}
+
+/*
+ * This function is copied from 'idlestat' tool function
+ * idlestat_wake_all() in idlestate.c.
+ *
+ * It sets the self running task affinity to cpus one by one so can wake up
+ * the specific CPU to handle scheduling; this results in all cpus can be
+ * waken up once and produce ftrace event 'trace_cpu_idle'.
+ */
+static int cpu_stat_inject_cpu_idle_event(void)
+{
+ int rcpu, i, ret;
+ cpu_set_t cpumask;
+ cpu_set_t original_cpumask;
+
+ ret = sysconf(_SC_NPROCESSORS_CONF);
+ if (ret < 0)
+ return -1;
+
+ rcpu = sched_getcpu();
+ if (rcpu < 0)
+ return -1;
+
+ /* Keep track of the CPUs we will run on */
+ sched_getaffinity(0, sizeof(original_cpumask), &original_cpumask);
+
+ for (i = 0; i < ret; i++) {
+
+ /* Pointless to wake up ourself */
+ if (i == rcpu)
+ continue;
+
+ /* Pointless to wake CPUs we will not run on */
+ if (!CPU_ISSET(i, &original_cpumask))
+ continue;
+
+ CPU_ZERO(&cpumask);
+ CPU_SET(i, &cpumask);
+
+ sched_setaffinity(0, sizeof(cpumask), &cpumask);
+ }
+
+ /* Enable all the CPUs of the original mask */
+ sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask);
+ return 0;
+}
+
+/*
+ * It's possible to have no any frequency change for long time and cannot
+ * get ftrace event 'trace_cpu_frequency' for long period, this introduces
+ * big deviation for pstate statistics.
+ *
+ * To solve this issue, below code forces to set 'scaling_max_freq' to 208MHz
+ * for triggering ftrace event 'trace_cpu_frequency' and then recovery back to
+ * the maximum frequency value 1.2GHz.
+ */
+static int cpu_stat_inject_cpu_frequency_event(void)
+{
+ int len, fd;
+
+ fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY);
+ if (fd < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ return fd;
+ }
+
+ len = write(fd, CPUFREQ_LOWEST_FREQ, strlen(CPUFREQ_LOWEST_FREQ));
+ if (len < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ goto err;
+ }
+
+ len = write(fd, CPUFREQ_HIGHEST_FREQ, strlen(CPUFREQ_HIGHEST_FREQ));
+ if (len < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ goto err;
+ }
+
+err:
+ close(fd);
+ return len;
+}
+
+static void int_exit(int sig)
+{
+ cpu_stat_inject_cpu_idle_event();
+ cpu_stat_inject_cpu_frequency_event();
+ cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_print();
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+ int ret;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ ret = cpu_stat_inject_cpu_idle_event();
+ if (ret < 0)
+ return 1;
+
+ ret = cpu_stat_inject_cpu_frequency_event();
+ if (ret < 0)
+ return 1;
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ while (1) {
+ cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_print();
+ sleep(5);
+ }
+
+ return 0;
+}
diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
index 370b749..9a8db7bd 100644
--- a/samples/bpf/tcbpf2_kern.c
+++ b/samples/bpf/tcbpf2_kern.c
@@ -15,6 +15,7 @@
#include <uapi/linux/tcp.h>
#include <uapi/linux/filter.h>
#include <uapi/linux/pkt_cls.h>
+#include <uapi/linux/erspan.h>
#include <net/ipv6.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
@@ -39,10 +40,6 @@ struct vxlan_metadata {
u32 gbp;
};
-struct erspan_metadata {
- __be32 index;
-};
-
SEC("gre_set_tunnel")
int _gre_set_tunnel(struct __sk_buff *skb)
{
@@ -55,7 +52,8 @@ int _gre_set_tunnel(struct __sk_buff *skb)
key.tunnel_tos = 0;
key.tunnel_ttl = 64;
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
@@ -81,6 +79,50 @@ int _gre_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("ip6gretap_set_tunnel")
+int _ip6gretap_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = _htonl(0x11); /* ::11 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+ key.tunnel_label = 0xabcde;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+ BPF_F_SEQ_NUMBER);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip6gretap_get_tunnel")
+int _ip6gretap_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "key %d remote ip6 ::%x label %x\n";
+ struct bpf_tunnel_key key;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+
+ return TC_ACT_OK;
+}
+
SEC("erspan_set_tunnel")
int _erspan_set_tunnel(struct __sk_buff *skb)
{
@@ -100,7 +142,20 @@ int _erspan_set_tunnel(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- md.index = htonl(123);
+ __builtin_memset(&md, 0, sizeof(md));
+#ifdef ERSPAN_V1
+ md.version = 1;
+ md.u.index = bpf_htonl(123);
+#else
+ u8 direction = 1;
+ u8 hwid = 7;
+
+ md.version = 2;
+ md.u.md2.dir = direction;
+ md.u.md2.hwid = hwid & 0xf;
+ md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+#endif
+
ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
if (ret < 0) {
ERROR(ret);
@@ -113,7 +168,7 @@ int _erspan_set_tunnel(struct __sk_buff *skb)
SEC("erspan_get_tunnel")
int _erspan_get_tunnel(struct __sk_buff *skb)
{
- char fmt[] = "key %d remote ip 0x%x erspan index 0x%x\n";
+ char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
struct bpf_tunnel_key key;
struct erspan_metadata md;
u32 index;
@@ -131,9 +186,107 @@ int _erspan_get_tunnel(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- index = bpf_ntohl(md.index);
bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, index);
+ key.tunnel_id, key.remote_ipv4, md.version);
+
+#ifdef ERSPAN_V1
+ char fmt2[] = "\tindex %x\n";
+
+ index = bpf_ntohl(md.u.index);
+ bpf_trace_printk(fmt2, sizeof(fmt2), index);
+#else
+ char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+ bpf_trace_printk(fmt2, sizeof(fmt2),
+ md.u.md2.dir,
+ (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+ bpf_ntohl(md.u.md2.timestamp));
+#endif
+
+ return TC_ACT_OK;
+}
+
+SEC("ip4ip6erspan_set_tunnel")
+int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = _htonl(0x11);
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ __builtin_memset(&md, 0, sizeof(md));
+
+#ifdef ERSPAN_V1
+ md.u.index = htonl(123);
+ md.version = 1;
+#else
+ u8 direction = 0;
+ u8 hwid = 17;
+
+ md.version = 2;
+ md.u.md2.dir = direction;
+ md.u.md2.hwid = hwid & 0xf;
+ md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+#endif
+
+ ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip4ip6erspan_get_tunnel")
+int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ u32 index;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv4, md.version);
+
+#ifdef ERSPAN_V1
+ char fmt2[] = "\tindex %x\n";
+
+ index = bpf_ntohl(md.u.index);
+ bpf_trace_printk(fmt2, sizeof(fmt2), index);
+#else
+ char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+ bpf_trace_printk(fmt2, sizeof(fmt2),
+ md.u.md2.dir,
+ (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+ bpf_ntohl(md.u.md2.timestamp));
+#endif
return TC_ACT_OK;
}
diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c
index 3e8232c..1af412e 100644
--- a/samples/bpf/test_cgrp2_attach2.c
+++ b/samples/bpf/test_cgrp2_attach2.c
@@ -78,7 +78,8 @@ static int test_foo_bar(void)
if (join_cgroup(FOO))
goto err;
- if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
log_err("Attaching prog to /foo");
goto err;
}
@@ -97,7 +98,8 @@ static int test_foo_bar(void)
printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
assert(system(PING_CMD) != 0);
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
log_err("Attaching prog to /foo/bar");
goto err;
}
@@ -114,7 +116,8 @@ static int test_foo_bar(void)
"This ping in cgroup /foo/bar should fail...\n");
assert(system(PING_CMD) != 0);
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
log_err("Attaching prog to /foo/bar");
goto err;
}
@@ -128,7 +131,8 @@ static int test_foo_bar(void)
"This ping in cgroup /foo/bar should pass...\n");
assert(system(PING_CMD) == 0);
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
log_err("Attaching prog to /foo/bar");
goto err;
}
@@ -161,13 +165,15 @@ static int test_foo_bar(void)
goto err;
}
- if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
errno = 0;
log_err("Unexpected success attaching overridable prog to /foo/bar");
goto err;
}
- if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
errno = 0;
log_err("Unexpected success attaching overridable prog to /foo");
goto err;
@@ -273,27 +279,33 @@ static int test_multiprog(void)
if (join_cgroup("/cg1/cg2/cg3/cg4/cg5"))
goto err;
- if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, 2)) {
+ if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI)) {
log_err("Attaching prog to cg1");
goto err;
}
- if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, 2)) {
+ if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI)) {
log_err("Unexpected success attaching the same prog to cg1");
goto err;
}
- if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS, 2)) {
+ if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI)) {
log_err("Attaching prog2 to cg1");
goto err;
}
- if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
log_err("Attaching prog to cg2");
goto err;
}
- if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS, 2)) {
+ if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI)) {
log_err("Attaching prog to cg3");
goto err;
}
- if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
log_err("Attaching prog to cg4");
goto err;
}
diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh
index 8ee0371a..9f61742 100755
--- a/samples/bpf/test_cgrp2_sock.sh
+++ b/samples/bpf/test_cgrp2_sock.sh
@@ -61,6 +61,7 @@ cleanup_and_exit()
[ -n "$msg" ] && echo "ERROR: $msg"
+ test_cgrp2_sock -d ${CGRP_MNT}/sockopts
ip li del cgrp2_sock
umount ${CGRP_MNT}
diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh
index fc4e64d..0f396a8 100755
--- a/samples/bpf/test_cgrp2_sock2.sh
+++ b/samples/bpf/test_cgrp2_sock2.sh
@@ -28,6 +28,9 @@ function attach_bpf {
}
function cleanup {
+ if [ -d /tmp/cgroupv2/foo ]; then
+ test_cgrp2_sock -d /tmp/cgroupv2/foo
+ fi
ip link del veth0b
ip netns delete at_ns0
umount /tmp/cgroupv2
diff --git a/samples/bpf/test_overhead_raw_tp_kern.c b/samples/bpf/test_overhead_raw_tp_kern.c
new file mode 100644
index 0000000..d2af8bc
--- /dev/null
+++ b/samples/bpf/test_overhead_raw_tp_kern.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+SEC("raw_tracepoint/task_rename")
+int prog(struct bpf_raw_tracepoint_args *ctx)
+{
+ return 0;
+}
+
+SEC("raw_tracepoint/urandom_read")
+int prog2(struct bpf_raw_tracepoint_args *ctx)
+{
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c
index d291167f..e1d35e0 100644
--- a/samples/bpf/test_overhead_user.c
+++ b/samples/bpf/test_overhead_user.c
@@ -158,5 +158,17 @@ int main(int argc, char **argv)
unload_progs();
}
+ if (test_flags & 0xC0) {
+ snprintf(filename, sizeof(filename),
+ "%s_raw_tp_kern.o", argv[0]);
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+ printf("w/RAW_TRACEPOINT\n");
+ run_perf_test(num_cpu, test_flags >> 6);
+ unload_progs();
+ }
+
return 0;
}
diff --git a/samples/bpf/test_override_return.sh b/samples/bpf/test_override_return.sh
new file mode 100755
index 0000000..e68b9ee
--- /dev/null
+++ b/samples/bpf/test_override_return.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+rm -f testfile.img
+dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
+DEVICE=$(losetup --show -f testfile.img)
+mkfs.btrfs -f $DEVICE
+mkdir tmpmnt
+./tracex7 $DEVICE
+if [ $? -eq 0 ]
+then
+ echo "SUCCESS!"
+else
+ echo "FAILED!"
+fi
+losetup -d $DEVICE
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
index 312e172..c265863 100755
--- a/samples/bpf/test_tunnel_bpf.sh
+++ b/samples/bpf/test_tunnel_bpf.sh
@@ -23,7 +23,8 @@ function config_device {
function add_gre_tunnel {
# in namespace
ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE key 2 local 172.16.1.100 remote 172.16.1.200
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200
ip netns exec at_ns0 ip link set dev $DEV_NS up
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
@@ -33,10 +34,43 @@ function add_gre_tunnel {
ip addr add dev $DEV 10.1.1.200/24
}
-function add_erspan_tunnel {
+function add_ip6gretap_tunnel {
+
+ # assign ipv6 address
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
# in namespace
ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 local 172.16.1.100 remote 172.16.1.200 erspan 123
+ ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
+ local ::11 remote ::22
+
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # out of namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip addr add dev $DEV fc80::200/24
+ ip link set dev $DEV up
+}
+
+function add_erspan_tunnel {
+ # in namespace
+ if [ "$1" == "v1" ]; then
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200 \
+ erspan_ver 1 erspan 123
+ else
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200 \
+ erspan_ver 2 erspan_dir egress erspan_hwid 3
+ fi
ip netns exec at_ns0 ip link set dev $DEV_NS up
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
@@ -46,6 +80,35 @@ function add_erspan_tunnel {
ip addr add dev $DEV 10.1.1.200/24
}
+function add_ip6erspan_tunnel {
+
+ # assign ipv6 address
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # in namespace
+ if [ "$1" == "v1" ]; then
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local ::11 remote ::22 \
+ erspan_ver 1 erspan 123
+ else
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local ::11 remote ::22 \
+ erspan_ver 2 erspan_dir egress erspan_hwid 7
+ fi
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # out of namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip link set dev $DEV up
+}
+
function add_vxlan_tunnel {
# Set static ARP entry here because iptables set-mark works
# on L3 packet, as a result not applying to ARP packets,
@@ -113,18 +176,65 @@ function test_gre {
cleanup
}
+function test_ip6gre {
+ TYPE=ip6gre
+ DEV_NS=ip6gre00
+ DEV=ip6gre11
+ config_device
+ # reuse the ip6gretap function
+ add_ip6gretap_tunnel
+ attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+ # underlay
+ ping6 -c 4 ::11
+ # overlay: ipv4 over ipv6
+ ip netns exec at_ns0 ping -c 1 10.1.1.200
+ ping -c 1 10.1.1.100
+ # overlay: ipv6 over ipv6
+ ip netns exec at_ns0 ping6 -c 1 fc80::200
+ cleanup
+}
+
+function test_ip6gretap {
+ TYPE=ip6gretap
+ DEV_NS=ip6gretap00
+ DEV=ip6gretap11
+ config_device
+ add_ip6gretap_tunnel
+ attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+ # underlay
+ ping6 -c 4 ::11
+ # overlay: ipv4 over ipv6
+ ip netns exec at_ns0 ping -i .2 -c 1 10.1.1.200
+ ping -c 1 10.1.1.100
+ # overlay: ipv6 over ipv6
+ ip netns exec at_ns0 ping6 -c 1 fc80::200
+ cleanup
+}
+
function test_erspan {
TYPE=erspan
DEV_NS=erspan00
DEV=erspan11
config_device
- add_erspan_tunnel
+ add_erspan_tunnel $1
attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
ping -c 1 10.1.1.100
ip netns exec at_ns0 ping -c 1 10.1.1.200
cleanup
}
+function test_ip6erspan {
+ TYPE=ip6erspan
+ DEV_NS=ip6erspan00
+ DEV=ip6erspan11
+ config_device
+ add_ip6erspan_tunnel $1
+ attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
+ ping6 -c 3 ::11
+ ip netns exec at_ns0 ping -c 1 10.1.1.200
+ cleanup
+}
+
function test_vxlan {
TYPE=vxlan
DEV_NS=vxlan00
@@ -175,9 +285,12 @@ function cleanup {
ip link del veth1
ip link del ipip11
ip link del gretap11
+ ip link del ip6gre11
+ ip link del ip6gretap11
ip link del vxlan11
ip link del geneve11
ip link del erspan11
+ ip link del ip6erspan11
pkill tcpdump
pkill cat
set -ex
@@ -187,8 +300,16 @@ trap cleanup 0 2 3 6 9
cleanup
echo "Testing GRE tunnel..."
test_gre
+echo "Testing IP6GRE tunnel..."
+test_ip6gre
+echo "Testing IP6GRETAP tunnel..."
+test_ip6gretap
echo "Testing ERSPAN tunnel..."
-test_erspan
+test_erspan v1
+test_erspan v2
+echo "Testing IP6ERSPAN tunnel..."
+test_ip6erspan v1
+test_ip6erspan v2
echo "Testing VXLAN tunnel..."
test_vxlan
echo "Testing GENEVE tunnel..."
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index a77a583d..7068fbd 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -39,6 +39,7 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
{
char time_fmt1[] = "Time Enabled: %llu, Time Running: %llu";
char time_fmt2[] = "Get Time Failed, ErrCode: %d";
+ char addr_fmt[] = "Address recorded on event: %llx";
char fmt[] = "CPU-%d period %lld ip %llx";
u32 cpu = bpf_get_smp_processor_id();
struct bpf_perf_event_value value_buf;
@@ -64,6 +65,9 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
else
bpf_trace_printk(time_fmt2, sizeof(time_fmt2), ret);
+ if (ctx->addr != 0)
+ bpf_trace_printk(addr_fmt, sizeof(addr_fmt), ctx->addr);
+
val = bpf_map_lookup_elem(&counts, &key);
if (val)
(*val)++;
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index bf4f1b6..56f7a25 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -215,6 +215,17 @@ static void test_bpf_perf_event(void)
/* Intel Instruction Retired */
.config = 0xc0,
};
+ struct perf_event_attr attr_type_raw_lock_load = {
+ .sample_freq = SAMPLE_FREQ,
+ .freq = 1,
+ .type = PERF_TYPE_RAW,
+ /* Intel MEM_UOPS_RETIRED.LOCK_LOADS */
+ .config = 0x21d0,
+ /* Request to record lock address from PEBS */
+ .sample_type = PERF_SAMPLE_ADDR,
+ /* Record address value requires precise event */
+ .precise_ip = 2,
+ };
printf("Test HW_CPU_CYCLES\n");
test_perf_event_all_cpu(&attr_type_hw);
@@ -236,6 +247,10 @@ static void test_bpf_perf_event(void)
test_perf_event_all_cpu(&attr_type_raw);
test_perf_event_task(&attr_type_raw);
+ printf("Test Lock Load\n");
+ test_perf_event_all_cpu(&attr_type_raw_lock_load);
+ test_perf_event_task(&attr_type_raw_lock_load);
+
printf("*** PASS ***\n");
}
diff --git a/samples/bpf/tracex7_kern.c b/samples/bpf/tracex7_kern.c
new file mode 100644
index 0000000..1ab308a
--- /dev/null
+++ b/samples/bpf/tracex7_kern.c
@@ -0,0 +1,16 @@
+#include <uapi/linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+SEC("kprobe/open_ctree")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ unsigned long rc = -12;
+
+ bpf_override_return(ctx, rc);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c
new file mode 100644
index 0000000..8a52ac4
--- /dev/null
+++ b/samples/bpf/tracex7_user.c
@@ -0,0 +1,28 @@
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+int main(int argc, char **argv)
+{
+ FILE *f;
+ char filename[256];
+ char command[256];
+ int ret;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ snprintf(command, 256, "mount %s tmpmnt/", argv[1]);
+ f = popen(command, "r");
+ ret = pclose(f);
+
+ return ret ? 0 : 1;
+}
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index fdaefe9..b901ee2 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -25,7 +25,7 @@ static __u32 xdp_flags;
static void int_exit(int sig)
{
- set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
exit(0);
}
@@ -116,7 +116,7 @@ int main(int argc, char **argv)
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
- if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+ if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
diff --git a/samples/bpf/xdp2skb_meta.sh b/samples/bpf/xdp2skb_meta.sh
new file mode 100755
index 0000000..b9c9549
--- /dev/null
+++ b/samples/bpf/xdp2skb_meta.sh
@@ -0,0 +1,220 @@
+#!/bin/bash
+#
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
+#
+# Bash-shell example on using iproute2 tools 'tc' and 'ip' to load
+# eBPF programs, both for XDP and clsbpf. Shell script function
+# wrappers and even long options parsing is illustrated, for ease of
+# use.
+#
+# Related to sample/bpf/xdp2skb_meta_kern.c, which contains BPF-progs
+# that need to collaborate between XDP and TC hooks. Thus, it is
+# convenient that the same tool load both programs that need to work
+# together.
+#
+BPF_FILE=xdp2skb_meta_kern.o
+DIR=$(dirname $0)
+
+export TC=/usr/sbin/tc
+export IP=/usr/sbin/ip
+
+function usage() {
+ echo ""
+ echo "Usage: $0 [-vfh] --dev ethX"
+ echo " -d | --dev : Network device (required)"
+ echo " --flush : Cleanup flush TC and XDP progs"
+ echo " --list : (\$LIST) List TC and XDP progs"
+ echo " -v | --verbose : (\$VERBOSE) Verbose"
+ echo " --dry-run : (\$DRYRUN) Dry-run only (echo commands)"
+ echo ""
+}
+
+## -- General shell logging cmds --
+function err() {
+ local exitcode=$1
+ shift
+ echo "ERROR: $@" >&2
+ exit $exitcode
+}
+
+function info() {
+ if [[ -n "$VERBOSE" ]]; then
+ echo "# $@"
+ fi
+}
+
+## -- Helper function calls --
+
+# Wrapper call for TC and IP
+# - Will display the offending command on failure
+function _call_cmd() {
+ local cmd="$1"
+ local allow_fail="$2"
+ shift 2
+ if [[ -n "$VERBOSE" ]]; then
+ echo "$(basename $cmd) $@"
+ fi
+ if [[ -n "$DRYRUN" ]]; then
+ return
+ fi
+ $cmd "$@"
+ local status=$?
+ if (( $status != 0 )); then
+ if [[ "$allow_fail" == "" ]]; then
+ err 2 "Exec error($status) occurred cmd: \"$cmd $@\""
+ fi
+ fi
+}
+function call_tc() {
+ _call_cmd "$TC" "" "$@"
+}
+function call_tc_allow_fail() {
+ _call_cmd "$TC" "allow_fail" "$@"
+}
+function call_ip() {
+ _call_cmd "$IP" "" "$@"
+}
+
+## --- Parse command line arguments / parameters ---
+# Using external program "getopt" to get --long-options
+OPTIONS=$(getopt -o vfhd: \
+ --long verbose,flush,help,list,dev:,dry-run -- "$@")
+if (( $? != 0 )); then
+ err 4 "Error calling getopt"
+fi
+eval set -- "$OPTIONS"
+
+unset DEV
+unset FLUSH
+while true; do
+ case "$1" in
+ -d | --dev ) # device
+ DEV=$2
+ info "Device set to: DEV=$DEV" >&2
+ shift 2
+ ;;
+ -v | --verbose)
+ VERBOSE=yes
+ # info "Verbose mode: VERBOSE=$VERBOSE" >&2
+ shift
+ ;;
+ --dry-run )
+ DRYRUN=yes
+ VERBOSE=yes
+ info "Dry-run mode: enable VERBOSE and don't call TC+IP" >&2
+ shift
+ ;;
+ -f | --flush )
+ FLUSH=yes
+ shift
+ ;;
+ --list )
+ LIST=yes
+ shift
+ ;;
+ -- )
+ shift
+ break
+ ;;
+ -h | --help )
+ usage;
+ exit 0
+ ;;
+ * )
+ shift
+ break
+ ;;
+ esac
+done
+
+FILE="$DIR/$BPF_FILE"
+if [[ ! -e $FILE ]]; then
+ err 3 "Missing BPF object file ($FILE)"
+fi
+
+if [[ -z $DEV ]]; then
+ usage
+ err 2 "Please specify network device -- required option --dev"
+fi
+
+## -- Function calls --
+
+function list_tc()
+{
+ local device="$1"
+ shift
+ info "Listing current TC ingress rules"
+ call_tc filter show dev $device ingress
+}
+
+function list_xdp()
+{
+ local device="$1"
+ shift
+ info "Listing current XDP device($device) setting"
+ call_ip link show dev $device | grep --color=auto xdp
+}
+
+function flush_tc()
+{
+ local device="$1"
+ shift
+ info "Flush TC on device: $device"
+ call_tc_allow_fail filter del dev $device ingress
+ call_tc_allow_fail qdisc del dev $device clsact
+}
+
+function flush_xdp()
+{
+ local device="$1"
+ shift
+ info "Flush XDP on device: $device"
+ call_ip link set dev $device xdp off
+}
+
+function attach_tc_mark()
+{
+ local device="$1"
+ local file="$2"
+ local prog="tc_mark"
+ shift 2
+
+ # Re-attach clsact to clear/flush existing role
+ call_tc_allow_fail qdisc del dev $device clsact 2> /dev/null
+ call_tc qdisc add dev $device clsact
+
+ # Attach BPF prog
+ call_tc filter add dev $device ingress \
+ prio 1 handle 1 bpf da obj $file sec $prog
+}
+
+function attach_xdp_mark()
+{
+ local device="$1"
+ local file="$2"
+ local prog="xdp_mark"
+ shift 2
+
+ # Remove XDP prog in-case it's already loaded
+ # TODO: Need ip-link option to override/replace existing XDP prog
+ flush_xdp $device
+
+ # Attach XDP/BPF prog
+ call_ip link set dev $device xdp obj $file sec $prog
+}
+
+if [[ -n $FLUSH ]]; then
+ flush_tc $DEV
+ flush_xdp $DEV
+ exit 0
+fi
+
+if [[ -n $LIST ]]; then
+ list_tc $DEV
+ list_xdp $DEV
+ exit 0
+fi
+
+attach_tc_mark $DEV $FILE
+attach_xdp_mark $DEV $FILE
diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c
new file mode 100644
index 0000000..0c12048
--- /dev/null
+++ b/samples/bpf/xdp2skb_meta_kern.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
+ *
+ * Example howto transfer info from XDP to SKB, e.g. skb->mark
+ * -----------------------------------------------------------
+ * This uses the XDP data_meta infrastructure, and is a cooperation
+ * between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook.
+ *
+ * Notice: This example does not use the BPF C-loader (bpf_load.c),
+ * but instead rely on the iproute2 TC tool for loading BPF-objects.
+ */
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/pkt_cls.h>
+
+#include "bpf_helpers.h"
+
+/*
+ * This struct is stored in the XDP 'data_meta' area, which is located
+ * just in-front-of the raw packet payload data. The meaning is
+ * specific to these two BPF programs that use it as a communication
+ * channel. XDP adjust/increase the area via a bpf-helper, and TC use
+ * boundary checks to see if data have been provided.
+ *
+ * The struct must be 4 byte aligned, which here is enforced by the
+ * struct __attribute__((aligned(4))).
+ */
+struct meta_info {
+ __u32 mark;
+} __attribute__((aligned(4)));
+
+SEC("xdp_mark")
+int _xdp_mark(struct xdp_md *ctx)
+{
+ struct meta_info *meta;
+ void *data, *data_end;
+ int ret;
+
+ /* Reserve space in-front of data pointer for our meta info.
+ * (Notice drivers not supporting data_meta will fail here!)
+ */
+ ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta));
+ if (ret < 0)
+ return XDP_ABORTED;
+
+ /* Notice: Kernel-side verifier requires that loading of
+ * ctx->data MUST happen _after_ helper bpf_xdp_adjust_meta(),
+ * as pkt-data pointers are invalidated. Helpers that require
+ * this are determined/marked by bpf_helper_changes_pkt_data()
+ */
+ data = (void *)(unsigned long)ctx->data;
+
+ /* Check data_meta have room for meta_info struct */
+ meta = (void *)(unsigned long)ctx->data_meta;
+ if (meta + 1 > data)
+ return XDP_ABORTED;
+
+ meta->mark = 42;
+
+ return XDP_PASS;
+}
+
+SEC("tc_mark")
+int _tc_mark(struct __sk_buff *ctx)
+{
+ void *data = (void *)(unsigned long)ctx->data;
+ void *data_end = (void *)(unsigned long)ctx->data_end;
+ void *data_meta = (void *)(unsigned long)ctx->data_meta;
+ struct meta_info *meta = data_meta;
+
+ /* Check XDP gave us some data_meta */
+ if (meta + 1 > data) {
+ ctx->mark = 41;
+ /* Skip "accept" if no data_meta is avail */
+ return TC_ACT_OK;
+ }
+
+ /* Hint: See func tc_cls_act_is_valid_access() for BPF_WRITE access */
+ ctx->mark = meta->mark; /* Transfer XDP-mark to SKB-mark */
+
+ return TC_ACT_OK;
+}
+
+/* Manually attaching these programs:
+export DEV=ixgbe2
+export FILE=xdp2skb_meta_kern.o
+
+# via TC command
+tc qdisc del dev $DEV clsact 2> /dev/null
+tc qdisc add dev $DEV clsact
+tc filter add dev $DEV ingress prio 1 handle 1 bpf da obj $FILE sec tc_mark
+tc filter show dev $DEV ingress
+
+# XDP via IP command:
+ip link set dev $DEV xdp off
+ip link set dev $DEV xdp obj $FILE sec xdp_mark
+
+# Use iptable to "see" if SKBs are marked
+iptables -I INPUT -p icmp -m mark --mark 41 # == 0x29
+iptables -I INPUT -p icmp -m mark --mark 42 # == 0x2a
+
+# Hint: catch XDP_ABORTED errors via
+perf record -e xdp:*
+perf script
+
+*/
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
index 2fe2f76..211db8d 100644
--- a/samples/bpf/xdp_monitor_kern.c
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -1,6 +1,7 @@
-/* XDP monitor tool, based on tracepoints
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc.
*
- * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ * XDP monitor tool, based on tracepoints
*/
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
@@ -104,7 +105,7 @@ struct xdp_exception_ctx {
SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception(struct xdp_exception_ctx *ctx)
{
- u64 *cnt;;
+ u64 *cnt;
u32 key;
key = ctx->act;
@@ -118,3 +119,92 @@ int trace_xdp_exception(struct xdp_exception_ctx *ctx)
return 0;
}
+
+/* Common stats data record shared with _user.c */
+struct datarec {
+ u64 processed;
+ u64 dropped;
+ u64 info;
+};
+#define MAX_CPUS 64
+
+struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = MAX_CPUS,
+};
+
+struct bpf_map_def SEC("maps") cpumap_kthread_cnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = 1,
+};
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
+ * Code in: kernel/include/trace/events/xdp.h
+ */
+struct cpumap_enqueue_ctx {
+ u64 __pad; // First 8 bytes are not accessible by bpf code
+ int map_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12; size:4; signed:0;
+ int cpu; // offset:16; size:4; signed:1;
+ unsigned int drops; // offset:20; size:4; signed:0;
+ unsigned int processed; // offset:24; size:4; signed:0;
+ int to_cpu; // offset:28; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_cpumap_enqueue")
+int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
+{
+ u32 to_cpu = ctx->to_cpu;
+ struct datarec *rec;
+
+ if (to_cpu >= MAX_CPUS)
+ return 1;
+
+ rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
+ if (!rec)
+ return 0;
+ rec->processed += ctx->processed;
+ rec->dropped += ctx->drops;
+
+ /* Record bulk events, then userspace can calc average bulk size */
+ if (ctx->processed > 0)
+ rec->info += 1;
+
+ return 0;
+}
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
+ * Code in: kernel/include/trace/events/xdp.h
+ */
+struct cpumap_kthread_ctx {
+ u64 __pad; // First 8 bytes are not accessible by bpf code
+ int map_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12; size:4; signed:0;
+ int cpu; // offset:16; size:4; signed:1;
+ unsigned int drops; // offset:20; size:4; signed:0;
+ unsigned int processed; // offset:24; size:4; signed:0;
+ int sched; // offset:28; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_cpumap_kthread")
+int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
+{
+ struct datarec *rec;
+ u32 key = 0;
+
+ rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
+ if (!rec)
+ return 0;
+ rec->processed += ctx->processed;
+ rec->dropped += ctx->drops;
+
+ /* Count times kthread yielded CPU via schedule call */
+ if (ctx->sched)
+ rec->info++;
+
+ return 0;
+}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index eaba165..eec1452 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -1,4 +1,5 @@
-/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
*/
static const char *__doc__=
"XDP monitor tool, based on tracepoints\n"
@@ -40,6 +41,9 @@ static const struct option long_options[] = {
{0, 0, NULL, 0 }
};
+/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */
+#define EXIT_FAIL_MEM 5
+
static void usage(char *argv[])
{
int i;
@@ -108,23 +112,93 @@ static const char *action2str(int action)
return NULL;
}
+/* Common stats data record shared with _kern.c */
+struct datarec {
+ __u64 processed;
+ __u64 dropped;
+ __u64 info;
+};
+#define MAX_CPUS 64
+
+/* Userspace structs for collection of stats from maps */
struct record {
- __u64 counter;
__u64 timestamp;
+ struct datarec total;
+ struct datarec *cpu;
+};
+struct u64rec {
+ __u64 processed;
+};
+struct record_u64 {
+ /* record for _kern side __u64 values */
+ __u64 timestamp;
+ struct u64rec total;
+ struct u64rec *cpu;
};
struct stats_record {
- struct record xdp_redir[REDIR_RES_MAX];
- struct record xdp_exception[XDP_ACTION_MAX];
+ struct record_u64 xdp_redirect[REDIR_RES_MAX];
+ struct record_u64 xdp_exception[XDP_ACTION_MAX];
+ struct record xdp_cpumap_kthread;
+ struct record xdp_cpumap_enqueue[MAX_CPUS];
};
-static void stats_print_headers(bool err_only)
+static bool map_collect_record(int fd, __u32 key, struct record *rec)
{
- if (err_only)
- printf("\n%s\n", __doc_err_only__);
+ /* For percpu maps, userspace gets a value per possible CPU */
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct datarec values[nr_cpus];
+ __u64 sum_processed = 0;
+ __u64 sum_dropped = 0;
+ __u64 sum_info = 0;
+ int i;
+
+ if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
+ fprintf(stderr,
+ "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+ return false;
+ }
+ /* Get time as close as possible to reading map contents */
+ rec->timestamp = gettime();
- printf("%-14s %-11s %-10s %-18s %-9s\n",
- "ACTION", "result", "pps ", "pps-human-readable", "measure-period");
+ /* Record and sum values from each CPU */
+ for (i = 0; i < nr_cpus; i++) {
+ rec->cpu[i].processed = values[i].processed;
+ sum_processed += values[i].processed;
+ rec->cpu[i].dropped = values[i].dropped;
+ sum_dropped += values[i].dropped;
+ rec->cpu[i].info = values[i].info;
+ sum_info += values[i].info;
+ }
+ rec->total.processed = sum_processed;
+ rec->total.dropped = sum_dropped;
+ rec->total.info = sum_info;
+ return true;
+}
+
+static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec)
+{
+ /* For percpu maps, userspace gets a value per possible CPU */
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct u64rec values[nr_cpus];
+ __u64 sum_total = 0;
+ int i;
+
+ if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
+ fprintf(stderr,
+ "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+ return false;
+ }
+ /* Get time as close as possible to reading map contents */
+ rec->timestamp = gettime();
+
+ /* Record and sum values from each CPU */
+ for (i = 0; i < nr_cpus; i++) {
+ rec->cpu[i].processed = values[i].processed;
+ sum_total += values[i].processed;
+ }
+ rec->total.processed = sum_total;
+ return true;
}
static double calc_period(struct record *r, struct record *p)
@@ -139,77 +213,203 @@ static double calc_period(struct record *r, struct record *p)
return period_;
}
-static double calc_pps(struct record *r, struct record *p, double period)
+static double calc_period_u64(struct record_u64 *r, struct record_u64 *p)
+{
+ double period_ = 0;
+ __u64 period = 0;
+
+ period = r->timestamp - p->timestamp;
+ if (period > 0)
+ period_ = ((double) period / NANOSEC_PER_SEC);
+
+ return period_;
+}
+
+static double calc_pps(struct datarec *r, struct datarec *p, double period)
+{
+ __u64 packets = 0;
+ double pps = 0;
+
+ if (period > 0) {
+ packets = r->processed - p->processed;
+ pps = packets / period;
+ }
+ return pps;
+}
+
+static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period)
+{
+ __u64 packets = 0;
+ double pps = 0;
+
+ if (period > 0) {
+ packets = r->processed - p->processed;
+ pps = packets / period;
+ }
+ return pps;
+}
+
+static double calc_drop(struct datarec *r, struct datarec *p, double period)
+{
+ __u64 packets = 0;
+ double pps = 0;
+
+ if (period > 0) {
+ packets = r->dropped - p->dropped;
+ pps = packets / period;
+ }
+ return pps;
+}
+
+static double calc_info(struct datarec *r, struct datarec *p, double period)
{
__u64 packets = 0;
double pps = 0;
if (period > 0) {
- packets = r->counter - p->counter;
+ packets = r->info - p->info;
pps = packets / period;
}
return pps;
}
-static void stats_print(struct stats_record *rec,
- struct stats_record *prev,
+static void stats_print(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
bool err_only)
{
- double period = 0, pps = 0;
- struct record *r, *p;
- int i = 0;
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ int rec_i = 0, i, to_cpu;
+ double t = 0, pps = 0;
- char *fmt = "%-14s %-11s %-10.0f %'-18.0f %f\n";
+ /* Header */
+ printf("%-15s %-7s %-12s %-12s %-9s\n",
+ "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info");
/* tracepoint: xdp:xdp_redirect_* */
if (err_only)
- i = REDIR_ERROR;
-
- for (; i < REDIR_RES_MAX; i++) {
- r = &rec->xdp_redir[i];
- p = &prev->xdp_redir[i];
-
- if (p->timestamp) {
- period = calc_period(r, p);
- pps = calc_pps(r, p, period);
+ rec_i = REDIR_ERROR;
+
+ for (; rec_i < REDIR_RES_MAX; rec_i++) {
+ struct record_u64 *rec, *prev;
+ char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
+ char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
+
+ rec = &stats_rec->xdp_redirect[rec_i];
+ prev = &stats_prev->xdp_redirect[rec_i];
+ t = calc_period_u64(rec, prev);
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct u64rec *r = &rec->cpu[i];
+ struct u64rec *p = &prev->cpu[i];
+
+ pps = calc_pps_u64(r, p, t);
+ if (pps > 0)
+ printf(fmt1, "XDP_REDIRECT", i,
+ rec_i ? 0.0: pps, rec_i ? pps : 0.0,
+ err2str(rec_i));
}
- printf(fmt, "XDP_REDIRECT", err2str(i), pps, pps, period);
+ pps = calc_pps_u64(&rec->total, &prev->total, t);
+ printf(fmt2, "XDP_REDIRECT", "total",
+ rec_i ? 0.0: pps, rec_i ? pps : 0.0, err2str(rec_i));
}
/* tracepoint: xdp:xdp_exception */
- for (i = 0; i < XDP_ACTION_MAX; i++) {
- r = &rec->xdp_exception[i];
- p = &prev->xdp_exception[i];
- if (p->timestamp) {
- period = calc_period(r, p);
- pps = calc_pps(r, p, period);
+ for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
+ struct record_u64 *rec, *prev;
+ char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
+ char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
+
+ rec = &stats_rec->xdp_exception[rec_i];
+ prev = &stats_prev->xdp_exception[rec_i];
+ t = calc_period_u64(rec, prev);
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct u64rec *r = &rec->cpu[i];
+ struct u64rec *p = &prev->cpu[i];
+
+ pps = calc_pps_u64(r, p, t);
+ if (pps > 0)
+ printf(fmt1, "Exception", i,
+ 0.0, pps, err2str(rec_i));
}
+ pps = calc_pps_u64(&rec->total, &prev->total, t);
if (pps > 0)
- printf(fmt, action2str(i), "Exception",
- pps, pps, period);
+ printf(fmt2, "Exception", "total",
+ 0.0, pps, action2str(rec_i));
}
- printf("\n");
-}
-static __u64 get_key32_value64_percpu(int fd, __u32 key)
-{
- /* For percpu maps, userspace gets a value per possible CPU */
- unsigned int nr_cpus = bpf_num_possible_cpus();
- __u64 values[nr_cpus];
- __u64 sum = 0;
- int i;
-
- if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
- fprintf(stderr,
- "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
- return 0;
+ /* cpumap enqueue stats */
+ for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
+ char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
+ char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
+ struct record *rec, *prev;
+ char *info_str = "";
+ double drop, info;
+
+ rec = &stats_rec->xdp_cpumap_enqueue[to_cpu];
+ prev = &stats_prev->xdp_cpumap_enqueue[to_cpu];
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop(r, p, t);
+ info = calc_info(r, p, t);
+ if (info > 0) {
+ info_str = "bulk-average";
+ info = pps / info; /* calc average bulk size */
+ }
+ if (pps > 0)
+ printf(fmt1, "cpumap-enqueue",
+ i, to_cpu, pps, drop, info, info_str);
+ }
+ pps = calc_pps(&rec->total, &prev->total, t);
+ if (pps > 0) {
+ drop = calc_drop(&rec->total, &prev->total, t);
+ info = calc_info(&rec->total, &prev->total, t);
+ if (info > 0) {
+ info_str = "bulk-average";
+ info = pps / info; /* calc average bulk size */
+ }
+ printf(fmt2, "cpumap-enqueue",
+ "sum", to_cpu, pps, drop, info, info_str);
+ }
}
- /* Sum values from each CPU */
- for (i = 0; i < nr_cpus; i++) {
- sum += values[i];
+ /* cpumap kthread stats */
+ {
+ char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n";
+ char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n";
+ struct record *rec, *prev;
+ double drop, info;
+ char *i_str = "";
+
+ rec = &stats_rec->xdp_cpumap_kthread;
+ prev = &stats_prev->xdp_cpumap_kthread;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop(r, p, t);
+ info = calc_info(r, p, t);
+ if (info > 0)
+ i_str = "sched";
+ if (pps > 0)
+ printf(fmt1, "cpumap-kthread",
+ i, pps, drop, info, i_str);
+ }
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop(&rec->total, &prev->total, t);
+ info = calc_info(&rec->total, &prev->total, t);
+ if (info > 0)
+ i_str = "sched-sum";
+ printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
}
- return sum;
+
+ printf("\n");
}
static bool stats_collect(struct stats_record *rec)
@@ -222,25 +422,109 @@ static bool stats_collect(struct stats_record *rec)
*/
fd = map_data[0].fd; /* map0: redirect_err_cnt */
- for (i = 0; i < REDIR_RES_MAX; i++) {
- rec->xdp_redir[i].timestamp = gettime();
- rec->xdp_redir[i].counter = get_key32_value64_percpu(fd, i);
- }
+ for (i = 0; i < REDIR_RES_MAX; i++)
+ map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
fd = map_data[1].fd; /* map1: exception_cnt */
for (i = 0; i < XDP_ACTION_MAX; i++) {
- rec->xdp_exception[i].timestamp = gettime();
- rec->xdp_exception[i].counter = get_key32_value64_percpu(fd, i);
+ map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
}
+ fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */
+ for (i = 0; i < MAX_CPUS; i++)
+ map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
+
+ fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
+ map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
+
return true;
}
+static void *alloc_rec_per_cpu(int record_size)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ void *array;
+ size_t size;
+
+ size = record_size * nr_cpus;
+ array = malloc(size);
+ memset(array, 0, size);
+ if (!array) {
+ fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
+ exit(EXIT_FAIL_MEM);
+ }
+ return array;
+}
+
+static struct stats_record *alloc_stats_record(void)
+{
+ struct stats_record *rec;
+ int rec_sz;
+ int i;
+
+ /* Alloc main stats_record structure */
+ rec = malloc(sizeof(*rec));
+ memset(rec, 0, sizeof(*rec));
+ if (!rec) {
+ fprintf(stderr, "Mem alloc error\n");
+ exit(EXIT_FAIL_MEM);
+ }
+
+ /* Alloc stats stored per CPU for each record */
+ rec_sz = sizeof(struct u64rec);
+ for (i = 0; i < REDIR_RES_MAX; i++)
+ rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz);
+
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz);
+
+ rec_sz = sizeof(struct datarec);
+ rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
+
+ for (i = 0; i < MAX_CPUS; i++)
+ rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
+
+ return rec;
+}
+
+static void free_stats_record(struct stats_record *r)
+{
+ int i;
+
+ for (i = 0; i < REDIR_RES_MAX; i++)
+ free(r->xdp_redirect[i].cpu);
+
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ free(r->xdp_exception[i].cpu);
+
+ free(r->xdp_cpumap_kthread.cpu);
+
+ for (i = 0; i < MAX_CPUS; i++)
+ free(r->xdp_cpumap_enqueue[i].cpu);
+
+ free(r);
+}
+
+/* Pointer swap trick */
+static inline void swap(struct stats_record **a, struct stats_record **b)
+{
+ struct stats_record *tmp;
+
+ tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+
static void stats_poll(int interval, bool err_only)
{
- struct stats_record rec, prev;
+ struct stats_record *rec, *prev;
- memset(&rec, 0, sizeof(rec));
+ rec = alloc_stats_record();
+ prev = alloc_stats_record();
+ stats_collect(rec);
+
+ if (err_only)
+ printf("\n%s\n", __doc_err_only__);
/* Trick to pretty printf with thousands separators use %' */
setlocale(LC_NUMERIC, "en_US");
@@ -258,13 +542,15 @@ static void stats_poll(int interval, bool err_only)
fflush(stdout);
while (1) {
- memcpy(&prev, &rec, sizeof(rec));
- stats_collect(&rec);
- stats_print_headers(err_only);
- stats_print(&rec, &prev, err_only);
+ swap(&prev, &rec);
+ stats_collect(rec);
+ stats_print(rec, prev, err_only);
fflush(stdout);
sleep(interval);
}
+
+ free_stats_record(rec);
+ free_stats_record(prev);
}
static void print_bpf_prog_info(void)
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 35fec9f..23744a8 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -26,7 +26,7 @@ static const char *__doc__ =
/* Wanted to get rid of bpf_load.h and fake-"libbpf.h" (and instead
* use bpf/libbpf.h), but cannot as (currently) needed for XDP
- * attaching to a device via set_link_xdp_fd()
+ * attaching to a device via bpf_set_link_xdp_fd()
*/
#include "libbpf.h"
#include "bpf_load.h"
@@ -67,7 +67,7 @@ static void int_exit(int sig)
"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
ifindex, ifname);
if (ifindex > -1)
- set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
exit(EXIT_OK);
}
@@ -682,7 +682,7 @@ int main(int argc, char **argv)
/* Remove XDP program when program is interrupted */
signal(SIGINT, int_exit);
- if (set_link_xdp_fd(ifindex, prog_fd[prog_num], xdp_flags) < 0) {
+ if (bpf_set_link_xdp_fd(ifindex, prog_fd[prog_num], xdp_flags) < 0) {
fprintf(stderr, "link set xdp fd failed\n");
return EXIT_FAIL_XDP;
}
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
index 978a532..7eae07d72 100644
--- a/samples/bpf/xdp_redirect_map_user.c
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -34,9 +34,9 @@ static __u32 xdp_flags;
static void int_exit(int sig)
{
- set_link_xdp_fd(ifindex_in, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
if (ifindex_out_xdp_dummy_attached)
- set_link_xdp_fd(ifindex_out, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
exit(0);
}
@@ -120,13 +120,13 @@ int main(int argc, char **argv)
return 1;
}
- if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
+ if (bpf_set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
return 1;
}
/* Loading dummy XDP prog on out-device */
- if (set_link_xdp_fd(ifindex_out, prog_fd[1],
+ if (bpf_set_link_xdp_fd(ifindex_out, prog_fd[1],
(xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
ifindex_out_xdp_dummy_attached = false;
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index 4475d83..b701b5c 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -20,6 +20,7 @@
#include <string.h>
#include <unistd.h>
#include <libgen.h>
+#include <sys/resource.h>
#include "bpf_load.h"
#include "bpf_util.h"
@@ -33,9 +34,9 @@ static __u32 xdp_flags;
static void int_exit(int sig)
{
- set_link_xdp_fd(ifindex_in, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
if (ifindex_out_xdp_dummy_attached)
- set_link_xdp_fd(ifindex_out, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
exit(0);
}
@@ -75,6 +76,7 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
const char *optstr = "SN";
char filename[256];
int ret, opt, key = 0;
@@ -98,6 +100,11 @@ int main(int argc, char **argv)
return 1;
}
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
ifindex_in = strtoul(argv[optind], NULL, 0);
ifindex_out = strtoul(argv[optind + 1], NULL, 0);
printf("input: %d output: %d\n", ifindex_in, ifindex_out);
@@ -114,13 +121,13 @@ int main(int argc, char **argv)
return 1;
}
- if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
+ if (bpf_set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
return 1;
}
/* Loading dummy XDP prog on out-device */
- if (set_link_xdp_fd(ifindex_out, prog_fd[1],
+ if (bpf_set_link_xdp_fd(ifindex_out, prog_fd[1],
(xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
ifindex_out_xdp_dummy_attached = false;
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
index 9164621..6296741 100644
--- a/samples/bpf/xdp_router_ipv4_user.c
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -37,7 +37,7 @@ static void int_exit(int sig)
int i = 0;
for (i = 0; i < total_ifindex; i++)
- set_link_xdp_fd(ifindex_list[i], -1, flags);
+ bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
exit(0);
}
@@ -49,7 +49,7 @@ static void close_and_exit(int sig)
close(sock_arp);
for (i = 0; i < total_ifindex; i++)
- set_link_xdp_fd(ifindex_list[i], -1, flags);
+ bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
exit(0);
}
@@ -183,7 +183,7 @@ static void read_route(struct nlmsghdr *nh, int nll)
int i = 0;
for (i = 0; i < total_ifindex; i++)
- set_link_xdp_fd(ifindex_list[i], -1, flags);
+ bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
exit(0);
}
assert(bpf_map_update_elem(map_fd[4], &route.iface, &route.iface, 0) == 0);
@@ -633,12 +633,12 @@ int main(int ac, char **argv)
}
}
for (i = 0; i < total_ifindex; i++) {
- if (set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) {
+ if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) {
printf("link set xdp fd failed\n");
int recovery_index = i;
for (i = 0; i < recovery_index; i++)
- set_link_xdp_fd(ifindex_list[i], -1, flags);
+ bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
return 1;
}
diff --git a/samples/bpf/xdp_rxq_info_kern.c b/samples/bpf/xdp_rxq_info_kern.c
new file mode 100644
index 0000000..3fd2092
--- /dev/null
+++ b/samples/bpf/xdp_rxq_info_kern.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ *
+ * Example howto extract XDP RX-queue info
+ */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* Config setup from with userspace
+ *
+ * User-side setup ifindex in config_map, to verify that
+ * ctx->ingress_ifindex is correct (against configured ifindex)
+ */
+struct config {
+ __u32 action;
+ int ifindex;
+};
+struct bpf_map_def SEC("maps") config_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct config),
+ .max_entries = 1,
+};
+
+/* Common stats data record (shared with userspace) */
+struct datarec {
+ __u64 processed;
+ __u64 issue;
+};
+
+struct bpf_map_def SEC("maps") stats_global_map = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = 1,
+};
+
+#define MAX_RXQs 64
+
+/* Stats per rx_queue_index (per CPU) */
+struct bpf_map_def SEC("maps") rx_queue_index_map = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = MAX_RXQs + 1,
+};
+
+SEC("xdp_prog0")
+int xdp_prognum0(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct datarec *rec, *rxq_rec;
+ int ingress_ifindex;
+ struct config *config;
+ u32 key = 0;
+
+ /* Global stats record */
+ rec = bpf_map_lookup_elem(&stats_global_map, &key);
+ if (!rec)
+ return XDP_ABORTED;
+ rec->processed++;
+
+ /* Accessing ctx->ingress_ifindex, cause BPF to rewrite BPF
+ * instructions inside kernel to access xdp_rxq->dev->ifindex
+ */
+ ingress_ifindex = ctx->ingress_ifindex;
+
+ config = bpf_map_lookup_elem(&config_map, &key);
+ if (!config)
+ return XDP_ABORTED;
+
+ /* Simple test: check ctx provided ifindex is as expected */
+ if (ingress_ifindex != config->ifindex) {
+ /* count this error case */
+ rec->issue++;
+ return XDP_ABORTED;
+ }
+
+ /* Update stats per rx_queue_index. Handle if rx_queue_index
+ * is larger than stats map can contain info for.
+ */
+ key = ctx->rx_queue_index;
+ if (key >= MAX_RXQs)
+ key = MAX_RXQs;
+ rxq_rec = bpf_map_lookup_elem(&rx_queue_index_map, &key);
+ if (!rxq_rec)
+ return XDP_ABORTED;
+ rxq_rec->processed++;
+ if (key == MAX_RXQs)
+ rxq_rec->issue++;
+
+ return config->action;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
new file mode 100644
index 0000000..478d954
--- /dev/null
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -0,0 +1,531 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ */
+static const char *__doc__ = " XDP RX-queue info extract example\n\n"
+ "Monitor how many packets per sec (pps) are received\n"
+ "per NIC RX queue index and which CPU processed the packet\n"
+ ;
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <locale.h>
+#include <sys/resource.h>
+#include <getopt.h>
+#include <net/if.h>
+#include <time.h>
+
+#include <arpa/inet.h>
+#include <linux/if_link.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+#include "bpf_util.h"
+
+static int ifindex = -1;
+static char ifname_buf[IF_NAMESIZE];
+static char *ifname;
+
+static __u32 xdp_flags;
+
+/* Exit return codes */
+#define EXIT_OK 0
+#define EXIT_FAIL 1
+#define EXIT_FAIL_OPTION 2
+#define EXIT_FAIL_XDP 3
+#define EXIT_FAIL_BPF 4
+#define EXIT_FAIL_MEM 5
+
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"dev", required_argument, NULL, 'd' },
+ {"skb-mode", no_argument, NULL, 'S' },
+ {"sec", required_argument, NULL, 's' },
+ {"no-separators", no_argument, NULL, 'z' },
+ {"action", required_argument, NULL, 'a' },
+ {0, 0, NULL, 0 }
+};
+
+static void int_exit(int sig)
+{
+ fprintf(stderr,
+ "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
+ ifindex, ifname);
+ if (ifindex > -1)
+ bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ exit(EXIT_OK);
+}
+
+struct config {
+ __u32 action;
+ int ifindex;
+};
+#define XDP_ACTION_MAX (XDP_TX + 1)
+#define XDP_ACTION_MAX_STRLEN 11
+static const char *xdp_action_names[XDP_ACTION_MAX] = {
+ [XDP_ABORTED] = "XDP_ABORTED",
+ [XDP_DROP] = "XDP_DROP",
+ [XDP_PASS] = "XDP_PASS",
+ [XDP_TX] = "XDP_TX",
+};
+
+static const char *action2str(int action)
+{
+ if (action < XDP_ACTION_MAX)
+ return xdp_action_names[action];
+ return NULL;
+}
+
+static int parse_xdp_action(char *action_str)
+{
+ size_t maxlen;
+ __u64 action = -1;
+ int i;
+
+ for (i = 0; i < XDP_ACTION_MAX; i++) {
+ maxlen = XDP_ACTION_MAX_STRLEN;
+ if (strncmp(xdp_action_names[i], action_str, maxlen) == 0) {
+ action = i;
+ break;
+ }
+ }
+ return action;
+}
+
+static void list_xdp_actions(void)
+{
+ int i;
+
+ printf("Available XDP --action <options>\n");
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ printf("\t%s\n", xdp_action_names[i]);
+ printf("\n");
+}
+
+static void usage(char *argv[])
+{
+ int i;
+
+ printf("\nDOCUMENTATION:\n%s\n", __doc__);
+ printf(" Usage: %s (options-see-below)\n", argv[0]);
+ printf(" Listing options:\n");
+ for (i = 0; long_options[i].name != 0; i++) {
+ printf(" --%-12s", long_options[i].name);
+ if (long_options[i].flag != NULL)
+ printf(" flag (internal value:%d)",
+ *long_options[i].flag);
+ else
+ printf(" short-option: -%c",
+ long_options[i].val);
+ printf("\n");
+ }
+ printf("\n");
+ list_xdp_actions();
+}
+
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+static __u64 gettime(void)
+{
+ struct timespec t;
+ int res;
+
+ res = clock_gettime(CLOCK_MONOTONIC, &t);
+ if (res < 0) {
+ fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
+ exit(EXIT_FAIL);
+ }
+ return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+/* Common stats data record shared with _kern.c */
+struct datarec {
+ __u64 processed;
+ __u64 issue;
+};
+struct record {
+ __u64 timestamp;
+ struct datarec total;
+ struct datarec *cpu;
+};
+struct stats_record {
+ struct record stats;
+ struct record *rxq;
+};
+
+static struct datarec *alloc_record_per_cpu(void)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct datarec *array;
+ size_t size;
+
+ size = sizeof(struct datarec) * nr_cpus;
+ array = malloc(size);
+ memset(array, 0, size);
+ if (!array) {
+ fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
+ exit(EXIT_FAIL_MEM);
+ }
+ return array;
+}
+
+static struct record *alloc_record_per_rxq(void)
+{
+ unsigned int nr_rxqs = map_data[2].def.max_entries;
+ struct record *array;
+ size_t size;
+
+ size = sizeof(struct record) * nr_rxqs;
+ array = malloc(size);
+ memset(array, 0, size);
+ if (!array) {
+ fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs);
+ exit(EXIT_FAIL_MEM);
+ }
+ return array;
+}
+
+static struct stats_record *alloc_stats_record(void)
+{
+ unsigned int nr_rxqs = map_data[2].def.max_entries;
+ struct stats_record *rec;
+ int i;
+
+ rec = malloc(sizeof(*rec));
+ memset(rec, 0, sizeof(*rec));
+ if (!rec) {
+ fprintf(stderr, "Mem alloc error\n");
+ exit(EXIT_FAIL_MEM);
+ }
+ rec->rxq = alloc_record_per_rxq();
+ for (i = 0; i < nr_rxqs; i++)
+ rec->rxq[i].cpu = alloc_record_per_cpu();
+
+ rec->stats.cpu = alloc_record_per_cpu();
+ return rec;
+}
+
+static void free_stats_record(struct stats_record *r)
+{
+ unsigned int nr_rxqs = map_data[2].def.max_entries;
+ int i;
+
+ for (i = 0; i < nr_rxqs; i++)
+ free(r->rxq[i].cpu);
+
+ free(r->rxq);
+ free(r->stats.cpu);
+ free(r);
+}
+
+static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
+{
+ /* For percpu maps, userspace gets a value per possible CPU */
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct datarec values[nr_cpus];
+ __u64 sum_processed = 0;
+ __u64 sum_issue = 0;
+ int i;
+
+ if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
+ fprintf(stderr,
+ "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+ return false;
+ }
+ /* Get time as close as possible to reading map contents */
+ rec->timestamp = gettime();
+
+ /* Record and sum values from each CPU */
+ for (i = 0; i < nr_cpus; i++) {
+ rec->cpu[i].processed = values[i].processed;
+ sum_processed += values[i].processed;
+ rec->cpu[i].issue = values[i].issue;
+ sum_issue += values[i].issue;
+ }
+ rec->total.processed = sum_processed;
+ rec->total.issue = sum_issue;
+ return true;
+}
+
+static void stats_collect(struct stats_record *rec)
+{
+ int fd, i, max_rxqs;
+
+ fd = map_data[1].fd; /* map: stats_global_map */
+ map_collect_percpu(fd, 0, &rec->stats);
+
+ fd = map_data[2].fd; /* map: rx_queue_index_map */
+ max_rxqs = map_data[2].def.max_entries;
+ for (i = 0; i < max_rxqs; i++)
+ map_collect_percpu(fd, i, &rec->rxq[i]);
+}
+
+static double calc_period(struct record *r, struct record *p)
+{
+ double period_ = 0;
+ __u64 period = 0;
+
+ period = r->timestamp - p->timestamp;
+ if (period > 0)
+ period_ = ((double) period / NANOSEC_PER_SEC);
+
+ return period_;
+}
+
+static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->processed - p->processed;
+ pps = packets / period_;
+ }
+ return pps;
+}
+
+static __u64 calc_errs_pps(struct datarec *r,
+ struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->issue - p->issue;
+ pps = packets / period_;
+ }
+ return pps;
+}
+
+static void stats_print(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ int action)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ unsigned int nr_rxqs = map_data[2].def.max_entries;
+ double pps = 0, err = 0;
+ struct record *rec, *prev;
+ double t;
+ int rxq;
+ int i;
+
+ /* Header */
+ printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s\n",
+ ifname, ifindex, action2str(action));
+
+ /* stats_global_map */
+ {
+ char *fmt_rx = "%-15s %-7d %'-11.0f %'-10.0f %s\n";
+ char *fm2_rx = "%-15s %-7s %'-11.0f\n";
+ char *errstr = "";
+
+ printf("%-15s %-7s %-11s %-11s\n",
+ "XDP stats", "CPU", "pps", "issue-pps");
+
+ rec = &stats_rec->stats;
+ prev = &stats_prev->stats;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps (r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (err > 0)
+ errstr = "invalid-ifindex";
+ if (pps > 0)
+ printf(fmt_rx, "XDP-RX CPU",
+ i, pps, err, errstr);
+ }
+ pps = calc_pps (&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+ printf(fm2_rx, "XDP-RX CPU", "total", pps, err);
+ }
+
+ /* rx_queue_index_map */
+ printf("\n%-15s %-7s %-11s %-11s\n",
+ "RXQ stats", "RXQ:CPU", "pps", "issue-pps");
+
+ for (rxq = 0; rxq < nr_rxqs; rxq++) {
+ char *fmt_rx = "%-15s %3d:%-3d %'-11.0f %'-10.0f %s\n";
+ char *fm2_rx = "%-15s %3d:%-3s %'-11.0f\n";
+ char *errstr = "";
+ int rxq_ = rxq;
+
+ /* Last RXQ in map catch overflows */
+ if (rxq_ == nr_rxqs - 1)
+ rxq_ = -1;
+
+ rec = &stats_rec->rxq[rxq];
+ prev = &stats_prev->rxq[rxq];
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps (r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (err > 0) {
+ if (rxq_ == -1)
+ errstr = "map-overflow-RXQ";
+ else
+ errstr = "err";
+ }
+ if (pps > 0)
+ printf(fmt_rx, "rx_queue_index",
+ rxq_, i, pps, err, errstr);
+ }
+ pps = calc_pps (&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+ if (pps || err)
+ printf(fm2_rx, "rx_queue_index", rxq_, "sum", pps, err);
+ }
+}
+
+
+/* Pointer swap trick */
+static inline void swap(struct stats_record **a, struct stats_record **b)
+{
+ struct stats_record *tmp;
+
+ tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+
+static void stats_poll(int interval, int action)
+{
+ struct stats_record *record, *prev;
+
+ record = alloc_stats_record();
+ prev = alloc_stats_record();
+ stats_collect(record);
+
+ while (1) {
+ swap(&prev, &record);
+ stats_collect(record);
+ stats_print(record, prev, action);
+ sleep(interval);
+ }
+
+ free_stats_record(record);
+ free_stats_record(prev);
+}
+
+
+int main(int argc, char **argv)
+{
+ struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+ bool use_separators = true;
+ struct config cfg = { 0 };
+ char filename[256];
+ int longindex = 0;
+ int interval = 2;
+ __u32 key = 0;
+ int opt, err;
+
+ char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 };
+ int action = XDP_PASS; /* Default action */
+ char *action_str = NULL;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
+ if (load_bpf_file(filename)) {
+ fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf);
+ return EXIT_FAIL;
+ }
+
+ if (!prog_fd[0]) {
+ fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno));
+ return EXIT_FAIL;
+ }
+
+ /* Parse commands line args */
+ while ((opt = getopt_long(argc, argv, "hSd:",
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ case 'd':
+ if (strlen(optarg) >= IF_NAMESIZE) {
+ fprintf(stderr, "ERR: --dev name too long\n");
+ goto error;
+ }
+ ifname = (char *)&ifname_buf;
+ strncpy(ifname, optarg, IF_NAMESIZE);
+ ifindex = if_nametoindex(ifname);
+ if (ifindex == 0) {
+ fprintf(stderr,
+ "ERR: --dev name unknown err(%d):%s\n",
+ errno, strerror(errno));
+ goto error;
+ }
+ break;
+ case 's':
+ interval = atoi(optarg);
+ break;
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case 'z':
+ use_separators = false;
+ break;
+ case 'a':
+ action_str = (char *)&action_str_buf;
+ strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN);
+ break;
+ case 'h':
+ error:
+ default:
+ usage(argv);
+ return EXIT_FAIL_OPTION;
+ }
+ }
+ /* Required option */
+ if (ifindex == -1) {
+ fprintf(stderr, "ERR: required option --dev missing\n");
+ usage(argv);
+ return EXIT_FAIL_OPTION;
+ }
+ cfg.ifindex = ifindex;
+
+ /* Parse action string */
+ if (action_str) {
+ action = parse_xdp_action(action_str);
+ if (action < 0) {
+ fprintf(stderr, "ERR: Invalid XDP --action: %s\n",
+ action_str);
+ list_xdp_actions();
+ return EXIT_FAIL_OPTION;
+ }
+ }
+ cfg.action = action;
+
+ /* Trick to pretty printf with thousands separators use %' */
+ if (use_separators)
+ setlocale(LC_NUMERIC, "en_US");
+
+ /* User-side setup ifindex in config_map */
+ err = bpf_map_update_elem(map_fd[0], &key, &cfg, 0);
+ if (err) {
+ fprintf(stderr, "Store config failed (err:%d)\n", err);
+ exit(EXIT_FAIL_BPF);
+ }
+
+ /* Remove XDP program when program is interrupted */
+ signal(SIGINT, int_exit);
+
+ if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+ fprintf(stderr, "link set xdp fd failed\n");
+ return EXIT_FAIL_XDP;
+ }
+
+ stats_poll(interval, action);
+ return EXIT_OK;
+}
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index 715cd12..f0a7872 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -30,7 +30,7 @@ static __u32 xdp_flags = 0;
static void int_exit(int sig)
{
if (ifindex > -1)
- set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
exit(0);
}
@@ -254,14 +254,14 @@ int main(int argc, char **argv)
}
}
- if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+ if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
poll_stats(kill_after_s);
- set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
return 0;
}
diff --git a/samples/kobject/kobject-example.c b/samples/kobject/kobject-example.c
index 2e0740f..9e383fd 100644
--- a/samples/kobject/kobject-example.c
+++ b/samples/kobject/kobject-example.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Sample kobject implementation
*
* Copyright (C) 2004-2007 Greg Kroah-Hartman <greg@kroah.com>
* Copyright (C) 2007 Novell Inc.
- *
- * Released under the GPL version 2 only.
- *
*/
#include <linux/kobject.h>
#include <linux/string.h>
diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c
index a55bff5..401328f 100644
--- a/samples/kobject/kset-example.c
+++ b/samples/kobject/kset-example.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Sample kset and ktype implementation
*
* Copyright (C) 2004-2007 Greg Kroah-Hartman <greg@kroah.com>
* Copyright (C) 2007 Novell Inc.
- *
- * Released under the GPL version 2 only.
- *
*/
#include <linux/kobject.h>
#include <linux/string.h>
diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c
index 67de3b7..02be898 100644
--- a/samples/kprobes/kprobe_example.c
+++ b/samples/kprobes/kprobe_example.c
@@ -38,10 +38,6 @@ static int handler_pre(struct kprobe *p, struct pt_regs *regs)
pr_info("<%s> pre_handler: p->addr = 0x%p, epc = 0x%lx, status = 0x%lx\n",
p->symbol_name, p->addr, regs->cp0_epc, regs->cp0_status);
#endif
-#ifdef CONFIG_TILEGX
- pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx, ex1 = 0x%lx\n",
- p->symbol_name, p->addr, regs->pc, regs->ex1);
-#endif
#ifdef CONFIG_ARM64
pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx,"
" pstate = 0x%lx\n",
@@ -72,10 +68,6 @@ static void handler_post(struct kprobe *p, struct pt_regs *regs,
pr_info("<%s> post_handler: p->addr = 0x%p, status = 0x%lx\n",
p->symbol_name, p->addr, regs->cp0_status);
#endif
-#ifdef CONFIG_TILEGX
- pr_info("<%s> post_handler: p->addr = 0x%p, ex1 = 0x%lx\n",
- p->symbol_name, p->addr, regs->ex1);
-#endif
#ifdef CONFIG_ARM64
pr_info("<%s> post_handler: p->addr = 0x%p, pstate = 0x%lx\n",
p->symbol_name, p->addr, (long)regs->pstate);
diff --git a/samples/qmi/Makefile b/samples/qmi/Makefile
new file mode 100644
index 0000000..2b111d2
--- /dev/null
+++ b/samples/qmi/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_QMI_CLIENT) += qmi_sample_client.o
diff --git a/samples/qmi/qmi_sample_client.c b/samples/qmi/qmi_sample_client.c
new file mode 100644
index 0000000..c9e7276
--- /dev/null
+++ b/samples/qmi/qmi_sample_client.c
@@ -0,0 +1,622 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Sample in-kernel QMI client driver
+ *
+ * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2017 Linaro Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/qrtr.h>
+#include <linux/net.h>
+#include <linux/completion.h>
+#include <linux/idr.h>
+#include <linux/string.h>
+#include <net/sock.h>
+#include <linux/soc/qcom/qmi.h>
+
+#define PING_REQ1_TLV_TYPE 0x1
+#define PING_RESP1_TLV_TYPE 0x2
+#define PING_OPT1_TLV_TYPE 0x10
+#define PING_OPT2_TLV_TYPE 0x11
+
+#define DATA_REQ1_TLV_TYPE 0x1
+#define DATA_RESP1_TLV_TYPE 0x2
+#define DATA_OPT1_TLV_TYPE 0x10
+#define DATA_OPT2_TLV_TYPE 0x11
+
+#define TEST_MED_DATA_SIZE_V01 8192
+#define TEST_MAX_NAME_SIZE_V01 255
+
+#define TEST_PING_REQ_MSG_ID_V01 0x20
+#define TEST_DATA_REQ_MSG_ID_V01 0x21
+
+#define TEST_PING_REQ_MAX_MSG_LEN_V01 266
+#define TEST_DATA_REQ_MAX_MSG_LEN_V01 8456
+
+struct test_name_type_v01 {
+ u32 name_len;
+ char name[TEST_MAX_NAME_SIZE_V01];
+};
+
+static struct qmi_elem_info test_name_type_v01_ei[] = {
+ {
+ .data_type = QMI_DATA_LEN,
+ .elem_len = 1,
+ .elem_size = sizeof(u8),
+ .array_type = NO_ARRAY,
+ .tlv_type = QMI_COMMON_TLV_TYPE,
+ .offset = offsetof(struct test_name_type_v01,
+ name_len),
+ },
+ {
+ .data_type = QMI_UNSIGNED_1_BYTE,
+ .elem_len = TEST_MAX_NAME_SIZE_V01,
+ .elem_size = sizeof(char),
+ .array_type = VAR_LEN_ARRAY,
+ .tlv_type = QMI_COMMON_TLV_TYPE,
+ .offset = offsetof(struct test_name_type_v01,
+ name),
+ },
+ {}
+};
+
+struct test_ping_req_msg_v01 {
+ char ping[4];
+
+ u8 client_name_valid;
+ struct test_name_type_v01 client_name;
+};
+
+static struct qmi_elem_info test_ping_req_msg_v01_ei[] = {
+ {
+ .data_type = QMI_UNSIGNED_1_BYTE,
+ .elem_len = 4,
+ .elem_size = sizeof(char),
+ .array_type = STATIC_ARRAY,
+ .tlv_type = PING_REQ1_TLV_TYPE,
+ .offset = offsetof(struct test_ping_req_msg_v01,
+ ping),
+ },
+ {
+ .data_type = QMI_OPT_FLAG,
+ .elem_len = 1,
+ .elem_size = sizeof(u8),
+ .array_type = NO_ARRAY,
+ .tlv_type = PING_OPT1_TLV_TYPE,
+ .offset = offsetof(struct test_ping_req_msg_v01,
+ client_name_valid),
+ },
+ {
+ .data_type = QMI_STRUCT,
+ .elem_len = 1,
+ .elem_size = sizeof(struct test_name_type_v01),
+ .array_type = NO_ARRAY,
+ .tlv_type = PING_OPT1_TLV_TYPE,
+ .offset = offsetof(struct test_ping_req_msg_v01,
+ client_name),
+ .ei_array = test_name_type_v01_ei,
+ },
+ {}
+};
+
+struct test_ping_resp_msg_v01 {
+ struct qmi_response_type_v01 resp;
+
+ u8 pong_valid;
+ char pong[4];
+
+ u8 service_name_valid;
+ struct test_name_type_v01 service_name;
+};
+
+static struct qmi_elem_info test_ping_resp_msg_v01_ei[] = {
+ {
+ .data_type = QMI_STRUCT,
+ .elem_len = 1,
+ .elem_size = sizeof(struct qmi_response_type_v01),
+ .array_type = NO_ARRAY,
+ .tlv_type = PING_RESP1_TLV_TYPE,
+ .offset = offsetof(struct test_ping_resp_msg_v01,
+ resp),
+ .ei_array = qmi_response_type_v01_ei,
+ },
+ {
+ .data_type = QMI_OPT_FLAG,
+ .elem_len = 1,
+ .elem_size = sizeof(u8),
+ .array_type = NO_ARRAY,
+ .tlv_type = PING_OPT1_TLV_TYPE,
+ .offset = offsetof(struct test_ping_resp_msg_v01,
+ pong_valid),
+ },
+ {
+ .data_type = QMI_UNSIGNED_1_BYTE,
+ .elem_len = 4,
+ .elem_size = sizeof(char),
+ .array_type = STATIC_ARRAY,
+ .tlv_type = PING_OPT1_TLV_TYPE,
+ .offset = offsetof(struct test_ping_resp_msg_v01,
+ pong),
+ },
+ {
+ .data_type = QMI_OPT_FLAG,
+ .elem_len = 1,
+ .elem_size = sizeof(u8),
+ .array_type = NO_ARRAY,
+ .tlv_type = PING_OPT2_TLV_TYPE,
+ .offset = offsetof(struct test_ping_resp_msg_v01,
+ service_name_valid),
+ },
+ {
+ .data_type = QMI_STRUCT,
+ .elem_len = 1,
+ .elem_size = sizeof(struct test_name_type_v01),
+ .array_type = NO_ARRAY,
+ .tlv_type = PING_OPT2_TLV_TYPE,
+ .offset = offsetof(struct test_ping_resp_msg_v01,
+ service_name),
+ .ei_array = test_name_type_v01_ei,
+ },
+ {}
+};
+
+struct test_data_req_msg_v01 {
+ u32 data_len;
+ u8 data[TEST_MED_DATA_SIZE_V01];
+
+ u8 client_name_valid;
+ struct test_name_type_v01 client_name;
+};
+
+static struct qmi_elem_info test_data_req_msg_v01_ei[] = {
+ {
+ .data_type = QMI_DATA_LEN,
+ .elem_len = 1,
+ .elem_size = sizeof(u32),
+ .array_type = NO_ARRAY,
+ .tlv_type = DATA_REQ1_TLV_TYPE,
+ .offset = offsetof(struct test_data_req_msg_v01,
+ data_len),
+ },
+ {
+ .data_type = QMI_UNSIGNED_1_BYTE,
+ .elem_len = TEST_MED_DATA_SIZE_V01,
+ .elem_size = sizeof(u8),
+ .array_type = VAR_LEN_ARRAY,
+ .tlv_type = DATA_REQ1_TLV_TYPE,
+ .offset = offsetof(struct test_data_req_msg_v01,
+ data),
+ },
+ {
+ .data_type = QMI_OPT_FLAG,
+ .elem_len = 1,
+ .elem_size = sizeof(u8),
+ .array_type = NO_ARRAY,
+ .tlv_type = DATA_OPT1_TLV_TYPE,
+ .offset = offsetof(struct test_data_req_msg_v01,
+ client_name_valid),
+ },
+ {
+ .data_type = QMI_STRUCT,
+ .elem_len = 1,
+ .elem_size = sizeof(struct test_name_type_v01),
+ .array_type = NO_ARRAY,
+ .tlv_type = DATA_OPT1_TLV_TYPE,
+ .offset = offsetof(struct test_data_req_msg_v01,
+ client_name),
+ .ei_array = test_name_type_v01_ei,
+ },
+ {}
+};
+
+struct test_data_resp_msg_v01 {
+ struct qmi_response_type_v01 resp;
+
+ u8 data_valid;
+ u32 data_len;
+ u8 data[TEST_MED_DATA_SIZE_V01];
+
+ u8 service_name_valid;
+ struct test_name_type_v01 service_name;
+};
+
+static struct qmi_elem_info test_data_resp_msg_v01_ei[] = {
+ {
+ .data_type = QMI_STRUCT,
+ .elem_len = 1,
+ .elem_size = sizeof(struct qmi_response_type_v01),
+ .array_type = NO_ARRAY,
+ .tlv_type = DATA_RESP1_TLV_TYPE,
+ .offset = offsetof(struct test_data_resp_msg_v01,
+ resp),
+ .ei_array = qmi_response_type_v01_ei,
+ },
+ {
+ .data_type = QMI_OPT_FLAG,
+ .elem_len = 1,
+ .elem_size = sizeof(u8),
+ .array_type = NO_ARRAY,
+ .tlv_type = DATA_OPT1_TLV_TYPE,
+ .offset = offsetof(struct test_data_resp_msg_v01,
+ data_valid),
+ },
+ {
+ .data_type = QMI_DATA_LEN,
+ .elem_len = 1,
+ .elem_size = sizeof(u32),
+ .array_type = NO_ARRAY,
+ .tlv_type = DATA_OPT1_TLV_TYPE,
+ .offset = offsetof(struct test_data_resp_msg_v01,
+ data_len),
+ },
+ {
+ .data_type = QMI_UNSIGNED_1_BYTE,
+ .elem_len = TEST_MED_DATA_SIZE_V01,
+ .elem_size = sizeof(u8),
+ .array_type = VAR_LEN_ARRAY,
+ .tlv_type = DATA_OPT1_TLV_TYPE,
+ .offset = offsetof(struct test_data_resp_msg_v01,
+ data),
+ },
+ {
+ .data_type = QMI_OPT_FLAG,
+ .elem_len = 1,
+ .elem_size = sizeof(u8),
+ .array_type = NO_ARRAY,
+ .tlv_type = DATA_OPT2_TLV_TYPE,
+ .offset = offsetof(struct test_data_resp_msg_v01,
+ service_name_valid),
+ },
+ {
+ .data_type = QMI_STRUCT,
+ .elem_len = 1,
+ .elem_size = sizeof(struct test_name_type_v01),
+ .array_type = NO_ARRAY,
+ .tlv_type = DATA_OPT2_TLV_TYPE,
+ .offset = offsetof(struct test_data_resp_msg_v01,
+ service_name),
+ .ei_array = test_name_type_v01_ei,
+ },
+ {}
+};
+
+/*
+ * ping_write() - ping_pong debugfs file write handler
+ * @file: debugfs file context
+ * @user_buf: reference to the user data (ignored)
+ * @count: number of bytes in @user_buf
+ * @ppos: offset in @file to write
+ *
+ * This function allows user space to send out a ping_pong QMI encoded message
+ * to the associated remote test service and will return with the result of the
+ * transaction. It serves as an example of how to provide a custom response
+ * handler.
+ *
+ * Return: @count, or negative errno on failure.
+ */
+static ssize_t ping_write(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct qmi_handle *qmi = file->private_data;
+ struct test_ping_req_msg_v01 req = {};
+ struct qmi_txn txn;
+ int ret;
+
+ memcpy(req.ping, "ping", sizeof(req.ping));
+
+ ret = qmi_txn_init(qmi, &txn, NULL, NULL);
+ if (ret < 0)
+ return ret;
+
+ ret = qmi_send_request(qmi, NULL, &txn,
+ TEST_PING_REQ_MSG_ID_V01,
+ TEST_PING_REQ_MAX_MSG_LEN_V01,
+ test_ping_req_msg_v01_ei, &req);
+ if (ret < 0) {
+ qmi_txn_cancel(&txn);
+ return ret;
+ }
+
+ ret = qmi_txn_wait(&txn, 5 * HZ);
+ if (ret < 0)
+ count = ret;
+
+ return count;
+}
+
+static const struct file_operations ping_fops = {
+ .open = simple_open,
+ .write = ping_write,
+};
+
+static void ping_pong_cb(struct qmi_handle *qmi, struct sockaddr_qrtr *sq,
+ struct qmi_txn *txn, const void *data)
+{
+ const struct test_ping_resp_msg_v01 *resp = data;
+
+ if (!txn) {
+ pr_err("spurious ping response\n");
+ return;
+ }
+
+ if (resp->resp.result == QMI_RESULT_FAILURE_V01)
+ txn->result = -ENXIO;
+ else if (!resp->pong_valid || memcmp(resp->pong, "pong", 4))
+ txn->result = -EINVAL;
+
+ complete(&txn->completion);
+}
+
+/*
+ * data_write() - data debugfs file write handler
+ * @file: debugfs file context
+ * @user_buf: reference to the user data
+ * @count: number of bytes in @user_buf
+ * @ppos: offset in @file to write
+ *
+ * This function allows user space to send out a data QMI encoded message to
+ * the associated remote test service and will return with the result of the
+ * transaction. It serves as an example of how to have the QMI helpers decode a
+ * transaction response into a provided object automatically.
+ *
+ * Return: @count, or negative errno on failure.
+ */
+static ssize_t data_write(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+
+{
+ struct qmi_handle *qmi = file->private_data;
+ struct test_data_resp_msg_v01 *resp;
+ struct test_data_req_msg_v01 *req;
+ struct qmi_txn txn;
+ int ret;
+
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ resp = kzalloc(sizeof(*resp), GFP_KERNEL);
+ if (!resp) {
+ kfree(req);
+ return -ENOMEM;
+ }
+
+ req->data_len = min_t(size_t, sizeof(req->data), count);
+ if (copy_from_user(req->data, user_buf, req->data_len)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret = qmi_txn_init(qmi, &txn, test_data_resp_msg_v01_ei, resp);
+ if (ret < 0)
+ goto out;
+
+ ret = qmi_send_request(qmi, NULL, &txn,
+ TEST_DATA_REQ_MSG_ID_V01,
+ TEST_DATA_REQ_MAX_MSG_LEN_V01,
+ test_data_req_msg_v01_ei, req);
+ if (ret < 0) {
+ qmi_txn_cancel(&txn);
+ goto out;
+ }
+
+ ret = qmi_txn_wait(&txn, 5 * HZ);
+ if (ret < 0) {
+ goto out;
+ } else if (!resp->data_valid ||
+ resp->data_len != req->data_len ||
+ memcmp(resp->data, req->data, req->data_len)) {
+ pr_err("response data doesn't match expectation\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = count;
+
+out:
+ kfree(resp);
+ kfree(req);
+
+ return ret;
+}
+
+static const struct file_operations data_fops = {
+ .open = simple_open,
+ .write = data_write,
+};
+
+static struct qmi_msg_handler qmi_sample_handlers[] = {
+ {
+ .type = QMI_RESPONSE,
+ .msg_id = TEST_PING_REQ_MSG_ID_V01,
+ .ei = test_ping_resp_msg_v01_ei,
+ .decoded_size = sizeof(struct test_ping_req_msg_v01),
+ .fn = ping_pong_cb
+ },
+ {}
+};
+
+struct qmi_sample {
+ struct qmi_handle qmi;
+
+ struct dentry *de_dir;
+ struct dentry *de_data;
+ struct dentry *de_ping;
+};
+
+static struct dentry *qmi_debug_dir;
+
+static int qmi_sample_probe(struct platform_device *pdev)
+{
+ struct sockaddr_qrtr *sq;
+ struct qmi_sample *sample;
+ char path[20];
+ int ret;
+
+ sample = devm_kzalloc(&pdev->dev, sizeof(*sample), GFP_KERNEL);
+ if (!sample)
+ return -ENOMEM;
+
+ ret = qmi_handle_init(&sample->qmi, TEST_DATA_REQ_MAX_MSG_LEN_V01,
+ NULL,
+ qmi_sample_handlers);
+ if (ret < 0)
+ return ret;
+
+ sq = dev_get_platdata(&pdev->dev);
+ ret = kernel_connect(sample->qmi.sock, (struct sockaddr *)sq,
+ sizeof(*sq), 0);
+ if (ret < 0) {
+ pr_err("failed to connect to remote service port\n");
+ goto err_release_qmi_handle;
+ }
+
+ snprintf(path, sizeof(path), "%d:%d", sq->sq_node, sq->sq_port);
+
+ sample->de_dir = debugfs_create_dir(path, qmi_debug_dir);
+ if (IS_ERR(sample->de_dir)) {
+ ret = PTR_ERR(sample->de_dir);
+ goto err_release_qmi_handle;
+ }
+
+ sample->de_data = debugfs_create_file("data", 0600, sample->de_dir,
+ sample, &data_fops);
+ if (IS_ERR(sample->de_data)) {
+ ret = PTR_ERR(sample->de_data);
+ goto err_remove_de_dir;
+ }
+
+ sample->de_ping = debugfs_create_file("ping", 0600, sample->de_dir,
+ sample, &ping_fops);
+ if (IS_ERR(sample->de_ping)) {
+ ret = PTR_ERR(sample->de_ping);
+ goto err_remove_de_data;
+ }
+
+ platform_set_drvdata(pdev, sample);
+
+ return 0;
+
+err_remove_de_data:
+ debugfs_remove(sample->de_data);
+err_remove_de_dir:
+ debugfs_remove(sample->de_dir);
+err_release_qmi_handle:
+ qmi_handle_release(&sample->qmi);
+
+ return ret;
+}
+
+static int qmi_sample_remove(struct platform_device *pdev)
+{
+ struct qmi_sample *sample = platform_get_drvdata(pdev);
+
+ debugfs_remove(sample->de_ping);
+ debugfs_remove(sample->de_data);
+ debugfs_remove(sample->de_dir);
+
+ qmi_handle_release(&sample->qmi);
+
+ return 0;
+}
+
+static struct platform_driver qmi_sample_driver = {
+ .probe = qmi_sample_probe,
+ .remove = qmi_sample_remove,
+ .driver = {
+ .name = "qmi_sample_client",
+ },
+};
+
+static int qmi_sample_new_server(struct qmi_handle *qmi,
+ struct qmi_service *service)
+{
+ struct platform_device *pdev;
+ struct sockaddr_qrtr sq = { AF_QIPCRTR, service->node, service->port };
+ int ret;
+
+ pdev = platform_device_alloc("qmi_sample_client", PLATFORM_DEVID_AUTO);
+ if (!pdev)
+ return -ENOMEM;
+
+ ret = platform_device_add_data(pdev, &sq, sizeof(sq));
+ if (ret)
+ goto err_put_device;
+
+ ret = platform_device_add(pdev);
+ if (ret)
+ goto err_put_device;
+
+ service->priv = pdev;
+
+ return 0;
+
+err_put_device:
+ platform_device_put(pdev);
+
+ return ret;
+}
+
+static void qmi_sample_del_server(struct qmi_handle *qmi,
+ struct qmi_service *service)
+{
+ struct platform_device *pdev = service->priv;
+
+ platform_device_unregister(pdev);
+}
+
+static struct qmi_handle lookup_client;
+
+static struct qmi_ops lookup_ops = {
+ .new_server = qmi_sample_new_server,
+ .del_server = qmi_sample_del_server,
+};
+
+static int qmi_sample_init(void)
+{
+ int ret;
+
+ qmi_debug_dir = debugfs_create_dir("qmi_sample", NULL);
+ if (IS_ERR(qmi_debug_dir)) {
+ pr_err("failed to create qmi_sample dir\n");
+ return PTR_ERR(qmi_debug_dir);
+ }
+
+ ret = platform_driver_register(&qmi_sample_driver);
+ if (ret)
+ goto err_remove_debug_dir;
+
+ ret = qmi_handle_init(&lookup_client, 0, &lookup_ops, NULL);
+ if (ret < 0)
+ goto err_unregister_driver;
+
+ qmi_add_lookup(&lookup_client, 15, 0, 0);
+
+ return 0;
+
+err_unregister_driver:
+ platform_driver_unregister(&qmi_sample_driver);
+err_remove_debug_dir:
+ debugfs_remove(qmi_debug_dir);
+
+ return ret;
+}
+
+static void qmi_sample_exit(void)
+{
+ qmi_handle_release(&lookup_client);
+
+ platform_driver_unregister(&qmi_sample_driver);
+
+ debugfs_remove(qmi_debug_dir);
+}
+
+module_init(qmi_sample_init);
+module_exit(qmi_sample_exit);
+
+MODULE_DESCRIPTION("Sample QMI client driver");
+MODULE_LICENSE("GPL v2");
diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile
index 0e349b8..ba942e3 100644
--- a/samples/seccomp/Makefile
+++ b/samples/seccomp/Makefile
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
+ifndef CROSS_COMPILE
hostprogs-$(CONFIG_SAMPLE_SECCOMP) := bpf-fancy dropper bpf-direct
HOSTCFLAGS_bpf-fancy.o += -I$(objtree)/usr/include
@@ -16,7 +17,6 @@ HOSTCFLAGS_bpf-direct.o += -idirafter $(objtree)/include
bpf-direct-objs := bpf-direct.o
# Try to match the kernel target.
-ifndef CROSS_COMPILE
ifndef CONFIG_64BIT
# s390 has -m31 flag to build 31 bit binaries
@@ -35,12 +35,4 @@ HOSTLOADLIBES_bpf-fancy += $(MFLAG)
HOSTLOADLIBES_dropper += $(MFLAG)
endif
always := $(hostprogs-m)
-else
-# MIPS system calls are defined based on the -mabi that is passed
-# to the toolchain which may or may not be a valid option
-# for the host toolchain. So disable tests if target architecture
-# is MIPS but the host isn't.
-ifndef CONFIG_MIPS
-always := $(hostprogs-m)
-endif
endif
diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile
index 73f1da4..9bf2881 100644
--- a/samples/sockmap/Makefile
+++ b/samples/sockmap/Makefile
@@ -2,7 +2,7 @@
hostprogs-y := sockmap
# Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o
+LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index 52b0053..9ff8bc5 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -43,6 +43,55 @@ struct bpf_map_def SEC("maps") sock_map = {
.max_entries = 20,
};
+struct bpf_map_def SEC("maps") sock_map_txmsg = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_redir = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_apply_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_cork_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_pull_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 2
+};
+
+struct bpf_map_def SEC("maps") sock_redir_flags = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_skb_opts = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb)
{
@@ -54,15 +103,24 @@ int bpf_prog2(struct __sk_buff *skb)
{
__u32 lport = skb->local_port;
__u32 rport = skb->remote_port;
- int ret = 0;
+ int len, *f, ret, zero = 0;
+ __u64 flags = 0;
if (lport == 10000)
ret = 10;
else
ret = 1;
- bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
- return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
+ len = (__u32)skb->data_end - (__u32)skb->data;
+ f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
+ if (f && *f) {
+ ret = 3;
+ flags = *f;
+ }
+
+ bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
+ len, flags);
+ return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
}
SEC("sockops")
@@ -105,4 +163,179 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
return 0;
}
+
+SEC("sk_msg1")
+int bpf_prog4(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1;
+ int *start, *end;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+ return SK_PASS;
+}
+
+SEC("sk_msg2")
+int bpf_prog5(struct sk_msg_md *msg)
+{
+ int err1 = -1, err2 = -1, zero = 0, one = 1;
+ int *bytes, *start, *end, len1, len2;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ err1 = bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ err2 = bpf_msg_cork_bytes(msg, *bytes);
+ len1 = (__u64)msg->data_end - (__u64)msg->data;
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end) {
+ int err;
+
+ bpf_printk("sk_msg2: pull(%i:%i)\n",
+ start ? *start : 0, end ? *end : 0);
+ err = bpf_msg_pull_data(msg, *start, *end, 0);
+ if (err)
+ bpf_printk("sk_msg2: pull_data err %i\n",
+ err);
+ len2 = (__u64)msg->data_end - (__u64)msg->data;
+ bpf_printk("sk_msg2: length update %i->%i\n",
+ len1, len2);
+ }
+ bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
+ len1, err1, err2);
+ return SK_PASS;
+}
+
+SEC("sk_msg3")
+int bpf_prog6(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1, key = 0;
+ int *start, *end, *f;
+ __u64 flags = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+ f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+ if (f && *f) {
+ key = 2;
+ flags = *f;
+ }
+ return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+}
+
+SEC("sk_msg4")
+int bpf_prog7(struct sk_msg_md *msg)
+{
+ int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
+ int *f, *bytes, *start, *end, len1, len2;
+ __u64 flags = 0;
+
+ int err;
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ err1 = bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ err2 = bpf_msg_cork_bytes(msg, *bytes);
+ len1 = (__u64)msg->data_end - (__u64)msg->data;
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end) {
+
+ bpf_printk("sk_msg2: pull(%i:%i)\n",
+ start ? *start : 0, end ? *end : 0);
+ err = bpf_msg_pull_data(msg, *start, *end, 0);
+ if (err)
+ bpf_printk("sk_msg2: pull_data err %i\n",
+ err);
+ len2 = (__u64)msg->data_end - (__u64)msg->data;
+ bpf_printk("sk_msg2: length update %i->%i\n",
+ len1, len2);
+ }
+ f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+ if (f && *f) {
+ key = 2;
+ flags = *f;
+ }
+ bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
+ len1, flags, err1 ? err1 : err2);
+ err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+ bpf_printk("sk_msg3: err %i\n", err);
+ return err;
+}
+
+SEC("sk_msg5")
+int bpf_prog8(struct sk_msg_md *msg)
+{
+ void *data_end = (void *)(long) msg->data_end;
+ void *data = (void *)(long) msg->data;
+ int ret = 0, *bytes, zero = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes) {
+ ret = bpf_msg_apply_bytes(msg, *bytes);
+ if (ret)
+ return SK_DROP;
+ } else {
+ return SK_DROP;
+ }
+ return SK_PASS;
+}
+SEC("sk_msg6")
+int bpf_prog9(struct sk_msg_md *msg)
+{
+ void *data_end = (void *)(long) msg->data_end;
+ void *data = (void *)(long) msg->data;
+ int ret = 0, *bytes, zero = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes) {
+ if (((__u64)data_end - (__u64)data) >= *bytes)
+ return SK_PASS;
+ ret = bpf_msg_cork_bytes(msg, *bytes);
+ if (ret)
+ return SK_DROP;
+ }
+ return SK_PASS;
+}
+
+SEC("sk_msg7")
+int bpf_prog10(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1;
+ int *start, *end;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+
+ return SK_DROP;
+}
+
+
char _license[] SEC("license") = "GPL";
diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh
new file mode 100755
index 0000000..ace75f0
--- /dev/null
+++ b/samples/sockmap/sockmap_test.sh
@@ -0,0 +1,488 @@
+#Test a bunch of positive cases to verify basic functionality
+for prog in "--txmsg_redir --txmsg_skb" "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
+for t in "sendmsg" "sendpage"; do
+for r in 1 10 100; do
+ for i in 1 10 100; do
+ for l in 1 10 100; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+ done
+ done
+done
+done
+done
+
+#Test max iov
+t="sendmsg"
+r=1
+i=1024
+l=1
+prog="--txmsg"
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+prog="--txmsg_redir"
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+
+# Test max iov with 1k send
+
+t="sendmsg"
+r=1
+i=1024
+l=1024
+prog="--txmsg"
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+prog="--txmsg_redir"
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+
+# Test apply with 1B
+r=1
+i=1024
+l=1024
+prog="--txmsg_apply 1"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply with apply that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply and redirect with 1B
+r=1
+i=1024
+l=1024
+prog="--txmsg_redir --txmsg_apply 1"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+prog="--txmsg_redir --txmsg_apply 1 --txmsg_ingress"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+prog="--txmsg_redir --txmsg_apply 1 --txmsg_skb"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# Test apply and redirect with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_redir --txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+prog="--txmsg_redir --txmsg_apply 2048 --txmsg_ingress"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+prog="--txmsg_redir --txmsg_apply 2048 --txmsg_skb"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# Test apply and redirect with apply that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with 1B not really useful but test it anyways
+r=1
+i=1024
+l=1024
+prog="--txmsg_cork 1"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with a more reasonable 100B
+r=1
+i=1000
+l=1000
+prog="--txmsg_cork 100"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with cork that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+r=1
+i=1024
+l=1024
+prog="--txmsg_redir --txmsg_cork 1"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with a more reasonable 100B
+r=1
+i=1000
+l=1000
+prog="--txmsg_redir --txmsg_cork 100"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_redir --txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with cork that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# mix and match cork and apply not really useful but valid programs
+
+# Test apply < cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 10 --txmsg_cork 100"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Try again with larger sizes so we hit overflow case
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 4096 --txmsg_cork 8096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply > cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 100 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Again with larger sizes so we hit overflow cases
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 8096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# Test apply = cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 10 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 4096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply < cork
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 100"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Try again with larger sizes so we hit overflow case
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 8096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply > cork
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 100 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Again with larger sizes so we hit overflow cases
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 8096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# Test apply = cork
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Tests for bpf_msg_pull_data()
+for i in `seq 99 100 1600`; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 0 --txmsg_end $i --txmsg_cork 1600"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+for i in `seq 199 100 1600`; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 100 --txmsg_end $i --txmsg_cork 1600"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 1500 --txmsg_end 1600 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 1111 --txmsg_end 1112 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 1111 --txmsg_end 0 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1602"
+echo $TEST
+$TEST
+sleep 2
+
+# Run through gamut again with start and end
+for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
+for t in "sendmsg" "sendpage"; do
+for r in 1 10 100; do
+ for i in 1 10 100; do
+ for l in 1 10 100; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog --txmsg_start 1 --txmsg_end 2"
+ echo $TEST
+ $TEST
+ sleep 2
+ done
+ done
+done
+done
+done
+
+# Some specific tests to cover specific code paths
+./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
+./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
+./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
+./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 7cc9d22..6f23349 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -23,9 +23,13 @@
#include <stdbool.h>
#include <signal.h>
#include <fcntl.h>
+#include <sys/wait.h>
+#include <time.h>
#include <sys/time.h>
+#include <sys/resource.h>
#include <sys/types.h>
+#include <sys/sendfile.h>
#include <linux/netlink.h>
#include <linux/socket.h>
@@ -35,6 +39,8 @@
#include <assert.h>
#include <libgen.h>
+#include <getopt.h>
+
#include "../bpf/bpf_load.h"
#include "../bpf/bpf_util.h"
#include "../bpf/libbpf.h"
@@ -46,15 +52,66 @@ void running_handler(int a);
#define S1_PORT 10000
#define S2_PORT 10001
-static int sockmap_test_sockets(int rate, int dot)
+/* global sockets */
+int s1, s2, c1, c2, p1, p2;
+
+int txmsg_pass;
+int txmsg_noisy;
+int txmsg_redir;
+int txmsg_redir_noisy;
+int txmsg_drop;
+int txmsg_apply;
+int txmsg_cork;
+int txmsg_start;
+int txmsg_end;
+int txmsg_ingress;
+int txmsg_skb;
+
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"cgroup", required_argument, NULL, 'c' },
+ {"rate", required_argument, NULL, 'r' },
+ {"verbose", no_argument, NULL, 'v' },
+ {"iov_count", required_argument, NULL, 'i' },
+ {"length", required_argument, NULL, 'l' },
+ {"test", required_argument, NULL, 't' },
+ {"data_test", no_argument, NULL, 'd' },
+ {"txmsg", no_argument, &txmsg_pass, 1 },
+ {"txmsg_noisy", no_argument, &txmsg_noisy, 1 },
+ {"txmsg_redir", no_argument, &txmsg_redir, 1 },
+ {"txmsg_redir_noisy", no_argument, &txmsg_redir_noisy, 1},
+ {"txmsg_drop", no_argument, &txmsg_drop, 1 },
+ {"txmsg_apply", required_argument, NULL, 'a'},
+ {"txmsg_cork", required_argument, NULL, 'k'},
+ {"txmsg_start", required_argument, NULL, 's'},
+ {"txmsg_end", required_argument, NULL, 'e'},
+ {"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
+ {"txmsg_skb", no_argument, &txmsg_skb, 1 },
+ {0, 0, NULL, 0 }
+};
+
+static void usage(char *argv[])
+{
+ int i;
+
+ printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
+ printf(" options:\n");
+ for (i = 0; long_options[i].name != 0; i++) {
+ printf(" --%-12s", long_options[i].name);
+ if (long_options[i].flag != NULL)
+ printf(" flag (internal value:%d)\n",
+ *long_options[i].flag);
+ else
+ printf(" -%c\n", long_options[i].val);
+ }
+ printf("\n");
+}
+
+static int sockmap_init_sockets(void)
{
- int i, sc, err, max_fd, one = 1;
- int s1, s2, c1, c2, p1, p2;
+ int i, err, one = 1;
struct sockaddr_in addr;
- struct timeval timeout;
- char buf[1024] = {0};
int *fds[4] = {&s1, &s2, &c1, &c2};
- fd_set w;
s1 = s2 = p1 = p2 = c1 = c2 = 0;
@@ -63,8 +120,7 @@ static int sockmap_test_sockets(int rate, int dot)
*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
if (*fds[i] < 0) {
perror("socket s1 failed()");
- err = *fds[i];
- goto out;
+ return errno;
}
}
@@ -74,16 +130,16 @@ static int sockmap_test_sockets(int rate, int dot)
(char *)&one, sizeof(one));
if (err) {
perror("setsockopt failed()");
- goto out;
+ return errno;
}
}
/* Non-blocking sockets */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < 2; i++) {
err = ioctl(*fds[i], FIONBIO, (char *)&one);
if (err < 0) {
perror("ioctl s1 failed()");
- goto out;
+ return errno;
}
}
@@ -96,14 +152,14 @@ static int sockmap_test_sockets(int rate, int dot)
err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0) {
perror("bind s1 failed()\n");
- goto out;
+ return errno;
}
addr.sin_port = htons(S2_PORT);
err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0) {
perror("bind s2 failed()\n");
- goto out;
+ return errno;
}
/* Listen server sockets */
@@ -111,14 +167,14 @@ static int sockmap_test_sockets(int rate, int dot)
err = listen(s1, 32);
if (err < 0) {
perror("listen s1 failed()\n");
- goto out;
+ return errno;
}
addr.sin_port = htons(S2_PORT);
err = listen(s2, 32);
if (err < 0) {
perror("listen s1 failed()\n");
- goto out;
+ return errno;
}
/* Initiate Connect */
@@ -126,46 +182,328 @@ static int sockmap_test_sockets(int rate, int dot)
err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0 && errno != EINPROGRESS) {
perror("connect c1 failed()\n");
- goto out;
+ return errno;
}
addr.sin_port = htons(S2_PORT);
err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0 && errno != EINPROGRESS) {
perror("connect c2 failed()\n");
- goto out;
+ return errno;
+ } else if (err < 0) {
+ err = 0;
}
/* Accept Connecrtions */
p1 = accept(s1, NULL, NULL);
if (p1 < 0) {
perror("accept s1 failed()\n");
- goto out;
+ return errno;
}
p2 = accept(s2, NULL, NULL);
if (p2 < 0) {
perror("accept s1 failed()\n");
- goto out;
+ return errno;
}
- max_fd = p2;
- timeout.tv_sec = 10;
- timeout.tv_usec = 0;
-
printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
c1, s1, c2, s2);
+ return 0;
+}
+
+struct msg_stats {
+ size_t bytes_sent;
+ size_t bytes_recvd;
+ struct timespec start;
+ struct timespec end;
+};
+
+struct sockmap_options {
+ int verbose;
+ bool base;
+ bool sendpage;
+ bool data_test;
+ bool drop_expected;
+};
+
+static int msg_loop_sendpage(int fd, int iov_length, int cnt,
+ struct msg_stats *s,
+ struct sockmap_options *opt)
+{
+ bool drop = opt->drop_expected;
+ unsigned char k = 0;
+ FILE *file;
+ int i, fp;
+
+ file = fopen(".sendpage_tst.tmp", "w+");
+ for (i = 0; i < iov_length * cnt; i++, k++)
+ fwrite(&k, sizeof(char), 1, file);
+ fflush(file);
+ fseek(file, 0, SEEK_SET);
+ fclose(file);
+
+ fp = open(".sendpage_tst.tmp", O_RDONLY);
+ clock_gettime(CLOCK_MONOTONIC, &s->start);
+ for (i = 0; i < cnt; i++) {
+ int sent = sendfile(fd, fp, NULL, iov_length);
+
+ if (!drop && sent < 0) {
+ perror("send loop error:");
+ close(fp);
+ return sent;
+ } else if (drop && sent >= 0) {
+ printf("sendpage loop error expected: %i\n", sent);
+ close(fp);
+ return -EIO;
+ }
+
+ if (sent > 0)
+ s->bytes_sent += sent;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ close(fp);
+ return 0;
+}
+
+static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
+ struct msg_stats *s, bool tx,
+ struct sockmap_options *opt)
+{
+ struct msghdr msg = {0};
+ int err, i, flags = MSG_NOSIGNAL;
+ struct iovec *iov;
+ unsigned char k;
+ bool data_test = opt->data_test;
+ bool drop = opt->drop_expected;
+
+ iov = calloc(iov_count, sizeof(struct iovec));
+ if (!iov)
+ return errno;
+
+ k = 0;
+ for (i = 0; i < iov_count; i++) {
+ unsigned char *d = calloc(iov_length, sizeof(char));
+
+ if (!d) {
+ fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
+ goto out_errno;
+ }
+ iov[i].iov_base = d;
+ iov[i].iov_len = iov_length;
+
+ if (data_test && tx) {
+ int j;
+
+ for (j = 0; j < iov_length; j++)
+ d[j] = k++;
+ }
+ }
+
+ msg.msg_iov = iov;
+ msg.msg_iovlen = iov_count;
+ k = 0;
+
+ if (tx) {
+ clock_gettime(CLOCK_MONOTONIC, &s->start);
+ for (i = 0; i < cnt; i++) {
+ int sent = sendmsg(fd, &msg, flags);
+
+ if (!drop && sent < 0) {
+ perror("send loop error:");
+ goto out_errno;
+ } else if (drop && sent >= 0) {
+ printf("send loop error expected: %i\n", sent);
+ errno = -EIO;
+ goto out_errno;
+ }
+ if (sent > 0)
+ s->bytes_sent += sent;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ } else {
+ int slct, recv, max_fd = fd;
+ struct timeval timeout;
+ float total_bytes;
+ fd_set w;
+
+ total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
+ err = clock_gettime(CLOCK_MONOTONIC, &s->start);
+ if (err < 0)
+ perror("recv start time: ");
+ while (s->bytes_recvd < total_bytes) {
+ timeout.tv_sec = 1;
+ timeout.tv_usec = 0;
+
+ /* FD sets */
+ FD_ZERO(&w);
+ FD_SET(fd, &w);
+
+ slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
+ if (slct == -1) {
+ perror("select()");
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ goto out_errno;
+ } else if (!slct) {
+ fprintf(stderr, "unexpected timeout\n");
+ errno = -EIO;
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ goto out_errno;
+ }
+
+ recv = recvmsg(fd, &msg, flags);
+ if (recv < 0) {
+ if (errno != EWOULDBLOCK) {
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ perror("recv failed()\n");
+ goto out_errno;
+ }
+ }
+
+ s->bytes_recvd += recv;
+
+ if (data_test) {
+ int j;
+
+ for (i = 0; i < msg.msg_iovlen; i++) {
+ unsigned char *d = iov[i].iov_base;
+
+ for (j = 0;
+ j < iov[i].iov_len && recv; j++) {
+ if (d[j] != k++) {
+ errno = -EIO;
+ fprintf(stderr,
+ "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
+ i, j, d[j], k - 1, d[j+1], k + 1);
+ goto out_errno;
+ }
+ recv--;
+ }
+ }
+ }
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ }
+
+ for (i = 0; i < iov_count; i++)
+ free(iov[i].iov_base);
+ free(iov);
+ return 0;
+out_errno:
+ for (i = 0; i < iov_count; i++)
+ free(iov[i].iov_base);
+ free(iov);
+ return errno;
+}
+
+static float giga = 1000000000;
+
+static inline float sentBps(struct msg_stats s)
+{
+ return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
+}
+
+static inline float recvdBps(struct msg_stats s)
+{
+ return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
+}
+
+static int sendmsg_test(int iov_count, int iov_buf, int cnt,
+ struct sockmap_options *opt)
+{
+ float sent_Bps = 0, recvd_Bps = 0;
+ int rx_fd, txpid, rxpid, err = 0;
+ struct msg_stats s = {0};
+ int status;
+
+ errno = 0;
+
+ if (opt->base)
+ rx_fd = p1;
+ else
+ rx_fd = p2;
+
+ rxpid = fork();
+ if (rxpid == 0) {
+ if (opt->drop_expected)
+ exit(1);
+
+ if (opt->sendpage)
+ iov_count = 1;
+ err = msg_loop(rx_fd, iov_count, iov_buf,
+ cnt, &s, false, opt);
+ if (err)
+ fprintf(stderr,
+ "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
+ iov_count, iov_buf, cnt, err);
+ shutdown(p2, SHUT_RDWR);
+ shutdown(p1, SHUT_RDWR);
+ if (s.end.tv_sec - s.start.tv_sec) {
+ sent_Bps = sentBps(s);
+ recvd_Bps = recvdBps(s);
+ }
+ fprintf(stdout,
+ "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
+ s.bytes_sent, sent_Bps, sent_Bps/giga,
+ s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+ exit(1);
+ } else if (rxpid == -1) {
+ perror("msg_loop_rx: ");
+ return errno;
+ }
+
+ txpid = fork();
+ if (txpid == 0) {
+ if (opt->sendpage)
+ err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
+ else
+ err = msg_loop(c1, iov_count, iov_buf,
+ cnt, &s, true, opt);
+
+ if (err)
+ fprintf(stderr,
+ "msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
+ iov_count, iov_buf, cnt, err);
+ shutdown(c1, SHUT_RDWR);
+ if (s.end.tv_sec - s.start.tv_sec) {
+ sent_Bps = sentBps(s);
+ recvd_Bps = recvdBps(s);
+ }
+ fprintf(stdout,
+ "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
+ s.bytes_sent, sent_Bps, sent_Bps/giga,
+ s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+ exit(1);
+ } else if (txpid == -1) {
+ perror("msg_loop_tx: ");
+ return errno;
+ }
+
+ assert(waitpid(rxpid, &status, 0) == rxpid);
+ assert(waitpid(txpid, &status, 0) == txpid);
+ return err;
+}
+
+static int forever_ping_pong(int rate, struct sockmap_options *opt)
+{
+ struct timeval timeout;
+ char buf[1024] = {0};
+ int sc;
+
+ timeout.tv_sec = 10;
+ timeout.tv_usec = 0;
/* Ping/Pong data from client to server */
sc = send(c1, buf, sizeof(buf), 0);
if (sc < 0) {
perror("send failed()\n");
- goto out;
+ return sc;
}
do {
- int s, rc, i;
+ int s, rc, i, max_fd = p2;
+ fd_set w;
/* FD sets */
FD_ZERO(&w);
@@ -193,7 +531,7 @@ static int sockmap_test_sockets(int rate, int dot)
if (rc < 0) {
if (errno != EWOULDBLOCK) {
perror("recv failed()\n");
- break;
+ return rc;
}
}
@@ -205,35 +543,115 @@ static int sockmap_test_sockets(int rate, int dot)
sc = send(i, buf, rc, 0);
if (sc < 0) {
perror("send failed()\n");
- break;
+ return sc;
}
}
- sleep(rate);
- if (dot) {
+
+ if (rate)
+ sleep(rate);
+
+ if (opt->verbose) {
printf(".");
fflush(stdout);
}
} while (running);
-out:
- close(s1);
- close(s2);
- close(p1);
- close(p2);
- close(c1);
- close(c2);
- return err;
+ return 0;
}
+enum {
+ PING_PONG,
+ SENDMSG,
+ BASE,
+ BASE_SENDPAGE,
+ SENDPAGE,
+};
+
int main(int argc, char **argv)
{
- int rate = 1, dot = 1;
+ int iov_count = 1, length = 1024, rate = 1, tx_prog_fd;
+ struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+ int opt, longindex, err, cg_fd = 0;
+ struct sockmap_options options = {0};
+ int test = PING_PONG;
char filename[256];
- int err, cg_fd;
- char *cg_path;
- cg_path = argv[argc - 1];
+ while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ case 's':
+ txmsg_start = atoi(optarg);
+ break;
+ case 'e':
+ txmsg_end = atoi(optarg);
+ break;
+ case 'a':
+ txmsg_apply = atoi(optarg);
+ break;
+ case 'k':
+ txmsg_cork = atoi(optarg);
+ break;
+ case 'c':
+ cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
+ if (cg_fd < 0) {
+ fprintf(stderr,
+ "ERROR: (%i) open cg path failed: %s\n",
+ cg_fd, optarg);
+ return cg_fd;
+ }
+ break;
+ case 'r':
+ rate = atoi(optarg);
+ break;
+ case 'v':
+ options.verbose = 1;
+ break;
+ case 'i':
+ iov_count = atoi(optarg);
+ break;
+ case 'l':
+ length = atoi(optarg);
+ break;
+ case 'd':
+ options.data_test = true;
+ break;
+ case 't':
+ if (strcmp(optarg, "ping") == 0) {
+ test = PING_PONG;
+ } else if (strcmp(optarg, "sendmsg") == 0) {
+ test = SENDMSG;
+ } else if (strcmp(optarg, "base") == 0) {
+ test = BASE;
+ } else if (strcmp(optarg, "base_sendpage") == 0) {
+ test = BASE_SENDPAGE;
+ } else if (strcmp(optarg, "sendpage") == 0) {
+ test = SENDPAGE;
+ } else {
+ usage(argv);
+ return -1;
+ }
+ break;
+ case 0:
+ break;
+ case 'h':
+ default:
+ usage(argv);
+ return -1;
+ }
+ }
+
+ if (!cg_fd) {
+ fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
+ argv[0]);
+ return -1;
+ }
+
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
running = 1;
@@ -247,13 +665,9 @@ int main(int argc, char **argv)
return 1;
}
- /* Cgroup configuration */
- cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY);
- if (cg_fd < 0) {
- fprintf(stderr, "ERROR: (%i) open cg path failed: %s\n",
- cg_fd, cg_path);
- return cg_fd;
- }
+ /* If base test skip BPF setup */
+ if (test == BASE || test == BASE_SENDPAGE)
+ goto run;
/* Attach programs to sockmap */
err = bpf_prog_attach(prog_fd[0], map_fd[0],
@@ -280,12 +694,198 @@ int main(int argc, char **argv)
return err;
}
- err = sockmap_test_sockets(rate, dot);
+run:
+ err = sockmap_init_sockets();
if (err) {
fprintf(stderr, "ERROR: test socket failed: %d\n", err);
- return err;
+ goto out;
}
- return 0;
+
+ /* Attach txmsg program to sockmap */
+ if (txmsg_pass)
+ tx_prog_fd = prog_fd[3];
+ else if (txmsg_noisy)
+ tx_prog_fd = prog_fd[4];
+ else if (txmsg_redir)
+ tx_prog_fd = prog_fd[5];
+ else if (txmsg_redir_noisy)
+ tx_prog_fd = prog_fd[6];
+ else if (txmsg_drop)
+ tx_prog_fd = prog_fd[9];
+ /* apply and cork must be last */
+ else if (txmsg_apply)
+ tx_prog_fd = prog_fd[7];
+ else if (txmsg_cork)
+ tx_prog_fd = prog_fd[8];
+ else
+ tx_prog_fd = 0;
+
+ if (tx_prog_fd) {
+ int redir_fd, i = 0;
+
+ err = bpf_prog_attach(tx_prog_fd,
+ map_fd[1], BPF_SK_MSG_VERDICT, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ if (txmsg_redir || txmsg_redir_noisy)
+ redir_fd = c2;
+ else
+ redir_fd = c1;
+
+ err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ if (txmsg_apply) {
+ err = bpf_map_update_elem(map_fd[3],
+ &i, &txmsg_apply, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (apply_bytes): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
+ if (txmsg_cork) {
+ err = bpf_map_update_elem(map_fd[4],
+ &i, &txmsg_cork, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (cork_bytes): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
+ if (txmsg_start) {
+ err = bpf_map_update_elem(map_fd[5],
+ &i, &txmsg_start, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_start): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
+ if (txmsg_end) {
+ i = 1;
+ err = bpf_map_update_elem(map_fd[5],
+ &i, &txmsg_end, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_end): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
+ if (txmsg_ingress) {
+ int in = BPF_F_INGRESS;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+ err, strerror(errno));
+ }
+ i = 1;
+ err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
+ err, strerror(errno));
+ }
+ err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
+ err, strerror(errno));
+ }
+
+ i = 2;
+ err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
+ err, strerror(errno));
+ }
+ }
+
+ if (txmsg_skb) {
+ int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1;
+ int ingress = BPF_F_INGRESS;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd[7], &i, &ingress, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+ err, strerror(errno));
+ }
+
+ i = 3;
+ err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+ err, strerror(errno));
+ }
+ }
+ }
+
+ if (txmsg_drop)
+ options.drop_expected = true;
+
+ if (test == PING_PONG)
+ err = forever_ping_pong(rate, &options);
+ else if (test == SENDMSG) {
+ options.base = false;
+ options.sendpage = false;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else if (test == SENDPAGE) {
+ options.base = false;
+ options.sendpage = true;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else if (test == BASE) {
+ options.base = true;
+ options.sendpage = false;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else if (test == BASE_SENDPAGE) {
+ options.base = true;
+ options.sendpage = true;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else
+ fprintf(stderr, "unknown test\n");
+out:
+ bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+ close(s1);
+ close(s2);
+ close(p1);
+ close(p2);
+ close(c1);
+ close(c2);
+ close(cg_fd);
+ return err;
}
void running_handler(int a)
diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index 09f255b..7abb79d 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -534,7 +534,7 @@ static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state,
/* Interrupt priority 2: Fifo trigger level reached */
if ((ier & UART_IER_RDI) &&
- (mdev_state->s[index].rxtx.count ==
+ (mdev_state->s[index].rxtx.count >=
mdev_state->s[index].intr_trigger_level))
*buf |= UART_IIR_RDI;
OpenPOWER on IntegriCloud