diff options
author | David S. Miller <davem@davemloft.net> | 2018-05-24 22:20:51 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-05-24 22:20:51 -0400 |
commit | 90fed9c94625718a3a10db7d1e8e4efe093bbf5f (patch) | |
tree | 09b3bc9ea679316372b139338179a230105306dc /samples | |
parent | 49a473f5b5f54f33e0bd8618158d33f83153c921 (diff) | |
parent | 10f678683e4026e43524b0492068a371d00fdeed (diff) | |
download | op-kernel-dev-90fed9c94625718a3a10db7d1e8e4efe093bbf5f.zip op-kernel-dev-90fed9c94625718a3a10db7d1e8e4efe093bbf5f.tar.gz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:
====================
pull-request: bpf-next 2018-05-24
The following pull-request contains BPF updates for your *net-next* tree.
The main changes are:
1) Björn Töpel cleans up AF_XDP (removes rebind, explicit cache alignment from uapi, etc).
2) David Ahern adds mtu checks to bpf_ipv{4,6}_fib_lookup() helpers.
3) Jesper Dangaard Brouer adds bulking support to ndo_xdp_xmit.
4) Jiong Wang adds support for indirect and arithmetic shifts to NFP
5) Martin KaFai Lau cleans up BTF uapi and makes the btf_header extensible.
6) Mathieu Xhonneux adds an End.BPF action to seg6local with BPF helpers allowing
to edit/grow/shrink a SRH and apply on a packet generic SRv6 actions.
7) Sandipan Das adds support for bpf2bpf function calls in ppc64 JIT.
8) Yonghong Song adds BPF_TASK_FD_QUERY command for introspection of tracing events.
9) other misc fixes from Gustavo A. R. Silva, Sirio Balmelli, John Fastabend, and Magnus Karlsson
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples')
-rw-r--r-- | samples/bpf/Makefile | 4 | ||||
-rw-r--r-- | samples/bpf/task_fd_query_kern.c | 19 | ||||
-rw-r--r-- | samples/bpf/task_fd_query_user.c | 382 | ||||
-rw-r--r-- | samples/bpf/xdp_monitor_kern.c | 49 | ||||
-rw-r--r-- | samples/bpf/xdp_monitor_user.c | 69 | ||||
-rw-r--r-- | samples/bpf/xdpsock_user.c | 135 |
6 files changed, 599 insertions, 59 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 62a99ab..1303af1 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -51,6 +51,7 @@ hostprogs-y += cpustat hostprogs-y += xdp_adjust_tail hostprogs-y += xdpsock hostprogs-y += xdp_fwd +hostprogs-y += task_fd_query # Libbpf dependencies LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a @@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o xdp_adjust_tail-objs := xdp_adjust_tail_user.o xdpsock-objs := bpf_load.o xdpsock_user.o xdp_fwd-objs := bpf_load.o xdp_fwd_user.o +task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -160,6 +162,7 @@ always += cpustat_kern.o always += xdp_adjust_tail_kern.o always += xdpsock_kern.o always += xdp_fwd_kern.o +always += task_fd_query_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -175,6 +178,7 @@ HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/ +HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/ HOST_LOADLIBES += $(LIBBPF) -lelf HOSTLOADLIBES_tracex4 += -lrt diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c new file mode 100644 index 0000000..f4b0a9e --- /dev/null +++ b/samples/bpf/task_fd_query_kern.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/version.h> +#include <linux/ptrace.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +SEC("kprobe/blk_start_request") +int bpf_prog1(struct pt_regs *ctx) +{ + return 0; +} + +SEC("kretprobe/blk_account_io_completion") +int bpf_prog2(struct pt_regs *ctx) +{ + return 0; +} +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c new file mode 100644 index 0000000..8381d79 --- /dev/null +++ b/samples/bpf/task_fd_query_user.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <unistd.h> +#include <stdbool.h> +#include <string.h> +#include <stdint.h> +#include <fcntl.h> +#include <linux/bpf.h> +#include <sys/ioctl.h> +#include <sys/resource.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "libbpf.h" +#include "bpf_load.h" +#include "bpf_util.h" +#include "perf-sys.h" +#include "trace_helpers.h" + +#define CHECK_PERROR_RET(condition) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + printf("FAIL: %s:\n", __func__); \ + perror(" "); \ + return -1; \ + } \ +}) + +#define CHECK_AND_RET(condition) ({ \ + int __ret = !!(condition); \ + if (__ret) \ + return -1; \ +}) + +static __u64 ptr_to_u64(void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + +#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type" +static int bpf_find_probe_type(const char *event_type) +{ + char buf[256]; + int fd, ret; + + ret = snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type); + CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf)); + + fd = open(buf, O_RDONLY); + CHECK_PERROR_RET(fd < 0); + + ret = read(fd, buf, sizeof(buf)); + close(fd); + CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf)); + + errno = 0; + ret = (int)strtol(buf, NULL, 10); + CHECK_PERROR_RET(errno); + return ret; +} + +#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe" +static int bpf_get_retprobe_bit(const char *event_type) +{ + char buf[256]; + int fd, ret; + + ret = snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type); + CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf)); + + fd = open(buf, O_RDONLY); + CHECK_PERROR_RET(fd < 0); + + ret = read(fd, buf, sizeof(buf)); + close(fd); + CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf)); + CHECK_PERROR_RET(strlen(buf) < strlen("config:")); + + errno = 0; + ret = (int)strtol(buf + strlen("config:"), NULL, 10); + CHECK_PERROR_RET(errno); + return ret; +} + +static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name, + __u32 expected_fd_type) +{ + __u64 probe_offset, probe_addr; + __u32 len, prog_id, fd_type; + char buf[256]; + int err; + + len = sizeof(buf); + err = bpf_task_fd_query(getpid(), event_fd[prog_fd_idx], 0, buf, &len, + &prog_id, &fd_type, &probe_offset, + &probe_addr); + if (err < 0) { + printf("FAIL: %s, for event_fd idx %d, fn_name %s\n", + __func__, prog_fd_idx, fn_name); + perror(" :"); + return -1; + } + if (strcmp(buf, fn_name) != 0 || + fd_type != expected_fd_type || + probe_offset != 0x0 || probe_addr != 0x0) { + printf("FAIL: bpf_trace_event_query(event_fd[%d]):\n", + prog_fd_idx); + printf("buf: %s, fd_type: %u, probe_offset: 0x%llx," + " probe_addr: 0x%llx\n", + buf, fd_type, probe_offset, probe_addr); + return -1; + } + return 0; +} + +static int test_nondebug_fs_kuprobe_common(const char *event_type, + const char *name, __u64 offset, __u64 addr, bool is_return, + char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, + __u64 *probe_offset, __u64 *probe_addr) +{ + int is_return_bit = bpf_get_retprobe_bit(event_type); + int type = bpf_find_probe_type(event_type); + struct perf_event_attr attr = {}; + int fd; + + if (type < 0 || is_return_bit < 0) { + printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n", + __func__, type, is_return_bit); + return -1; + } + + attr.sample_period = 1; + attr.wakeup_events = 1; + if (is_return) + attr.config |= 1 << is_return_bit; + + if (name) { + attr.config1 = ptr_to_u64((void *)name); + attr.config2 = offset; + } else { + attr.config1 = 0; + attr.config2 = addr; + } + attr.size = sizeof(attr); + attr.type = type; + + fd = sys_perf_event_open(&attr, -1, 0, -1, 0); + CHECK_PERROR_RET(fd < 0); + + CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0); + CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0); + CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len, + prog_id, fd_type, probe_offset, probe_addr) < 0); + + return 0; +} + +static int test_nondebug_fs_probe(const char *event_type, const char *name, + __u64 offset, __u64 addr, bool is_return, + __u32 expected_fd_type, + __u32 expected_ret_fd_type, + char *buf, __u32 buf_len) +{ + __u64 probe_offset, probe_addr; + __u32 prog_id, fd_type; + int err; + + err = test_nondebug_fs_kuprobe_common(event_type, name, + offset, addr, is_return, + buf, &buf_len, &prog_id, + &fd_type, &probe_offset, + &probe_addr); + if (err < 0) { + printf("FAIL: %s, " + "for name %s, offset 0x%llx, addr 0x%llx, is_return %d\n", + __func__, name ? name : "", offset, addr, is_return); + perror(" :"); + return -1; + } + if ((is_return && fd_type != expected_ret_fd_type) || + (!is_return && fd_type != expected_fd_type)) { + printf("FAIL: %s, incorrect fd_type %u\n", + __func__, fd_type); + return -1; + } + if (name) { + if (strcmp(name, buf) != 0) { + printf("FAIL: %s, incorrect buf %s\n", __func__, buf); + return -1; + } + if (probe_offset != offset) { + printf("FAIL: %s, incorrect probe_offset 0x%llx\n", + __func__, probe_offset); + return -1; + } + } else { + if (buf_len != 0) { + printf("FAIL: %s, incorrect buf %p\n", + __func__, buf); + return -1; + } + + if (probe_addr != addr) { + printf("FAIL: %s, incorrect probe_addr 0x%llx\n", + __func__, probe_addr); + return -1; + } + } + return 0; +} + +static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) +{ + const char *event_type = "uprobe"; + struct perf_event_attr attr = {}; + char buf[256], event_alias[256]; + __u64 probe_offset, probe_addr; + __u32 len, prog_id, fd_type; + int err, res, kfd, efd; + ssize_t bytes; + + snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", + event_type); + kfd = open(buf, O_WRONLY | O_APPEND, 0); + CHECK_PERROR_RET(kfd < 0); + + res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid()); + CHECK_PERROR_RET(res < 0 || res >= sizeof(event_alias)); + + res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx", + is_return ? 'r' : 'p', event_type, event_alias, + binary_path, offset); + CHECK_PERROR_RET(res < 0 || res >= sizeof(buf)); + CHECK_PERROR_RET(write(kfd, buf, strlen(buf)) < 0); + + close(kfd); + kfd = -1; + + snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s/id", + event_type, event_alias); + efd = open(buf, O_RDONLY, 0); + CHECK_PERROR_RET(efd < 0); + + bytes = read(efd, buf, sizeof(buf)); + CHECK_PERROR_RET(bytes <= 0 || bytes >= sizeof(buf)); + close(efd); + buf[bytes] = '\0'; + + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_period = 1; + attr.wakeup_events = 1; + kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); + CHECK_PERROR_RET(kfd < 0); + CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0); + CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_ENABLE, 0) < 0); + + len = sizeof(buf); + err = bpf_task_fd_query(getpid(), kfd, 0, buf, &len, + &prog_id, &fd_type, &probe_offset, + &probe_addr); + if (err < 0) { + printf("FAIL: %s, binary_path %s\n", __func__, binary_path); + perror(" :"); + return -1; + } + if ((is_return && fd_type != BPF_FD_TYPE_URETPROBE) || + (!is_return && fd_type != BPF_FD_TYPE_UPROBE)) { + printf("FAIL: %s, incorrect fd_type %u\n", __func__, + fd_type); + return -1; + } + if (strcmp(binary_path, buf) != 0) { + printf("FAIL: %s, incorrect buf %s\n", __func__, buf); + return -1; + } + if (probe_offset != offset) { + printf("FAIL: %s, incorrect probe_offset 0x%llx\n", __func__, + probe_offset); + return -1; + } + + close(kfd); + return 0; +} + +int main(int argc, char **argv) +{ + struct rlimit r = {1024*1024, RLIM_INFINITY}; + extern char __executable_start; + char filename[256], buf[256]; + __u64 uprobe_file_offset; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + + if (load_kallsyms()) { + printf("failed to process /proc/kallsyms\n"); + return 1; + } + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + /* test two functions in the corresponding *_kern.c file */ + CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_start_request", + BPF_FD_TYPE_KPROBE)); + CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion", + BPF_FD_TYPE_KRETPROBE)); + + /* test nondebug fs kprobe */ + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0, + false, BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + buf, sizeof(buf))); +#ifdef __x86_64__ + /* set a kprobe on "bpf_check + 0x5", which is x64 specific */ + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x5, 0x0, + false, BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + buf, sizeof(buf))); +#endif + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0, + true, BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + buf, sizeof(buf))); + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0, + ksym_get_addr("bpf_check"), false, + BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + buf, sizeof(buf))); + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0, + ksym_get_addr("bpf_check"), false, + BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + NULL, 0)); + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0, + ksym_get_addr("bpf_check"), true, + BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + buf, sizeof(buf))); + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0, + ksym_get_addr("bpf_check"), true, + BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + 0, 0)); + + /* test nondebug fs uprobe */ + /* the calculation of uprobe file offset is based on gcc 7.3.1 on x64 + * and the default linker script, which defines __executable_start as + * the start of the .text section. The calculation could be different + * on different systems with different compilers. The right way is + * to parse the ELF file. We took a shortcut here. + */ + uprobe_file_offset = (__u64)main - (__u64)&__executable_start; + CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0], + uprobe_file_offset, 0x0, false, + BPF_FD_TYPE_UPROBE, + BPF_FD_TYPE_URETPROBE, + buf, sizeof(buf))); + CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0], + uprobe_file_offset, 0x0, true, + BPF_FD_TYPE_UPROBE, + BPF_FD_TYPE_URETPROBE, + buf, sizeof(buf))); + + /* test debug fs uprobe */ + CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset, + false)); + CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset, + true)); + + return 0; +} diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c index 211db8d..ad10fe7 100644 --- a/samples/bpf/xdp_monitor_kern.c +++ b/samples/bpf/xdp_monitor_kern.c @@ -125,6 +125,7 @@ struct datarec { u64 processed; u64 dropped; u64 info; + u64 err; }; #define MAX_CPUS 64 @@ -208,3 +209,51 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx) return 0; } + +struct bpf_map_def SEC("maps") devmap_xmit_cnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(struct datarec), + .max_entries = 1, +}; + +/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct devmap_xmit_ctx { + u64 __pad; // First 8 bytes are not accessible by bpf code + int map_id; // offset:8; size:4; signed:1; + u32 act; // offset:12; size:4; signed:0; + u32 map_index; // offset:16; size:4; signed:0; + int drops; // offset:20; size:4; signed:1; + int sent; // offset:24; size:4; signed:1; + int from_ifindex; // offset:28; size:4; signed:1; + int to_ifindex; // offset:32; size:4; signed:1; + int err; // offset:36; size:4; signed:1; +}; + +SEC("tracepoint/xdp/xdp_devmap_xmit") +int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx) +{ + struct datarec *rec; + u32 key = 0; + + rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &key); + if (!rec) + return 0; + rec->processed += ctx->sent; + rec->dropped += ctx->drops; + + /* Record bulk events, then userspace can calc average bulk size */ + rec->info += 1; + + /* Record error cases, where no frame were sent */ + if (ctx->err) + rec->err++; + + /* Catch API error of drv ndo_xdp_xmit sent more than count */ + if (ctx->drops < 0) + rec->err++; + + return 1; +} diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index bf09b51..dd558cb 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -117,6 +117,7 @@ struct datarec { __u64 processed; __u64 dropped; __u64 info; + __u64 err; }; #define MAX_CPUS 64 @@ -141,6 +142,7 @@ struct stats_record { struct record_u64 xdp_exception[XDP_ACTION_MAX]; struct record xdp_cpumap_kthread; struct record xdp_cpumap_enqueue[MAX_CPUS]; + struct record xdp_devmap_xmit; }; static bool map_collect_record(int fd, __u32 key, struct record *rec) @@ -151,6 +153,7 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec) __u64 sum_processed = 0; __u64 sum_dropped = 0; __u64 sum_info = 0; + __u64 sum_err = 0; int i; if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { @@ -169,10 +172,13 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec) sum_dropped += values[i].dropped; rec->cpu[i].info = values[i].info; sum_info += values[i].info; + rec->cpu[i].err = values[i].err; + sum_err += values[i].err; } rec->total.processed = sum_processed; rec->total.dropped = sum_dropped; rec->total.info = sum_info; + rec->total.err = sum_err; return true; } @@ -273,6 +279,18 @@ static double calc_info(struct datarec *r, struct datarec *p, double period) return pps; } +static double calc_err(struct datarec *r, struct datarec *p, double period) +{ + __u64 packets = 0; + double pps = 0; + + if (period > 0) { + packets = r->err - p->err; + pps = packets / period; + } + return pps; +} + static void stats_print(struct stats_record *stats_rec, struct stats_record *stats_prev, bool err_only) @@ -397,7 +415,7 @@ static void stats_print(struct stats_record *stats_rec, info = calc_info(r, p, t); if (info > 0) i_str = "sched"; - if (pps > 0) + if (pps > 0 || drop > 0) printf(fmt1, "cpumap-kthread", i, pps, drop, info, i_str); } @@ -409,6 +427,50 @@ static void stats_print(struct stats_record *stats_rec, printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str); } + /* devmap ndo_xdp_xmit stats */ + { + char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s %s\n"; + char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s %s\n"; + struct record *rec, *prev; + double drop, info, err; + char *i_str = ""; + char *err_str = ""; + + rec = &stats_rec->xdp_devmap_xmit; + prev = &stats_prev->xdp_devmap_xmit; + t = calc_period(rec, prev); + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + + pps = calc_pps(r, p, t); + drop = calc_drop(r, p, t); + info = calc_info(r, p, t); + err = calc_err(r, p, t); + if (info > 0) { + i_str = "bulk-average"; + info = (pps+drop) / info; /* calc avg bulk */ + } + if (err > 0) + err_str = "drv-err"; + if (pps > 0 || drop > 0) + printf(fmt1, "devmap-xmit", + i, pps, drop, info, i_str, err_str); + } + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop(&rec->total, &prev->total, t); + info = calc_info(&rec->total, &prev->total, t); + err = calc_err(&rec->total, &prev->total, t); + if (info > 0) { + i_str = "bulk-average"; + info = (pps+drop) / info; /* calc avg bulk */ + } + if (err > 0) + err_str = "drv-err"; + printf(fmt2, "devmap-xmit", "total", pps, drop, + info, i_str, err_str); + } + printf("\n"); } @@ -437,6 +499,9 @@ static bool stats_collect(struct stats_record *rec) fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */ map_collect_record(fd, 0, &rec->xdp_cpumap_kthread); + fd = map_data[4].fd; /* map4: devmap_xmit_cnt */ + map_collect_record(fd, 0, &rec->xdp_devmap_xmit); + return true; } @@ -480,6 +545,7 @@ static struct stats_record *alloc_stats_record(void) rec_sz = sizeof(struct datarec); rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz); + rec->xdp_devmap_xmit.cpu = alloc_rec_per_cpu(rec_sz); for (i = 0; i < MAX_CPUS; i++) rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz); @@ -498,6 +564,7 @@ static void free_stats_record(struct stats_record *r) free(r->xdp_exception[i].cpu); free(r->xdp_cpumap_kthread.cpu); + free(r->xdp_devmap_xmit.cpu); for (i = 0; i < MAX_CPUS; i++) free(r->xdp_cpumap_enqueue[i].cpu); diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 7fe60f6..e379eac 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1,15 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 2017 - 2018 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ +/* Copyright(c) 2017 - 2018 Intel Corporation. */ #include <assert.h> #include <errno.h> @@ -89,7 +79,10 @@ struct xdp_umem_uqueue { u32 cached_cons; u32 mask; u32 size; - struct xdp_umem_ring *ring; + u32 *producer; + u32 *consumer; + u32 *ring; + void *map; }; struct xdp_umem { @@ -104,7 +97,10 @@ struct xdp_uqueue { u32 cached_cons; u32 mask; u32 size; - struct xdp_rxtx_ring *ring; + u32 *producer; + u32 *consumer; + struct xdp_desc *ring; + void *map; }; struct xdpsock { @@ -165,7 +161,7 @@ static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb) return free_entries; /* Refresh the local tail pointer */ - q->cached_cons = q->ring->ptrs.consumer; + q->cached_cons = *q->consumer; return q->size - (q->cached_prod - q->cached_cons); } @@ -178,7 +174,7 @@ static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs) return free_entries; /* Refresh the local tail pointer */ - q->cached_cons = q->ring->ptrs.consumer + q->size; + q->cached_cons = *q->consumer + q->size; return q->cached_cons - q->cached_prod; } @@ -187,7 +183,7 @@ static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb) u32 entries = q->cached_prod - q->cached_cons; if (entries == 0) { - q->cached_prod = q->ring->ptrs.producer; + q->cached_prod = *q->producer; entries = q->cached_prod - q->cached_cons; } @@ -199,7 +195,7 @@ static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs) u32 entries = q->cached_prod - q->cached_cons; if (entries == 0) { - q->cached_prod = q->ring->ptrs.producer; + q->cached_prod = *q->producer; entries = q->cached_prod - q->cached_cons; } @@ -218,12 +214,12 @@ static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq, for (i = 0; i < nb; i++) { u32 idx = fq->cached_prod++ & fq->mask; - fq->ring->desc[idx] = d[i].idx; + fq->ring[idx] = d[i].idx; } u_smp_wmb(); - fq->ring->ptrs.producer = fq->cached_prod; + *fq->producer = fq->cached_prod; return 0; } @@ -239,12 +235,12 @@ static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d, for (i = 0; i < nb; i++) { u32 idx = fq->cached_prod++ & fq->mask; - fq->ring->desc[idx] = d[i]; + fq->ring[idx] = d[i]; } u_smp_wmb(); - fq->ring->ptrs.producer = fq->cached_prod; + *fq->producer = fq->cached_prod; return 0; } @@ -258,13 +254,13 @@ static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq, for (i = 0; i < entries; i++) { idx = cq->cached_cons++ & cq->mask; - d[i] = cq->ring->desc[idx]; + d[i] = cq->ring[idx]; } if (entries > 0) { u_smp_wmb(); - cq->ring->ptrs.consumer = cq->cached_cons; + *cq->consumer = cq->cached_cons; } return entries; @@ -280,7 +276,7 @@ static inline int xq_enq(struct xdp_uqueue *uq, const struct xdp_desc *descs, unsigned int ndescs) { - struct xdp_rxtx_ring *r = uq->ring; + struct xdp_desc *r = uq->ring; unsigned int i; if (xq_nb_free(uq, ndescs) < ndescs) @@ -289,21 +285,21 @@ static inline int xq_enq(struct xdp_uqueue *uq, for (i = 0; i < ndescs; i++) { u32 idx = uq->cached_prod++ & uq->mask; - r->desc[idx].idx = descs[i].idx; - r->desc[idx].len = descs[i].len; - r->desc[idx].offset = descs[i].offset; + r[idx].idx = descs[i].idx; + r[idx].len = descs[i].len; + r[idx].offset = descs[i].offset; } u_smp_wmb(); - r->ptrs.producer = uq->cached_prod; + *uq->producer = uq->cached_prod; return 0; } static inline int xq_enq_tx_only(struct xdp_uqueue *uq, __u32 idx, unsigned int ndescs) { - struct xdp_rxtx_ring *q = uq->ring; + struct xdp_desc *r = uq->ring; unsigned int i; if (xq_nb_free(uq, ndescs) < ndescs) @@ -312,14 +308,14 @@ static inline int xq_enq_tx_only(struct xdp_uqueue *uq, for (i = 0; i < ndescs; i++) { u32 idx = uq->cached_prod++ & uq->mask; - q->desc[idx].idx = idx + i; - q->desc[idx].len = sizeof(pkt_data) - 1; - q->desc[idx].offset = 0; + r[idx].idx = idx + i; + r[idx].len = sizeof(pkt_data) - 1; + r[idx].offset = 0; } u_smp_wmb(); - q->ptrs.producer = uq->cached_prod; + *uq->producer = uq->cached_prod; return 0; } @@ -327,7 +323,7 @@ static inline int xq_deq(struct xdp_uqueue *uq, struct xdp_desc *descs, int ndescs) { - struct xdp_rxtx_ring *r = uq->ring; + struct xdp_desc *r = uq->ring; unsigned int idx; int i, entries; @@ -337,13 +333,13 @@ static inline int xq_deq(struct xdp_uqueue *uq, for (i = 0; i < entries; i++) { idx = uq->cached_cons++ & uq->mask; - descs[i] = r->desc[idx]; + descs[i] = r[idx]; } if (entries > 0) { u_smp_wmb(); - r->ptrs.consumer = uq->cached_cons; + *uq->consumer = uq->cached_cons; } return entries; @@ -402,8 +398,10 @@ static size_t gen_eth_frame(char *frame) static struct xdp_umem *xdp_umem_configure(int sfd) { int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS; + struct xdp_mmap_offsets off; struct xdp_umem_reg mr; struct xdp_umem *umem; + socklen_t optlen; void *bufs; umem = calloc(1, sizeof(*umem)); @@ -423,25 +421,35 @@ static struct xdp_umem *xdp_umem_configure(int sfd) lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size, sizeof(int)) == 0); - umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) + - FQ_NUM_DESCS * sizeof(u32), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, sfd, - XDP_UMEM_PGOFF_FILL_RING); - lassert(umem->fq.ring != MAP_FAILED); + optlen = sizeof(off); + lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off, + &optlen) == 0); + + umem->fq.map = mmap(0, off.fr.desc + + FQ_NUM_DESCS * sizeof(u32), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, sfd, + XDP_UMEM_PGOFF_FILL_RING); + lassert(umem->fq.map != MAP_FAILED); umem->fq.mask = FQ_NUM_DESCS - 1; umem->fq.size = FQ_NUM_DESCS; + umem->fq.producer = umem->fq.map + off.fr.producer; + umem->fq.consumer = umem->fq.map + off.fr.consumer; + umem->fq.ring = umem->fq.map + off.fr.desc; - umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) + + umem->cq.map = mmap(0, off.cr.desc + CQ_NUM_DESCS * sizeof(u32), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, sfd, XDP_UMEM_PGOFF_COMPLETION_RING); - lassert(umem->cq.ring != MAP_FAILED); + lassert(umem->cq.map != MAP_FAILED); umem->cq.mask = CQ_NUM_DESCS - 1; umem->cq.size = CQ_NUM_DESCS; + umem->cq.producer = umem->cq.map + off.cr.producer; + umem->cq.consumer = umem->cq.map + off.cr.consumer; + umem->cq.ring = umem->cq.map + off.cr.desc; umem->frames = (char (*)[FRAME_SIZE])bufs; umem->fd = sfd; @@ -459,9 +467,11 @@ static struct xdp_umem *xdp_umem_configure(int sfd) static struct xdpsock *xsk_configure(struct xdp_umem *umem) { struct sockaddr_xdp sxdp = {}; + struct xdp_mmap_offsets off; int sfd, ndescs = NUM_DESCS; struct xdpsock *xsk; bool shared = true; + socklen_t optlen; u32 i; sfd = socket(PF_XDP, SOCK_RAW, 0); @@ -484,15 +494,18 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem) &ndescs, sizeof(int)) == 0); lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING, &ndescs, sizeof(int)) == 0); + optlen = sizeof(off); + lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off, + &optlen) == 0); /* Rx */ - xsk->rx.ring = mmap(NULL, - sizeof(struct xdp_ring) + - NUM_DESCS * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, sfd, - XDP_PGOFF_RX_RING); - lassert(xsk->rx.ring != MAP_FAILED); + xsk->rx.map = mmap(NULL, + off.rx.desc + + NUM_DESCS * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, sfd, + XDP_PGOFF_RX_RING); + lassert(xsk->rx.map != MAP_FAILED); if (!shared) { for (i = 0; i < NUM_DESCS / 2; i++) @@ -501,19 +514,25 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem) } /* Tx */ - xsk->tx.ring = mmap(NULL, - sizeof(struct xdp_ring) + - NUM_DESCS * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, sfd, - XDP_PGOFF_TX_RING); - lassert(xsk->tx.ring != MAP_FAILED); + xsk->tx.map = mmap(NULL, + off.tx.desc + + NUM_DESCS * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, sfd, + XDP_PGOFF_TX_RING); + lassert(xsk->tx.map != MAP_FAILED); xsk->rx.mask = NUM_DESCS - 1; xsk->rx.size = NUM_DESCS; + xsk->rx.producer = xsk->rx.map + off.rx.producer; + xsk->rx.consumer = xsk->rx.map + off.rx.consumer; + xsk->rx.ring = xsk->rx.map + off.rx.desc; xsk->tx.mask = NUM_DESCS - 1; xsk->tx.size = NUM_DESCS; + xsk->tx.producer = xsk->tx.map + off.tx.producer; + xsk->tx.consumer = xsk->tx.map + off.tx.consumer; + xsk->tx.ring = xsk->tx.map + off.tx.desc; sxdp.sxdp_family = PF_XDP; sxdp.sxdp_ifindex = opt_ifindex; |