From d912557b346099584bbbfa8d3c1e101c46e33b59 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 6 Jul 2015 16:20:07 +0200 Subject: samples: bpf: enable trace samples for s390x The trace bpf samples do not compile on s390x because they use x86 specific fields from the "pt_regs" structure. Fix this and access the fields via new PT_REGS macros. Signed-off-by: Michael Holzheu Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/bpf_helpers.h | 25 +++++++++++++++++++++++++ samples/bpf/tracex1_kern.c | 2 +- samples/bpf/tracex2_kern.c | 6 +++--- samples/bpf/tracex3_kern.c | 4 ++-- samples/bpf/tracex4_kern.c | 6 +++--- samples/bpf/tracex5_kern.c | 6 +++--- 6 files changed, 37 insertions(+), 12 deletions(-) (limited to 'samples') diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index bdf1c16..c77c872 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -60,4 +60,29 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = (void *) BPF_FUNC_l4_csum_replace; +#if defined(__x86_64__) + +#define PT_REGS_PARM1(x) ((x)->di) +#define PT_REGS_PARM2(x) ((x)->si) +#define PT_REGS_PARM3(x) ((x)->dx) +#define PT_REGS_PARM4(x) ((x)->cx) +#define PT_REGS_PARM5(x) ((x)->r8) +#define PT_REGS_RET(x) ((x)->sp) +#define PT_REGS_FP(x) ((x)->bp) +#define PT_REGS_RC(x) ((x)->ax) +#define PT_REGS_SP(x) ((x)->sp) + +#elif defined(__s390x__) + +#define PT_REGS_PARM1(x) ((x)->gprs[2]) +#define PT_REGS_PARM2(x) ((x)->gprs[3]) +#define PT_REGS_PARM3(x) ((x)->gprs[4]) +#define PT_REGS_PARM4(x) ((x)->gprs[5]) +#define PT_REGS_PARM5(x) ((x)->gprs[6]) +#define PT_REGS_RET(x) ((x)->gprs[14]) +#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->gprs[2]) +#define PT_REGS_SP(x) ((x)->gprs[15]) + +#endif #endif diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c index 3162046..3f450a8 100644 --- a/samples/bpf/tracex1_kern.c +++ b/samples/bpf/tracex1_kern.c @@ -29,7 +29,7 @@ int bpf_prog1(struct pt_regs *ctx) int len; /* non-portable! works for the given kernel only */ - skb = (struct sk_buff *) ctx->di; + skb = (struct sk_buff *) PT_REGS_PARM1(ctx); dev = _(skb->dev); diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c index dc50f4f..b32367c 100644 --- a/samples/bpf/tracex2_kern.c +++ b/samples/bpf/tracex2_kern.c @@ -27,10 +27,10 @@ int bpf_prog2(struct pt_regs *ctx) long init_val = 1; long *value; - /* x64 specific: read ip of kfree_skb caller. + /* x64/s390x specific: read ip of kfree_skb caller. * non-portable version of __builtin_return_address(0) */ - bpf_probe_read(&loc, sizeof(loc), (void *)ctx->sp); + bpf_probe_read(&loc, sizeof(loc), (void *)PT_REGS_RET(ctx)); value = bpf_map_lookup_elem(&my_map, &loc); if (value) @@ -79,7 +79,7 @@ struct bpf_map_def SEC("maps") my_hist_map = { SEC("kprobe/sys_write") int bpf_prog3(struct pt_regs *ctx) { - long write_size = ctx->dx; /* arg3 */ + long write_size = PT_REGS_PARM3(ctx); long init_val = 1; long *value; struct hist_key key = {}; diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c index 255ff27..bf337fb 100644 --- a/samples/bpf/tracex3_kern.c +++ b/samples/bpf/tracex3_kern.c @@ -23,7 +23,7 @@ struct bpf_map_def SEC("maps") my_map = { SEC("kprobe/blk_mq_start_request") int bpf_prog1(struct pt_regs *ctx) { - long rq = ctx->di; + long rq = PT_REGS_PARM1(ctx); u64 val = bpf_ktime_get_ns(); bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY); @@ -51,7 +51,7 @@ struct bpf_map_def SEC("maps") lat_map = { SEC("kprobe/blk_update_request") int bpf_prog2(struct pt_regs *ctx) { - long rq = ctx->di; + long rq = PT_REGS_PARM1(ctx); u64 *value, l, base; u32 index; diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4_kern.c index 126b805..ac46714 100644 --- a/samples/bpf/tracex4_kern.c +++ b/samples/bpf/tracex4_kern.c @@ -27,7 +27,7 @@ struct bpf_map_def SEC("maps") my_map = { SEC("kprobe/kmem_cache_free") int bpf_prog1(struct pt_regs *ctx) { - long ptr = ctx->si; + long ptr = PT_REGS_PARM2(ctx); bpf_map_delete_elem(&my_map, &ptr); return 0; @@ -36,11 +36,11 @@ int bpf_prog1(struct pt_regs *ctx) SEC("kretprobe/kmem_cache_alloc_node") int bpf_prog2(struct pt_regs *ctx) { - long ptr = ctx->ax; + long ptr = PT_REGS_RC(ctx); long ip = 0; /* get ip address of kmem_cache_alloc_node() caller */ - bpf_probe_read(&ip, sizeof(ip), (void *)(ctx->bp + sizeof(ip))); + bpf_probe_read(&ip, sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip))); struct pair v = { .val = bpf_ktime_get_ns(), diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c index b71fe07..b3f4295 100644 --- a/samples/bpf/tracex5_kern.c +++ b/samples/bpf/tracex5_kern.c @@ -24,7 +24,7 @@ int bpf_prog1(struct pt_regs *ctx) { struct seccomp_data sd = {}; - bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); + bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); /* dispatch into next BPF program depending on syscall number */ bpf_tail_call(ctx, &progs, sd.nr); @@ -42,7 +42,7 @@ PROG(__NR_write)(struct pt_regs *ctx) { struct seccomp_data sd = {}; - bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); + bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); if (sd.args[2] == 512) { char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), @@ -55,7 +55,7 @@ PROG(__NR_read)(struct pt_regs *ctx) { struct seccomp_data sd = {}; - bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); + bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); if (sd.args[2] > 128 && sd.args[2] <= 1024) { char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), -- cgit v1.1 From 24b4d2abd0bd628f396dada3e915d395cbf459eb Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Thu, 23 Jul 2015 14:24:40 -0700 Subject: ebpf: Allow dereferences of PTR_TO_STACK registers mov %rsp, %r1 ; r1 = rsp add $-8, %r1 ; r1 = rsp - 8 store_q $123, -8(%rsp) ; *(u64*)r1 = 123 <- valid store_q $123, (%r1) ; *(u64*)r1 = 123 <- previously invalid mov $0, %r0 exit ; Always need to exit And we'd get the following error: 0: (bf) r1 = r10 1: (07) r1 += -8 2: (7a) *(u64 *)(r10 -8) = 999 3: (7a) *(u64 *)(r1 +0) = 999 R1 invalid mem access 'fp' Unable to load program We already know that a register is a stack address and the appropriate offset, so we should be able to validate those references as well. Signed-off-by: Alex Gartrell Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/test_verifier.c | 59 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'samples') diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index 6936059..ee0f110 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -822,6 +822,65 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "PTR_TO_STACK store/load", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "PTR_TO_STACK store/load - bad alignment on off", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "misaligned access off -6 size 8", + }, + { + "PTR_TO_STACK store/load - bad alignment on reg", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "misaligned access off -2 size 8", + }, + { + "PTR_TO_STACK store/load - out of bounds low", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -80000), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off=-79992 size=8", + }, + { + "PTR_TO_STACK store/load - out of bounds high", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off=0 size=8", + }, }; static int probe_filter_length(struct bpf_insn *fp) -- cgit v1.1 From 47efb30274cbec1bd3c0c980a7ece328df2c16a8 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Thu, 6 Aug 2015 07:02:36 +0000 Subject: samples/bpf: example of get selected PMU counter value This is a simple example and shows how to use the new ability to get the selected Hardware PMU counter value. Signed-off-by: Kaixu Xia Signed-off-by: David S. Miller --- samples/bpf/Makefile | 4 +++ samples/bpf/bpf_helpers.h | 2 ++ samples/bpf/tracex6_kern.c | 26 ++++++++++++++++++ samples/bpf/tracex6_user.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+) create mode 100644 samples/bpf/tracex6_kern.c create mode 100644 samples/bpf/tracex6_user.c (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 4450fed..63e7d50 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -12,6 +12,7 @@ hostprogs-y += tracex2 hostprogs-y += tracex3 hostprogs-y += tracex4 hostprogs-y += tracex5 +hostprogs-y += tracex6 hostprogs-y += lathist test_verifier-objs := test_verifier.o libbpf.o @@ -25,6 +26,7 @@ tracex2-objs := bpf_load.o libbpf.o tracex2_user.o tracex3-objs := bpf_load.o libbpf.o tracex3_user.o tracex4-objs := bpf_load.o libbpf.o tracex4_user.o tracex5-objs := bpf_load.o libbpf.o tracex5_user.o +tracex6-objs := bpf_load.o libbpf.o tracex6_user.o lathist-objs := bpf_load.o libbpf.o lathist_user.o # Tell kbuild to always build the programs @@ -37,6 +39,7 @@ always += tracex2_kern.o always += tracex3_kern.o always += tracex4_kern.o always += tracex5_kern.o +always += tracex6_kern.o always += tcbpf1_kern.o always += lathist_kern.o @@ -51,6 +54,7 @@ HOSTLOADLIBES_tracex2 += -lelf HOSTLOADLIBES_tracex3 += -lelf HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex5 += -lelf +HOSTLOADLIBES_tracex6 += -lelf HOSTLOADLIBES_lathist += -lelf # point this to your LLVM backend with bpf support diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index c77c872..3a44d3a 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -31,6 +31,8 @@ static unsigned long long (*bpf_get_current_uid_gid)(void) = (void *) BPF_FUNC_get_current_uid_gid; static int (*bpf_get_current_comm)(void *buf, int buf_size) = (void *) BPF_FUNC_get_current_comm; +static int (*bpf_perf_event_read)(void *map, int index) = + (void *) BPF_FUNC_perf_event_read; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c new file mode 100644 index 0000000..23d1cff --- /dev/null +++ b/samples/bpf/tracex6_kern.c @@ -0,0 +1,26 @@ +#include +#include +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") my_map = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(u32), + .max_entries = 32, +}; + +SEC("kprobe/sys_write") +int bpf_prog1(struct pt_regs *ctx) +{ + u64 count; + u32 key = bpf_get_smp_processor_id(); + char fmt[] = "CPU-%d %llu\n"; + + count = bpf_perf_event_read(&my_map, key); + bpf_trace_printk(fmt, sizeof(fmt), key, count); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c new file mode 100644 index 0000000..928f05e --- /dev/null +++ b/samples/bpf/tracex6_user.c @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "libbpf.h" +#include "bpf_load.h" + +#define SAMPLE_PERIOD 0x7fffffffffffffffULL + +static void test_bpf_perf_event(void) +{ + int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + int *pmu_fd = malloc(nr_cpus * sizeof(int)); + unsigned long value; + int i; + + struct perf_event_attr attr_insn_pmu = { + .freq = 0, + .sample_period = SAMPLE_PERIOD, + .inherit = 0, + .type = PERF_TYPE_HARDWARE, + .read_format = 0, + .sample_type = 0, + .config = 0,/* PMU: cycles */ + }; + + for (i = 0; i < nr_cpus; i++) { + pmu_fd[i] = perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0); + if (pmu_fd[i] < 0) + printf("event syscall failed\n"); + + bpf_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY); + ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0); + } + + system("ls"); + system("pwd"); + system("sleep 2"); + + for (i = 0; i < nr_cpus; i++) + close(pmu_fd[i]); + + close(map_fd); + + free(pmu_fd); +} + +int main(int argc, char **argv) +{ + char filename[256]; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + test_bpf_perf_event(); + + return 0; +} -- cgit v1.1 From 5ed3ccbd5ac11414d61c16182718e68868becc16 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Wed, 12 Aug 2015 09:37:53 +0000 Subject: bpf: fix build warnings and add function read_trace_pipe() There are two improvements in this patch: 1. Fix the build warnings; 2. Add function read_trace_pipe() to print the result on the screen; Before this patch, we can get the result through /sys/kernel/de bug/tracing/trace_pipe and get nothing on the screen. By applying this patch, the result can be printed on the screen. $ ./tracex6 ... tracex6-705 [003] d..1 131.428593: : CPU-3 19981414 sshd-683 [000] d..1 131.428727: : CPU-0 221682321 sshd-683 [000] d..1 131.428821: : CPU-0 221808766 sshd-683 [000] d..1 131.428950: : CPU-0 221982984 sshd-683 [000] d..1 131.429045: : CPU-0 222111851 tracex6-705 [003] d..1 131.429168: : CPU-3 20757551 sshd-683 [000] d..1 131.429170: : CPU-0 222281240 sshd-683 [000] d..1 131.429261: : CPU-0 222403340 sshd-683 [000] d..1 131.429378: : CPU-0 222561024 ... Signed-off-by: Kaixu Xia Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/tracex6_kern.c | 1 + samples/bpf/tracex6_user.c | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'samples') diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c index 23d1cff..be479c4 100644 --- a/samples/bpf/tracex6_kern.c +++ b/samples/bpf/tracex6_kern.c @@ -1,3 +1,4 @@ +#include #include #include #include "bpf_helpers.h" diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c index 928f05e..8ea4976 100644 --- a/samples/bpf/tracex6_user.c +++ b/samples/bpf/tracex6_user.c @@ -17,8 +17,7 @@ static void test_bpf_perf_event(void) { int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); int *pmu_fd = malloc(nr_cpus * sizeof(int)); - unsigned long value; - int i; + int status, i; struct perf_event_attr attr_insn_pmu = { .freq = 0, @@ -32,22 +31,26 @@ static void test_bpf_perf_event(void) for (i = 0; i < nr_cpus; i++) { pmu_fd[i] = perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0); - if (pmu_fd[i] < 0) + if (pmu_fd[i] < 0) { printf("event syscall failed\n"); + goto exit; + } bpf_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY); ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0); } - system("ls"); - system("pwd"); - system("sleep 2"); + status = system("ls > /dev/null"); + if (status) + goto exit; + status = system("sleep 2"); + if (status) + goto exit; +exit: for (i = 0; i < nr_cpus; i++) close(pmu_fd[i]); - - close(map_fd); - + close(map_fd[0]); free(pmu_fd); } @@ -63,6 +66,7 @@ int main(int argc, char **argv) } test_bpf_perf_event(); + read_trace_pipe(); return 0; } -- cgit v1.1