summaryrefslogtreecommitdiffstats
path: root/samples/bpf
diff options
context:
space:
mode:
Diffstat (limited to 'samples/bpf')
-rw-r--r--samples/bpf/Makefile12
-rw-r--r--samples/bpf/bpf_helpers.h3
-rw-r--r--samples/bpf/bpf_load.c70
-rw-r--r--samples/bpf/bpf_load.h6
-rw-r--r--samples/bpf/fds_example.c2
-rw-r--r--samples/bpf/libbpf.c5
-rw-r--r--samples/bpf/libbpf.h2
-rw-r--r--samples/bpf/map_perf_test_kern.c100
-rw-r--r--samples/bpf/map_perf_test_user.c155
-rw-r--r--samples/bpf/offwaketime_kern.c131
-rw-r--r--samples/bpf/offwaketime_user.c120
-rw-r--r--samples/bpf/sock_example.c2
-rw-r--r--samples/bpf/spintest_kern.c68
-rw-r--r--samples/bpf/spintest_user.c50
-rw-r--r--samples/bpf/test_maps.c211
-rw-r--r--samples/bpf/test_verifier.c4
-rw-r--r--samples/bpf/tracex2_kern.c2
-rw-r--r--samples/bpf/tracex2_user.c7
-rw-r--r--samples/bpf/tracex3_kern.c8
-rw-r--r--samples/bpf/tracex3_user.c21
20 files changed, 952 insertions, 27 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index edd638b..502c9fc 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -16,6 +16,9 @@ hostprogs-y += tracex5
hostprogs-y += tracex6
hostprogs-y += trace_output
hostprogs-y += lathist
+hostprogs-y += offwaketime
+hostprogs-y += spintest
+hostprogs-y += map_perf_test
test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
@@ -32,6 +35,9 @@ tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
tracex6-objs := bpf_load.o libbpf.o tracex6_user.o
trace_output-objs := bpf_load.o libbpf.o trace_output_user.o
lathist-objs := bpf_load.o libbpf.o lathist_user.o
+offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o
+spintest-objs := bpf_load.o libbpf.o spintest_user.o
+map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -47,6 +53,9 @@ always += tracex6_kern.o
always += trace_output_kern.o
always += tcbpf1_kern.o
always += lathist_kern.o
+always += offwaketime_kern.o
+always += spintest_kern.o
+always += map_perf_test_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
@@ -63,6 +72,9 @@ HOSTLOADLIBES_tracex5 += -lelf
HOSTLOADLIBES_tracex6 += -lelf
HOSTLOADLIBES_trace_output += -lelf -lrt
HOSTLOADLIBES_lathist += -lelf
+HOSTLOADLIBES_offwaketime += -lelf
+HOSTLOADLIBES_spintest += -lelf
+HOSTLOADLIBES_map_perf_test += -lelf -lrt
# point this to your LLVM backend with bpf support
LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 7ad19e1..9363500 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -39,6 +39,8 @@ static int (*bpf_redirect)(int ifindex, int flags) =
(void *) BPF_FUNC_redirect;
static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) =
(void *) BPF_FUNC_perf_event_output;
+static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
+ (void *) BPF_FUNC_get_stackid;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -59,6 +61,7 @@ struct bpf_map_def {
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
+ unsigned int map_flags;
};
static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index da86a8e..58f86bd 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -157,9 +157,13 @@ static int load_maps(struct bpf_map_def *maps, int len)
map_fd[i] = bpf_create_map(maps[i].type,
maps[i].key_size,
maps[i].value_size,
- maps[i].max_entries);
- if (map_fd[i] < 0)
+ maps[i].max_entries,
+ maps[i].map_flags);
+ if (map_fd[i] < 0) {
+ printf("failed to create a map: %d %s\n",
+ errno, strerror(errno));
return 1;
+ }
if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
prog_array_fd = map_fd[i];
@@ -343,3 +347,65 @@ void read_trace_pipe(void)
}
}
}
+
+#define MAX_SYMS 300000
+static struct ksym syms[MAX_SYMS];
+static int sym_cnt;
+
+static int ksym_cmp(const void *p1, const void *p2)
+{
+ return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
+}
+
+int load_kallsyms(void)
+{
+ FILE *f = fopen("/proc/kallsyms", "r");
+ char func[256], buf[256];
+ char symbol;
+ void *addr;
+ int i = 0;
+
+ if (!f)
+ return -ENOENT;
+
+ while (!feof(f)) {
+ if (!fgets(buf, sizeof(buf), f))
+ break;
+ if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
+ break;
+ if (!addr)
+ continue;
+ syms[i].addr = (long) addr;
+ syms[i].name = strdup(func);
+ i++;
+ }
+ sym_cnt = i;
+ qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
+ return 0;
+}
+
+struct ksym *ksym_search(long key)
+{
+ int start = 0, end = sym_cnt;
+ int result;
+
+ while (start < end) {
+ size_t mid = start + (end - start) / 2;
+
+ result = key - syms[mid].addr;
+ if (result < 0)
+ end = mid;
+ else if (result > 0)
+ start = mid + 1;
+ else
+ return &syms[mid];
+ }
+
+ if (start >= 1 && syms[start - 1].addr < key &&
+ key < syms[start].addr)
+ /* valid ksym */
+ return &syms[start - 1];
+
+ /* out of range. return _stext */
+ return &syms[0];
+}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index cbd7c2b..dfa57fe 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -23,5 +23,11 @@ extern int event_fd[MAX_PROGS];
int load_bpf_file(char *path);
void read_trace_pipe(void);
+struct ksym {
+ long addr;
+ char *name;
+};
+int load_kallsyms(void);
+struct ksym *ksym_search(long key);
#endif
diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c
index e2fd16c..625e797 100644
--- a/samples/bpf/fds_example.c
+++ b/samples/bpf/fds_example.c
@@ -44,7 +44,7 @@ static void usage(void)
static int bpf_map_create(void)
{
return bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t),
- sizeof(uint32_t), 1024);
+ sizeof(uint32_t), 1024, 0);
}
static int bpf_prog_create(const char *object)
diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c
index 65a8d48..9969e35 100644
--- a/samples/bpf/libbpf.c
+++ b/samples/bpf/libbpf.c
@@ -19,13 +19,14 @@ static __u64 ptr_to_u64(void *ptr)
}
int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
- int max_entries)
+ int max_entries, int map_flags)
{
union bpf_attr attr = {
.map_type = map_type,
.key_size = key_size,
.value_size = value_size,
- .max_entries = max_entries
+ .max_entries = max_entries,
+ .map_flags = map_flags,
};
return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
index 014aacf..364582b 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/libbpf.h
@@ -5,7 +5,7 @@
struct bpf_insn;
int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
- int max_entries);
+ int max_entries, int map_flags);
int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags);
int bpf_lookup_elem(int fd, void *key, void *value);
int bpf_delete_elem(int fd, void *key);
diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
new file mode 100644
index 0000000..311538e
--- /dev/null
+++ b/samples/bpf/map_perf_test_kern.c
@@ -0,0 +1,100 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define MAX_ENTRIES 1000
+
+struct bpf_map_def SEC("maps") hash_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = MAX_ENTRIES,
+};
+
+struct bpf_map_def SEC("maps") percpu_hash_map = {
+ .type = BPF_MAP_TYPE_PERCPU_HASH,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = MAX_ENTRIES,
+};
+
+struct bpf_map_def SEC("maps") hash_map_alloc = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = MAX_ENTRIES,
+ .map_flags = BPF_F_NO_PREALLOC,
+};
+
+struct bpf_map_def SEC("maps") percpu_hash_map_alloc = {
+ .type = BPF_MAP_TYPE_PERCPU_HASH,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = MAX_ENTRIES,
+ .map_flags = BPF_F_NO_PREALLOC,
+};
+
+SEC("kprobe/sys_getuid")
+int stress_hmap(struct pt_regs *ctx)
+{
+ u32 key = bpf_get_current_pid_tgid();
+ long init_val = 1;
+ long *value;
+
+ bpf_map_update_elem(&hash_map, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&hash_map, &key);
+ if (value)
+ bpf_map_delete_elem(&hash_map, &key);
+ return 0;
+}
+
+SEC("kprobe/sys_geteuid")
+int stress_percpu_hmap(struct pt_regs *ctx)
+{
+ u32 key = bpf_get_current_pid_tgid();
+ long init_val = 1;
+ long *value;
+
+ bpf_map_update_elem(&percpu_hash_map, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&percpu_hash_map, &key);
+ if (value)
+ bpf_map_delete_elem(&percpu_hash_map, &key);
+ return 0;
+}
+SEC("kprobe/sys_getgid")
+int stress_hmap_alloc(struct pt_regs *ctx)
+{
+ u32 key = bpf_get_current_pid_tgid();
+ long init_val = 1;
+ long *value;
+
+ bpf_map_update_elem(&hash_map_alloc, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&hash_map_alloc, &key);
+ if (value)
+ bpf_map_delete_elem(&hash_map_alloc, &key);
+ return 0;
+}
+
+SEC("kprobe/sys_getegid")
+int stress_percpu_hmap_alloc(struct pt_regs *ctx)
+{
+ u32 key = bpf_get_current_pid_tgid();
+ long init_val = 1;
+ long *value;
+
+ bpf_map_update_elem(&percpu_hash_map_alloc, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&percpu_hash_map_alloc, &key);
+ if (value)
+ bpf_map_delete_elem(&percpu_hash_map_alloc, &key);
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
new file mode 100644
index 0000000..95af56e
--- /dev/null
+++ b/samples/bpf/map_perf_test_user.c
@@ -0,0 +1,155 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <asm/unistd.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <linux/bpf.h>
+#include <string.h>
+#include <time.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_CNT 1000000
+
+static __u64 time_get_ns(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec * 1000000000ull + ts.tv_nsec;
+}
+
+#define HASH_PREALLOC (1 << 0)
+#define PERCPU_HASH_PREALLOC (1 << 1)
+#define HASH_KMALLOC (1 << 2)
+#define PERCPU_HASH_KMALLOC (1 << 3)
+
+static int test_flags = ~0;
+
+static void test_hash_prealloc(int cpu)
+{
+ __u64 start_time;
+ int i;
+
+ start_time = time_get_ns();
+ for (i = 0; i < MAX_CNT; i++)
+ syscall(__NR_getuid);
+ printf("%d:hash_map_perf pre-alloc %lld events per sec\n",
+ cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+}
+
+static void test_percpu_hash_prealloc(int cpu)
+{
+ __u64 start_time;
+ int i;
+
+ start_time = time_get_ns();
+ for (i = 0; i < MAX_CNT; i++)
+ syscall(__NR_geteuid);
+ printf("%d:percpu_hash_map_perf pre-alloc %lld events per sec\n",
+ cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+}
+
+static void test_hash_kmalloc(int cpu)
+{
+ __u64 start_time;
+ int i;
+
+ start_time = time_get_ns();
+ for (i = 0; i < MAX_CNT; i++)
+ syscall(__NR_getgid);
+ printf("%d:hash_map_perf kmalloc %lld events per sec\n",
+ cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+}
+
+static void test_percpu_hash_kmalloc(int cpu)
+{
+ __u64 start_time;
+ int i;
+
+ start_time = time_get_ns();
+ for (i = 0; i < MAX_CNT; i++)
+ syscall(__NR_getegid);
+ printf("%d:percpu_hash_map_perf kmalloc %lld events per sec\n",
+ cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+}
+
+static void loop(int cpu)
+{
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+ sched_setaffinity(0, sizeof(cpuset), &cpuset);
+
+ if (test_flags & HASH_PREALLOC)
+ test_hash_prealloc(cpu);
+
+ if (test_flags & PERCPU_HASH_PREALLOC)
+ test_percpu_hash_prealloc(cpu);
+
+ if (test_flags & HASH_KMALLOC)
+ test_hash_kmalloc(cpu);
+
+ if (test_flags & PERCPU_HASH_KMALLOC)
+ test_percpu_hash_kmalloc(cpu);
+}
+
+static void run_perf_test(int tasks)
+{
+ pid_t pid[tasks];
+ int i;
+
+ for (i = 0; i < tasks; i++) {
+ pid[i] = fork();
+ if (pid[i] == 0) {
+ loop(i);
+ exit(0);
+ } else if (pid[i] == -1) {
+ printf("couldn't spawn #%d process\n", i);
+ exit(1);
+ }
+ }
+ for (i = 0; i < tasks; i++) {
+ int status;
+
+ assert(waitpid(pid[i], &status, 0) == pid[i]);
+ assert(status == 0);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ char filename[256];
+ int num_cpu = 8;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ setrlimit(RLIMIT_MEMLOCK, &r);
+
+ if (argc > 1)
+ test_flags = atoi(argv[1]) ? : test_flags;
+
+ if (argc > 2)
+ num_cpu = atoi(argv[2]) ? : num_cpu;
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ run_perf_test(num_cpu);
+
+ return 0;
+}
diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c
new file mode 100644
index 0000000..c0aa5a9
--- /dev/null
+++ b/samples/bpf/offwaketime_kern.c
@@ -0,0 +1,131 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+#include <uapi/linux/ptrace.h>
+#include <uapi/linux/perf_event.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+
+#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+
+#define MINBLOCK_US 1
+
+struct key_t {
+ char waker[TASK_COMM_LEN];
+ char target[TASK_COMM_LEN];
+ u32 wret;
+ u32 tret;
+};
+
+struct bpf_map_def SEC("maps") counts = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct key_t),
+ .value_size = sizeof(u64),
+ .max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") start = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = 10000,
+};
+
+struct wokeby_t {
+ char name[TASK_COMM_LEN];
+ u32 ret;
+};
+
+struct bpf_map_def SEC("maps") wokeby = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct wokeby_t),
+ .max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+ .type = BPF_MAP_TYPE_STACK_TRACE,
+ .key_size = sizeof(u32),
+ .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
+ .max_entries = 10000,
+};
+
+#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
+
+SEC("kprobe/try_to_wake_up")
+int waker(struct pt_regs *ctx)
+{
+ struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
+ struct wokeby_t woke = {};
+ u32 pid;
+
+ pid = _(p->pid);
+
+ bpf_get_current_comm(&woke.name, sizeof(woke.name));
+ woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
+
+ bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY);
+ return 0;
+}
+
+static inline int update_counts(struct pt_regs *ctx, u32 pid, u64 delta)
+{
+ struct key_t key = {};
+ struct wokeby_t *woke;
+ u64 zero = 0, *val;
+
+ bpf_get_current_comm(&key.target, sizeof(key.target));
+ key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
+
+ woke = bpf_map_lookup_elem(&wokeby, &pid);
+ if (woke) {
+ key.wret = woke->ret;
+ __builtin_memcpy(&key.waker, woke->name, TASK_COMM_LEN);
+ bpf_map_delete_elem(&wokeby, &pid);
+ }
+
+ val = bpf_map_lookup_elem(&counts, &key);
+ if (!val) {
+ bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
+ val = bpf_map_lookup_elem(&counts, &key);
+ if (!val)
+ return 0;
+ }
+ (*val) += delta;
+ return 0;
+}
+
+SEC("kprobe/finish_task_switch")
+int oncpu(struct pt_regs *ctx)
+{
+ struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
+ u64 delta, ts, *tsp;
+ u32 pid;
+
+ /* record previous thread sleep time */
+ pid = _(p->pid);
+ ts = bpf_ktime_get_ns();
+ bpf_map_update_elem(&start, &pid, &ts, BPF_ANY);
+
+ /* calculate current thread's delta time */
+ pid = bpf_get_current_pid_tgid();
+ tsp = bpf_map_lookup_elem(&start, &pid);
+ if (!tsp)
+ /* missed start or filtered */
+ return 0;
+
+ delta = bpf_ktime_get_ns() - *tsp;
+ bpf_map_delete_elem(&start, &pid);
+ delta = delta / 1000;
+ if (delta < MINBLOCK_US)
+ return 0;
+
+ return update_counts(ctx, pid, delta);
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c
new file mode 100644
index 0000000..6f002a9
--- /dev/null
+++ b/samples/bpf/offwaketime_user.c
@@ -0,0 +1,120 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <linux/bpf.h>
+#include <string.h>
+#include <linux/perf_event.h>
+#include <errno.h>
+#include <assert.h>
+#include <stdbool.h>
+#include <sys/resource.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define PRINT_RAW_ADDR 0
+
+static void print_ksym(__u64 addr)
+{
+ struct ksym *sym;
+
+ if (!addr)
+ return;
+ sym = ksym_search(addr);
+ if (PRINT_RAW_ADDR)
+ printf("%s/%llx;", sym->name, addr);
+ else
+ printf("%s;", sym->name);
+}
+
+#define TASK_COMM_LEN 16
+
+struct key_t {
+ char waker[TASK_COMM_LEN];
+ char target[TASK_COMM_LEN];
+ __u32 wret;
+ __u32 tret;
+};
+
+static void print_stack(struct key_t *key, __u64 count)
+{
+ __u64 ip[PERF_MAX_STACK_DEPTH] = {};
+ static bool warned;
+ int i;
+
+ printf("%s;", key->target);
+ if (bpf_lookup_elem(map_fd[3], &key->tret, ip) != 0) {
+ printf("---;");
+ } else {
+ for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
+ print_ksym(ip[i]);
+ }
+ printf("-;");
+ if (bpf_lookup_elem(map_fd[3], &key->wret, ip) != 0) {
+ printf("---;");
+ } else {
+ for (i = 0; i < PERF_MAX_STACK_DEPTH; i++)
+ print_ksym(ip[i]);
+ }
+ printf(";%s %lld\n", key->waker, count);
+
+ if ((key->tret == -EEXIST || key->wret == -EEXIST) && !warned) {
+ printf("stackmap collisions seen. Consider increasing size\n");
+ warned = true;
+ } else if (((int)(key->tret) < 0 || (int)(key->wret) < 0)) {
+ printf("err stackid %d %d\n", key->tret, key->wret);
+ }
+}
+
+static void print_stacks(int fd)
+{
+ struct key_t key = {}, next_key;
+ __u64 value;
+
+ while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+ bpf_lookup_elem(fd, &next_key, &value);
+ print_stack(&next_key, value);
+ key = next_key;
+ }
+}
+
+static void int_exit(int sig)
+{
+ print_stacks(map_fd[0]);
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ char filename[256];
+ int delay = 1;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ setrlimit(RLIMIT_MEMLOCK, &r);
+
+ signal(SIGINT, int_exit);
+
+ if (load_kallsyms()) {
+ printf("failed to process /proc/kallsyms\n");
+ return 2;
+ }
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ if (argc > 1)
+ delay = atoi(argv[1]);
+ sleep(delay);
+ print_stacks(map_fd[0]);
+
+ return 0;
+}
diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c
index a0ce251..28b60ba 100644
--- a/samples/bpf/sock_example.c
+++ b/samples/bpf/sock_example.c
@@ -34,7 +34,7 @@ static int test_sock(void)
long long value = 0, tcp_cnt, udp_cnt, icmp_cnt;
map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
- 256);
+ 256, 0);
if (map_fd < 0) {
printf("failed to create map '%s'\n", strerror(errno));
goto cleanup;
diff --git a/samples/bpf/spintest_kern.c b/samples/bpf/spintest_kern.c
new file mode 100644
index 0000000..4b27619
--- /dev/null
+++ b/samples/bpf/spintest_kern.c
@@ -0,0 +1,68 @@
+/* Copyright (c) 2016, Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/perf_event.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(long),
+ .value_size = sizeof(long),
+ .max_entries = 1024,
+};
+struct bpf_map_def SEC("maps") my_map2 = {
+ .type = BPF_MAP_TYPE_PERCPU_HASH,
+ .key_size = sizeof(long),
+ .value_size = sizeof(long),
+ .max_entries = 1024,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+ .type = BPF_MAP_TYPE_STACK_TRACE,
+ .key_size = sizeof(u32),
+ .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
+ .max_entries = 10000,
+};
+
+#define PROG(foo) \
+int foo(struct pt_regs *ctx) \
+{ \
+ long v = ctx->ip, *val; \
+\
+ val = bpf_map_lookup_elem(&my_map, &v); \
+ bpf_map_update_elem(&my_map, &v, &v, BPF_ANY); \
+ bpf_map_update_elem(&my_map2, &v, &v, BPF_ANY); \
+ bpf_map_delete_elem(&my_map2, &v); \
+ bpf_get_stackid(ctx, &stackmap, BPF_F_REUSE_STACKID); \
+ return 0; \
+}
+
+/* add kprobes to all possible *spin* functions */
+SEC("kprobe/spin_unlock")PROG(p1)
+SEC("kprobe/spin_lock")PROG(p2)
+SEC("kprobe/mutex_spin_on_owner")PROG(p3)
+SEC("kprobe/rwsem_spin_on_owner")PROG(p4)
+SEC("kprobe/spin_unlock_irqrestore")PROG(p5)
+SEC("kprobe/_raw_spin_unlock_irqrestore")PROG(p6)
+SEC("kprobe/_raw_spin_unlock_bh")PROG(p7)
+SEC("kprobe/_raw_spin_unlock")PROG(p8)
+SEC("kprobe/_raw_spin_lock_irqsave")PROG(p9)
+SEC("kprobe/_raw_spin_trylock_bh")PROG(p10)
+SEC("kprobe/_raw_spin_lock_irq")PROG(p11)
+SEC("kprobe/_raw_spin_trylock")PROG(p12)
+SEC("kprobe/_raw_spin_lock")PROG(p13)
+SEC("kprobe/_raw_spin_lock_bh")PROG(p14)
+/* and to inner bpf helpers */
+SEC("kprobe/htab_map_update_elem")PROG(p15)
+SEC("kprobe/__htab_percpu_map_update_elem")PROG(p16)
+SEC("kprobe/htab_map_alloc")PROG(p17)
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c
new file mode 100644
index 0000000..311ede5
--- /dev/null
+++ b/samples/bpf/spintest_user.c
@@ -0,0 +1,50 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/resource.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+int main(int ac, char **argv)
+{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ long key, next_key, value;
+ char filename[256];
+ struct ksym *sym;
+ int i;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ setrlimit(RLIMIT_MEMLOCK, &r);
+
+ if (load_kallsyms()) {
+ printf("failed to process /proc/kallsyms\n");
+ return 2;
+ }
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ for (i = 0; i < 5; i++) {
+ key = 0;
+ printf("kprobing funcs:");
+ while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) {
+ bpf_lookup_elem(map_fd[0], &next_key, &value);
+ assert(next_key == value);
+ sym = ksym_search(value);
+ printf(" %s", sym->name);
+ key = next_key;
+ }
+ if (key)
+ printf("\n");
+ key = 0;
+ while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0)
+ bpf_delete_elem(map_fd[0], &next_key);
+ sleep(1);
+ }
+
+ return 0;
+}
diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c
index 6299ee9..47bf085 100644
--- a/samples/bpf/test_maps.c
+++ b/samples/bpf/test_maps.c
@@ -2,6 +2,7 @@
* Testsuite for eBPF maps
*
* Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -17,13 +18,16 @@
#include <stdlib.h>
#include "libbpf.h"
+static int map_flags;
+
/* sanity tests for map API */
static void test_hashmap_sanity(int i, void *data)
{
long long key, next_key, value;
int map_fd;
- map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), 2);
+ map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ 2, map_flags);
if (map_fd < 0) {
printf("failed to create hashmap '%s'\n", strerror(errno));
exit(1);
@@ -89,12 +93,107 @@ static void test_hashmap_sanity(int i, void *data)
close(map_fd);
}
+/* sanity tests for percpu map API */
+static void test_percpu_hashmap_sanity(int task, void *data)
+{
+ long long key, next_key;
+ int expected_key_mask = 0;
+ unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ long long value[nr_cpus];
+ int map_fd, i;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
+ sizeof(value[0]), 2, map_flags);
+ if (map_fd < 0) {
+ printf("failed to create hashmap '%s'\n", strerror(errno));
+ exit(1);
+ }
+
+ for (i = 0; i < nr_cpus; i++)
+ value[i] = i + 100;
+ key = 1;
+ /* insert key=1 element */
+ assert(!(expected_key_mask & key));
+ assert(bpf_update_elem(map_fd, &key, value, BPF_ANY) == 0);
+ expected_key_mask |= key;
+
+ /* BPF_NOEXIST means: add new element if it doesn't exist */
+ assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == -1 &&
+ /* key=1 already exists */
+ errno == EEXIST);
+
+ /* -1 is an invalid flag */
+ assert(bpf_update_elem(map_fd, &key, value, -1) == -1 &&
+ errno == EINVAL);
+
+ /* check that key=1 can be found. value could be 0 if the lookup
+ * was run from a different cpu.
+ */
+ value[0] = 1;
+ assert(bpf_lookup_elem(map_fd, &key, value) == 0 && value[0] == 100);
+
+ key = 2;
+ /* check that key=2 is not found */
+ assert(bpf_lookup_elem(map_fd, &key, value) == -1 && errno == ENOENT);
+
+ /* BPF_EXIST means: update existing element */
+ assert(bpf_update_elem(map_fd, &key, value, BPF_EXIST) == -1 &&
+ /* key=2 is not there */
+ errno == ENOENT);
+
+ /* insert key=2 element */
+ assert(!(expected_key_mask & key));
+ assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);
+ expected_key_mask |= key;
+
+ /* key=1 and key=2 were inserted, check that key=0 cannot be inserted
+ * due to max_entries limit
+ */
+ key = 0;
+ assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == -1 &&
+ errno == E2BIG);
+
+ /* check that key = 0 doesn't exist */
+ assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
+
+ /* iterate over two elements */
+ while (!bpf_get_next_key(map_fd, &key, &next_key)) {
+ assert((expected_key_mask & next_key) == next_key);
+ expected_key_mask &= ~next_key;
+
+ assert(bpf_lookup_elem(map_fd, &next_key, value) == 0);
+ for (i = 0; i < nr_cpus; i++)
+ assert(value[i] == i + 100);
+
+ key = next_key;
+ }
+ assert(errno == ENOENT);
+
+ /* Update with BPF_EXIST */
+ key = 1;
+ assert(bpf_update_elem(map_fd, &key, value, BPF_EXIST) == 0);
+
+ /* delete both elements */
+ key = 1;
+ assert(bpf_delete_elem(map_fd, &key) == 0);
+ key = 2;
+ assert(bpf_delete_elem(map_fd, &key) == 0);
+ assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
+
+ key = 0;
+ /* check that map is empty */
+ assert(bpf_get_next_key(map_fd, &key, &next_key) == -1 &&
+ errno == ENOENT);
+ close(map_fd);
+}
+
static void test_arraymap_sanity(int i, void *data)
{
int key, next_key, map_fd;
long long value;
- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), 2);
+ map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
+ 2, 0);
if (map_fd < 0) {
printf("failed to create arraymap '%s'\n", strerror(errno));
exit(1);
@@ -142,6 +241,94 @@ static void test_arraymap_sanity(int i, void *data)
close(map_fd);
}
+static void test_percpu_arraymap_many_keys(void)
+{
+ unsigned nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ unsigned nr_keys = 20000;
+ long values[nr_cpus];
+ int key, map_fd, i;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
+ sizeof(values[0]), nr_keys, 0);
+ if (map_fd < 0) {
+ printf("failed to create per-cpu arraymap '%s'\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ for (i = 0; i < nr_cpus; i++)
+ values[i] = i + 10;
+
+ for (key = 0; key < nr_keys; key++)
+ assert(bpf_update_elem(map_fd, &key, values, BPF_ANY) == 0);
+
+ for (key = 0; key < nr_keys; key++) {
+ for (i = 0; i < nr_cpus; i++)
+ values[i] = 0;
+ assert(bpf_lookup_elem(map_fd, &key, values) == 0);
+ for (i = 0; i < nr_cpus; i++)
+ assert(values[i] == i + 10);
+ }
+
+ close(map_fd);
+}
+
+static void test_percpu_arraymap_sanity(int i, void *data)
+{
+ unsigned nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ long values[nr_cpus];
+ int key, next_key, map_fd;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
+ sizeof(values[0]), 2, 0);
+ if (map_fd < 0) {
+ printf("failed to create arraymap '%s'\n", strerror(errno));
+ exit(1);
+ }
+
+ for (i = 0; i < nr_cpus; i++)
+ values[i] = i + 100;
+
+ key = 1;
+ /* insert key=1 element */
+ assert(bpf_update_elem(map_fd, &key, values, BPF_ANY) == 0);
+
+ values[0] = 0;
+ assert(bpf_update_elem(map_fd, &key, values, BPF_NOEXIST) == -1 &&
+ errno == EEXIST);
+
+ /* check that key=1 can be found */
+ assert(bpf_lookup_elem(map_fd, &key, values) == 0 && values[0] == 100);
+
+ key = 0;
+ /* check that key=0 is also found and zero initialized */
+ assert(bpf_lookup_elem(map_fd, &key, values) == 0 &&
+ values[0] == 0 && values[nr_cpus - 1] == 0);
+
+
+ /* check that key=2 cannot be inserted due to max_entries limit */
+ key = 2;
+ assert(bpf_update_elem(map_fd, &key, values, BPF_EXIST) == -1 &&
+ errno == E2BIG);
+
+ /* check that key = 2 doesn't exist */
+ assert(bpf_lookup_elem(map_fd, &key, values) == -1 && errno == ENOENT);
+
+ /* iterate over two elements */
+ assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 &&
+ next_key == 0);
+ assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 &&
+ next_key == 1);
+ assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 &&
+ errno == ENOENT);
+
+ /* delete shouldn't succeed */
+ key = 1;
+ assert(bpf_delete_elem(map_fd, &key) == -1 && errno == EINVAL);
+
+ close(map_fd);
+}
+
#define MAP_SIZE (32 * 1024)
static void test_map_large(void)
{
@@ -154,7 +341,7 @@ static void test_map_large(void)
/* allocate 4Mbyte of memory */
map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE);
+ MAP_SIZE, map_flags);
if (map_fd < 0) {
printf("failed to create large map '%s'\n", strerror(errno));
exit(1);
@@ -209,7 +396,9 @@ static void run_parallel(int tasks, void (*fn)(int i, void *data), void *data)
static void test_map_stress(void)
{
run_parallel(100, test_hashmap_sanity, NULL);
+ run_parallel(100, test_percpu_hashmap_sanity, NULL);
run_parallel(100, test_arraymap_sanity, NULL);
+ run_parallel(100, test_percpu_arraymap_sanity, NULL);
}
#define TASKS 1024
@@ -237,7 +426,7 @@ static void test_map_parallel(void)
int data[2];
map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE);
+ MAP_SIZE, map_flags);
if (map_fd < 0) {
printf("failed to create map for parallel test '%s'\n",
strerror(errno));
@@ -279,13 +468,25 @@ static void test_map_parallel(void)
assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT);
}
-int main(void)
+static void run_all_tests(void)
{
test_hashmap_sanity(0, NULL);
+ test_percpu_hashmap_sanity(0, NULL);
test_arraymap_sanity(0, NULL);
+ test_percpu_arraymap_sanity(0, NULL);
+ test_percpu_arraymap_many_keys();
+
test_map_large();
test_map_parallel();
test_map_stress();
+}
+
+int main(void)
+{
+ map_flags = 0;
+ run_all_tests();
+ map_flags = BPF_F_NO_PREALLOC;
+ run_all_tests();
printf("test_maps: OK\n");
return 0;
}
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
index 563c507..4b51a90 100644
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@@ -1198,7 +1198,7 @@ static int create_map(void)
int map_fd;
map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
- sizeof(long long), sizeof(long long), 1024);
+ sizeof(long long), sizeof(long long), 1024, 0);
if (map_fd < 0)
printf("failed to create map '%s'\n", strerror(errno));
@@ -1210,7 +1210,7 @@ static int create_prog_array(void)
int map_fd;
map_fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY,
- sizeof(int), sizeof(int), 4);
+ sizeof(int), sizeof(int), 4, 0);
if (map_fd < 0)
printf("failed to create prog_array '%s'\n", strerror(errno));
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
index b32367c..09c1adc 100644
--- a/samples/bpf/tracex2_kern.c
+++ b/samples/bpf/tracex2_kern.c
@@ -70,7 +70,7 @@ struct hist_key {
};
struct bpf_map_def SEC("maps") my_hist_map = {
- .type = BPF_MAP_TYPE_HASH,
+ .type = BPF_MAP_TYPE_PERCPU_HASH,
.key_size = sizeof(struct hist_key),
.value_size = sizeof(long),
.max_entries = 1024,
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index cd0241c..ab5b19e 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -37,6 +37,8 @@ struct hist_key {
static void print_hist_for_pid(int fd, void *task)
{
struct hist_key key = {}, next_key;
+ unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ long values[nr_cpus];
char starstr[MAX_STARS];
long value;
long data[MAX_INDEX] = {};
@@ -49,7 +51,10 @@ static void print_hist_for_pid(int fd, void *task)
key = next_key;
continue;
}
- bpf_lookup_elem(fd, &next_key, &value);
+ bpf_lookup_elem(fd, &next_key, values);
+ value = 0;
+ for (i = 0; i < nr_cpus; i++)
+ value += values[i];
ind = next_key.index;
data[ind] = value;
if (value && ind > max_ind)
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
index bf337fb..9974c3d 100644
--- a/samples/bpf/tracex3_kern.c
+++ b/samples/bpf/tracex3_kern.c
@@ -20,7 +20,7 @@ struct bpf_map_def SEC("maps") my_map = {
/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
* example will no longer be meaningful
*/
-SEC("kprobe/blk_mq_start_request")
+SEC("kprobe/blk_start_request")
int bpf_prog1(struct pt_regs *ctx)
{
long rq = PT_REGS_PARM1(ctx);
@@ -42,13 +42,13 @@ static unsigned int log2l(unsigned long long n)
#define SLOTS 100
struct bpf_map_def SEC("maps") lat_map = {
- .type = BPF_MAP_TYPE_ARRAY,
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
.key_size = sizeof(u32),
.value_size = sizeof(u64),
.max_entries = SLOTS,
};
-SEC("kprobe/blk_update_request")
+SEC("kprobe/blk_account_io_completion")
int bpf_prog2(struct pt_regs *ctx)
{
long rq = PT_REGS_PARM1(ctx);
@@ -81,7 +81,7 @@ int bpf_prog2(struct pt_regs *ctx)
value = bpf_map_lookup_elem(&lat_map, &index);
if (value)
- __sync_fetch_and_add((long *)value, 1);
+ *value += 1;
return 0;
}
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
index 0aaa933..48716f7 100644
--- a/samples/bpf/tracex3_user.c
+++ b/samples/bpf/tracex3_user.c
@@ -20,11 +20,13 @@
static void clear_stats(int fd)
{
+ unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ __u64 values[nr_cpus];
__u32 key;
- __u64 value = 0;
+ memset(values, 0, sizeof(values));
for (key = 0; key < SLOTS; key++)
- bpf_update_elem(fd, &key, &value, BPF_ANY);
+ bpf_update_elem(fd, &key, values, BPF_ANY);
}
const char *color[] = {
@@ -75,15 +77,20 @@ static void print_banner(void)
static void print_hist(int fd)
{
- __u32 key;
- __u64 value;
- __u64 cnt[SLOTS];
- __u64 max_cnt = 0;
+ unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
__u64 total_events = 0;
+ long values[nr_cpus];
+ __u64 max_cnt = 0;
+ __u64 cnt[SLOTS];
+ __u64 value;
+ __u32 key;
+ int i;
for (key = 0; key < SLOTS; key++) {
+ bpf_lookup_elem(fd, &key, values);
value = 0;
- bpf_lookup_elem(fd, &key, &value);
+ for (i = 0; i < nr_cpus; i++)
+ value += values[i];
cnt[key] = value;
total_events += value;
if (value > max_cnt)
OpenPOWER on IntegriCloud