summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/perf_event.h5
-rw-r--r--include/linux/trace_events.h17
-rw-r--r--include/uapi/linux/bpf.h26
-rw-r--r--kernel/bpf/syscall.c131
-rw-r--r--kernel/events/core.c8
-rw-r--r--kernel/trace/bpf_trace.c48
-rw-r--r--kernel/trace/trace_kprobe.c29
-rw-r--r--kernel/trace/trace_uprobe.c22
-rw-r--r--samples/bpf/Makefile4
-rw-r--r--samples/bpf/task_fd_query_kern.c19
-rw-r--r--samples/bpf/task_fd_query_user.c382
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-perf.rst81
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst5
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool9
-rw-r--r--tools/bpf/bpftool/main.c3
-rw-r--r--tools/bpf/bpftool/main.h1
-rw-r--r--tools/bpf/bpftool/perf.c246
-rw-r--r--tools/include/uapi/linux/bpf.h26
-rw-r--r--tools/lib/bpf/bpf.c23
-rw-r--r--tools/lib/bpf/bpf.h3
-rw-r--r--tools/testing/selftests/bpf/test_progs.c158
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c12
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h1
23 files changed, 1257 insertions, 2 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e71e99e..eec302b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -868,6 +868,7 @@ extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
extern void perf_event_delayed_put(struct task_struct *task);
extern struct file *perf_event_get(unsigned int fd);
+extern const struct perf_event *perf_get_event(struct file *file);
extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
extern void perf_event_print_debug(void);
extern void perf_pmu_disable(struct pmu *pmu);
@@ -1289,6 +1290,10 @@ static inline void perf_event_exit_task(struct task_struct *child) { }
static inline void perf_event_free_task(struct task_struct *task) { }
static inline void perf_event_delayed_put(struct task_struct *task) { }
static inline struct file *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); }
+static inline const struct perf_event *perf_get_event(struct file *file)
+{
+ return ERR_PTR(-EINVAL);
+}
static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
{
return ERR_PTR(-EINVAL);
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 2bde3ef..d34144a 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -473,6 +473,9 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info);
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
+int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
+ u32 *fd_type, const char **buf,
+ u64 *probe_offset, u64 *probe_addr);
#else
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
@@ -504,6 +507,13 @@ static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name
{
return NULL;
}
+static inline int bpf_get_perf_event_info(const struct perf_event *event,
+ u32 *prog_id, u32 *fd_type,
+ const char **buf, u64 *probe_offset,
+ u64 *probe_addr)
+{
+ return -EOPNOTSUPP;
+}
#endif
enum {
@@ -560,10 +570,17 @@ extern void perf_trace_del(struct perf_event *event, int flags);
#ifdef CONFIG_KPROBE_EVENTS
extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe);
extern void perf_kprobe_destroy(struct perf_event *event);
+extern int bpf_get_kprobe_info(const struct perf_event *event,
+ u32 *fd_type, const char **symbol,
+ u64 *probe_offset, u64 *probe_addr,
+ bool perf_type_tracepoint);
#endif
#ifdef CONFIG_UPROBE_EVENTS
extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe);
extern void perf_uprobe_destroy(struct perf_event *event);
+extern int bpf_get_uprobe_info(const struct perf_event *event,
+ u32 *fd_type, const char **filename,
+ u64 *probe_offset, bool perf_type_tracepoint);
#endif
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e95fec9..9b8c6e3 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -97,6 +97,7 @@ enum bpf_cmd {
BPF_RAW_TRACEPOINT_OPEN,
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
+ BPF_TASK_FD_QUERY,
};
enum bpf_map_type {
@@ -380,6 +381,22 @@ union bpf_attr {
__u32 btf_log_size;
__u32 btf_log_level;
};
+
+ struct {
+ __u32 pid; /* input: pid */
+ __u32 fd; /* input: fd */
+ __u32 flags; /* input: flags */
+ __u32 buf_len; /* input/output: buf len */
+ __aligned_u64 buf; /* input/output:
+ * tp_name for tracepoint
+ * symbol for kprobe
+ * filename for uprobe
+ */
+ __u32 prog_id; /* output: prod_id */
+ __u32 fd_type; /* output: BPF_FD_TYPE_* */
+ __u64 probe_offset; /* output: probe_offset */
+ __u64 probe_addr; /* output: probe_addr */
+ } task_fd_query;
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -2557,4 +2574,13 @@ struct bpf_fib_lookup {
__u8 dmac[6]; /* ETH_ALEN */
};
+enum bpf_task_fd_type {
+ BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_UPROBE, /* filename + offset */
+ BPF_FD_TYPE_URETPROBE, /* filename + offset */
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 788456c..388d4fe 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -18,7 +18,9 @@
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
+#include <linux/fdtable.h>
#include <linux/file.h>
+#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
@@ -2178,6 +2180,132 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
return btf_get_fd_by_id(attr->btf_id);
}
+static int bpf_task_fd_query_copy(const union bpf_attr *attr,
+ union bpf_attr __user *uattr,
+ u32 prog_id, u32 fd_type,
+ const char *buf, u64 probe_offset,
+ u64 probe_addr)
+{
+ char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
+ u32 len = buf ? strlen(buf) : 0, input_len;
+ int err = 0;
+
+ if (put_user(len, &uattr->task_fd_query.buf_len))
+ return -EFAULT;
+ input_len = attr->task_fd_query.buf_len;
+ if (input_len && ubuf) {
+ if (!len) {
+ /* nothing to copy, just make ubuf NULL terminated */
+ char zero = '\0';
+
+ if (put_user(zero, ubuf))
+ return -EFAULT;
+ } else if (input_len >= len + 1) {
+ /* ubuf can hold the string with NULL terminator */
+ if (copy_to_user(ubuf, buf, len + 1))
+ return -EFAULT;
+ } else {
+ /* ubuf cannot hold the string with NULL terminator,
+ * do a partial copy with NULL terminator.
+ */
+ char zero = '\0';
+
+ err = -ENOSPC;
+ if (copy_to_user(ubuf, buf, input_len - 1))
+ return -EFAULT;
+ if (put_user(zero, ubuf + input_len - 1))
+ return -EFAULT;
+ }
+ }
+
+ if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
+ put_user(fd_type, &uattr->task_fd_query.fd_type) ||
+ put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
+ put_user(probe_addr, &uattr->task_fd_query.probe_addr))
+ return -EFAULT;
+
+ return err;
+}
+
+#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
+
+static int bpf_task_fd_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ pid_t pid = attr->task_fd_query.pid;
+ u32 fd = attr->task_fd_query.fd;
+ const struct perf_event *event;
+ struct files_struct *files;
+ struct task_struct *task;
+ struct file *file;
+ int err;
+
+ if (CHECK_ATTR(BPF_TASK_FD_QUERY))
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (attr->task_fd_query.flags != 0)
+ return -EINVAL;
+
+ task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
+ if (!task)
+ return -ENOENT;
+
+ files = get_files_struct(task);
+ put_task_struct(task);
+ if (!files)
+ return -ENOENT;
+
+ err = 0;
+ spin_lock(&files->file_lock);
+ file = fcheck_files(files, fd);
+ if (!file)
+ err = -EBADF;
+ else
+ get_file(file);
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+
+ if (err)
+ goto out;
+
+ if (file->f_op == &bpf_raw_tp_fops) {
+ struct bpf_raw_tracepoint *raw_tp = file->private_data;
+ struct bpf_raw_event_map *btp = raw_tp->btp;
+
+ err = bpf_task_fd_query_copy(attr, uattr,
+ raw_tp->prog->aux->id,
+ BPF_FD_TYPE_RAW_TRACEPOINT,
+ btp->tp->name, 0, 0);
+ goto put_file;
+ }
+
+ event = perf_get_event(file);
+ if (!IS_ERR(event)) {
+ u64 probe_offset, probe_addr;
+ u32 prog_id, fd_type;
+ const char *buf;
+
+ err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
+ &buf, &probe_offset,
+ &probe_addr);
+ if (!err)
+ err = bpf_task_fd_query_copy(attr, uattr, prog_id,
+ fd_type, buf,
+ probe_offset,
+ probe_addr);
+ goto put_file;
+ }
+
+ err = -ENOTSUPP;
+put_file:
+ fput(file);
+out:
+ return err;
+}
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
@@ -2264,6 +2392,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_BTF_GET_FD_BY_ID:
err = bpf_btf_get_fd_by_id(&attr);
break;
+ case BPF_TASK_FD_QUERY:
+ err = bpf_task_fd_query(&attr, uattr);
+ break;
default:
err = -EINVAL;
break;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 67612ce..6eeab86 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11212,6 +11212,14 @@ struct file *perf_event_get(unsigned int fd)
return file;
}
+const struct perf_event *perf_get_event(struct file *file)
+{
+ if (file->f_op != &perf_fops)
+ return ERR_PTR(-EINVAL);
+
+ return file->private_data;
+}
+
const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
{
if (!event)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ce2cbbf..81fdf2f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -14,6 +14,7 @@
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/kprobes.h>
+#include <linux/syscalls.h>
#include <linux/error-injection.h>
#include "trace_probe.h"
@@ -1163,3 +1164,50 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
mutex_unlock(&bpf_event_mutex);
return err;
}
+
+int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
+ u32 *fd_type, const char **buf,
+ u64 *probe_offset, u64 *probe_addr)
+{
+ bool is_tracepoint, is_syscall_tp;
+ struct bpf_prog *prog;
+ int flags, err = 0;
+
+ prog = event->prog;
+ if (!prog)
+ return -ENOENT;
+
+ /* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
+ if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
+ return -EOPNOTSUPP;
+
+ *prog_id = prog->aux->id;
+ flags = event->tp_event->flags;
+ is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
+ is_syscall_tp = is_syscall_trace_event(event->tp_event);
+
+ if (is_tracepoint || is_syscall_tp) {
+ *buf = is_tracepoint ? event->tp_event->tp->name
+ : event->tp_event->name;
+ *fd_type = BPF_FD_TYPE_TRACEPOINT;
+ *probe_offset = 0x0;
+ *probe_addr = 0x0;
+ } else {
+ /* kprobe/uprobe */
+ err = -EOPNOTSUPP;
+#ifdef CONFIG_KPROBE_EVENTS
+ if (flags & TRACE_EVENT_FL_KPROBE)
+ err = bpf_get_kprobe_info(event, fd_type, buf,
+ probe_offset, probe_addr,
+ event->attr.type == PERF_TYPE_TRACEPOINT);
+#endif
+#ifdef CONFIG_UPROBE_EVENTS
+ if (flags & TRACE_EVENT_FL_UPROBE)
+ err = bpf_get_uprobe_info(event, fd_type, buf,
+ probe_offset,
+ event->attr.type == PERF_TYPE_TRACEPOINT);
+#endif
+ }
+
+ return err;
+}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 02aed76..daa8157 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1287,6 +1287,35 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
head, NULL);
}
NOKPROBE_SYMBOL(kretprobe_perf_func);
+
+int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
+ const char **symbol, u64 *probe_offset,
+ u64 *probe_addr, bool perf_type_tracepoint)
+{
+ const char *pevent = trace_event_name(event->tp_event);
+ const char *group = event->tp_event->class->system;
+ struct trace_kprobe *tk;
+
+ if (perf_type_tracepoint)
+ tk = find_trace_kprobe(pevent, group);
+ else
+ tk = event->tp_event->data;
+ if (!tk)
+ return -EINVAL;
+
+ *fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
+ : BPF_FD_TYPE_KPROBE;
+ if (tk->symbol) {
+ *symbol = tk->symbol;
+ *probe_offset = tk->rp.kp.offset;
+ *probe_addr = 0;
+ } else {
+ *symbol = NULL;
+ *probe_offset = 0;
+ *probe_addr = (unsigned long)tk->rp.kp.addr;
+ }
+ return 0;
+}
#endif /* CONFIG_PERF_EVENTS */
/*
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index ac89287..bf89a51 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1161,6 +1161,28 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
{
__uprobe_perf_func(tu, func, regs, ucb, dsize);
}
+
+int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
+ const char **filename, u64 *probe_offset,
+ bool perf_type_tracepoint)
+{
+ const char *pevent = trace_event_name(event->tp_event);
+ const char *group = event->tp_event->class->system;
+ struct trace_uprobe *tu;
+
+ if (perf_type_tracepoint)
+ tu = find_probe_event(pevent, group);
+ else
+ tu = event->tp_event->data;
+ if (!tu)
+ return -EINVAL;
+
+ *fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE
+ : BPF_FD_TYPE_UPROBE;
+ *filename = tu->filename;
+ *probe_offset = tu->offset;
+ return 0;
+}
#endif /* CONFIG_PERF_EVENTS */
static int
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 62d1aa1..7dc85ed 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -51,6 +51,7 @@ hostprogs-y += cpustat
hostprogs-y += xdp_adjust_tail
hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
+hostprogs-y += task_fd_query
# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
xdpsock-objs := bpf_load.o xdpsock_user.o
xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
+task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -160,6 +162,7 @@ always += cpustat_kern.o
always += xdp_adjust_tail_kern.o
always += xdpsock_kern.o
always += xdp_fwd_kern.o
+always += task_fd_query_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -175,6 +178,7 @@ HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
+HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
HOST_LOADLIBES += $(LIBBPF) -lelf
HOSTLOADLIBES_tracex4 += -lrt
diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c
new file mode 100644
index 0000000..f4b0a9e
--- /dev/null
+++ b/samples/bpf/task_fd_query_kern.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+SEC("kprobe/blk_start_request")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("kretprobe/blk_account_io_completion")
+int bpf_prog2(struct pt_regs *ctx)
+{
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
new file mode 100644
index 0000000..8381d79
--- /dev/null
+++ b/samples/bpf/task_fd_query_user.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "perf-sys.h"
+#include "trace_helpers.h"
+
+#define CHECK_PERROR_RET(condition) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("FAIL: %s:\n", __func__); \
+ perror(" "); \
+ return -1; \
+ } \
+})
+
+#define CHECK_AND_RET(condition) ({ \
+ int __ret = !!(condition); \
+ if (__ret) \
+ return -1; \
+})
+
+static __u64 ptr_to_u64(void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type"
+static int bpf_find_probe_type(const char *event_type)
+{
+ char buf[256];
+ int fd, ret;
+
+ ret = snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+
+ fd = open(buf, O_RDONLY);
+ CHECK_PERROR_RET(fd < 0);
+
+ ret = read(fd, buf, sizeof(buf));
+ close(fd);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+
+ errno = 0;
+ ret = (int)strtol(buf, NULL, 10);
+ CHECK_PERROR_RET(errno);
+ return ret;
+}
+
+#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe"
+static int bpf_get_retprobe_bit(const char *event_type)
+{
+ char buf[256];
+ int fd, ret;
+
+ ret = snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+
+ fd = open(buf, O_RDONLY);
+ CHECK_PERROR_RET(fd < 0);
+
+ ret = read(fd, buf, sizeof(buf));
+ close(fd);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+ CHECK_PERROR_RET(strlen(buf) < strlen("config:"));
+
+ errno = 0;
+ ret = (int)strtol(buf + strlen("config:"), NULL, 10);
+ CHECK_PERROR_RET(errno);
+ return ret;
+}
+
+static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name,
+ __u32 expected_fd_type)
+{
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ char buf[256];
+ int err;
+
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), event_fd[prog_fd_idx], 0, buf, &len,
+ &prog_id, &fd_type, &probe_offset,
+ &probe_addr);
+ if (err < 0) {
+ printf("FAIL: %s, for event_fd idx %d, fn_name %s\n",
+ __func__, prog_fd_idx, fn_name);
+ perror(" :");
+ return -1;
+ }
+ if (strcmp(buf, fn_name) != 0 ||
+ fd_type != expected_fd_type ||
+ probe_offset != 0x0 || probe_addr != 0x0) {
+ printf("FAIL: bpf_trace_event_query(event_fd[%d]):\n",
+ prog_fd_idx);
+ printf("buf: %s, fd_type: %u, probe_offset: 0x%llx,"
+ " probe_addr: 0x%llx\n",
+ buf, fd_type, probe_offset, probe_addr);
+ return -1;
+ }
+ return 0;
+}
+
+static int test_nondebug_fs_kuprobe_common(const char *event_type,
+ const char *name, __u64 offset, __u64 addr, bool is_return,
+ char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
+ __u64 *probe_offset, __u64 *probe_addr)
+{
+ int is_return_bit = bpf_get_retprobe_bit(event_type);
+ int type = bpf_find_probe_type(event_type);
+ struct perf_event_attr attr = {};
+ int fd;
+
+ if (type < 0 || is_return_bit < 0) {
+ printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n",
+ __func__, type, is_return_bit);
+ return -1;
+ }
+
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ if (is_return)
+ attr.config |= 1 << is_return_bit;
+
+ if (name) {
+ attr.config1 = ptr_to_u64((void *)name);
+ attr.config2 = offset;
+ } else {
+ attr.config1 = 0;
+ attr.config2 = addr;
+ }
+ attr.size = sizeof(attr);
+ attr.type = type;
+
+ fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
+ CHECK_PERROR_RET(fd < 0);
+
+ CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0);
+ CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
+ CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len,
+ prog_id, fd_type, probe_offset, probe_addr) < 0);
+
+ return 0;
+}
+
+static int test_nondebug_fs_probe(const char *event_type, const char *name,
+ __u64 offset, __u64 addr, bool is_return,
+ __u32 expected_fd_type,
+ __u32 expected_ret_fd_type,
+ char *buf, __u32 buf_len)
+{
+ __u64 probe_offset, probe_addr;
+ __u32 prog_id, fd_type;
+ int err;
+
+ err = test_nondebug_fs_kuprobe_common(event_type, name,
+ offset, addr, is_return,
+ buf, &buf_len, &prog_id,
+ &fd_type, &probe_offset,
+ &probe_addr);
+ if (err < 0) {
+ printf("FAIL: %s, "
+ "for name %s, offset 0x%llx, addr 0x%llx, is_return %d\n",
+ __func__, name ? name : "", offset, addr, is_return);
+ perror(" :");
+ return -1;
+ }
+ if ((is_return && fd_type != expected_ret_fd_type) ||
+ (!is_return && fd_type != expected_fd_type)) {
+ printf("FAIL: %s, incorrect fd_type %u\n",
+ __func__, fd_type);
+ return -1;
+ }
+ if (name) {
+ if (strcmp(name, buf) != 0) {
+ printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
+ return -1;
+ }
+ if (probe_offset != offset) {
+ printf("FAIL: %s, incorrect probe_offset 0x%llx\n",
+ __func__, probe_offset);
+ return -1;
+ }
+ } else {
+ if (buf_len != 0) {
+ printf("FAIL: %s, incorrect buf %p\n",
+ __func__, buf);
+ return -1;
+ }
+
+ if (probe_addr != addr) {
+ printf("FAIL: %s, incorrect probe_addr 0x%llx\n",
+ __func__, probe_addr);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
+{
+ const char *event_type = "uprobe";
+ struct perf_event_attr attr = {};
+ char buf[256], event_alias[256];
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ int err, res, kfd, efd;
+ ssize_t bytes;
+
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events",
+ event_type);
+ kfd = open(buf, O_WRONLY | O_APPEND, 0);
+ CHECK_PERROR_RET(kfd < 0);
+
+ res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid());
+ CHECK_PERROR_RET(res < 0 || res >= sizeof(event_alias));
+
+ res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx",
+ is_return ? 'r' : 'p', event_type, event_alias,
+ binary_path, offset);
+ CHECK_PERROR_RET(res < 0 || res >= sizeof(buf));
+ CHECK_PERROR_RET(write(kfd, buf, strlen(buf)) < 0);
+
+ close(kfd);
+ kfd = -1;
+
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s/id",
+ event_type, event_alias);
+ efd = open(buf, O_RDONLY, 0);
+ CHECK_PERROR_RET(efd < 0);
+
+ bytes = read(efd, buf, sizeof(buf));
+ CHECK_PERROR_RET(bytes <= 0 || bytes >= sizeof(buf));
+ close(efd);
+ buf[bytes] = '\0';
+
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ CHECK_PERROR_RET(kfd < 0);
+ CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
+ CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_ENABLE, 0) < 0);
+
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), kfd, 0, buf, &len,
+ &prog_id, &fd_type, &probe_offset,
+ &probe_addr);
+ if (err < 0) {
+ printf("FAIL: %s, binary_path %s\n", __func__, binary_path);
+ perror(" :");
+ return -1;
+ }
+ if ((is_return && fd_type != BPF_FD_TYPE_URETPROBE) ||
+ (!is_return && fd_type != BPF_FD_TYPE_UPROBE)) {
+ printf("FAIL: %s, incorrect fd_type %u\n", __func__,
+ fd_type);
+ return -1;
+ }
+ if (strcmp(binary_path, buf) != 0) {
+ printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
+ return -1;
+ }
+ if (probe_offset != offset) {
+ printf("FAIL: %s, incorrect probe_offset 0x%llx\n", __func__,
+ probe_offset);
+ return -1;
+ }
+
+ close(kfd);
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ struct rlimit r = {1024*1024, RLIM_INFINITY};
+ extern char __executable_start;
+ char filename[256], buf[256];
+ __u64 uprobe_file_offset;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
+ if (load_kallsyms()) {
+ printf("failed to process /proc/kallsyms\n");
+ return 1;
+ }
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ /* test two functions in the corresponding *_kern.c file */
+ CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_start_request",
+ BPF_FD_TYPE_KPROBE));
+ CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion",
+ BPF_FD_TYPE_KRETPROBE));
+
+ /* test nondebug fs kprobe */
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
+ false, BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+#ifdef __x86_64__
+ /* set a kprobe on "bpf_check + 0x5", which is x64 specific */
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x5, 0x0,
+ false, BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+#endif
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
+ true, BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), false,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), false,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ NULL, 0));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), true,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), true,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ 0, 0));
+
+ /* test nondebug fs uprobe */
+ /* the calculation of uprobe file offset is based on gcc 7.3.1 on x64
+ * and the default linker script, which defines __executable_start as
+ * the start of the .text section. The calculation could be different
+ * on different systems with different compilers. The right way is
+ * to parse the ELF file. We took a shortcut here.
+ */
+ uprobe_file_offset = (__u64)main - (__u64)&__executable_start;
+ CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
+ uprobe_file_offset, 0x0, false,
+ BPF_FD_TYPE_UPROBE,
+ BPF_FD_TYPE_URETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
+ uprobe_file_offset, 0x0, true,
+ BPF_FD_TYPE_UPROBE,
+ BPF_FD_TYPE_URETPROBE,
+ buf, sizeof(buf)));
+
+ /* test debug fs uprobe */
+ CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
+ false));
+ CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
+ true));
+
+ return 0;
+}
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
new file mode 100644
index 0000000..e3eb0ea
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -0,0 +1,81 @@
+================
+bpftool-perf
+================
+-------------------------------------------------------------------------------
+tool for inspection of perf related bpf prog attachments
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **perf** *COMMAND*
+
+ *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+
+ *COMMANDS* :=
+ { **show** | **list** | **help** }
+
+PERF COMMANDS
+=============
+
+| **bpftool** **perf { show | list }**
+| **bpftool** **perf help**
+
+DESCRIPTION
+===========
+ **bpftool perf { show | list }**
+ List all raw_tracepoint, tracepoint, kprobe attachment in the system.
+
+ Output will start with process id and file descriptor in that process,
+ followed by bpf program id, attachment information, and attachment point.
+ The attachment point for raw_tracepoint/tracepoint is the trace probe name.
+ The attachment point for k[ret]probe is either symbol name and offset,
+ or a kernel virtual address.
+ The attachment point for u[ret]probe is the file name and the file offset.
+
+ **bpftool perf help**
+ Print short help message.
+
+OPTIONS
+=======
+ -h, --help
+ Print short generic help message (similar to **bpftool help**).
+
+ -v, --version
+ Print version number (similar to **bpftool version**).
+
+ -j, --json
+ Generate JSON output. For commands that cannot produce JSON, this
+ option has no effect.
+
+ -p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+EXAMPLES
+========
+
+| **# bpftool perf**
+
+::
+
+ pid 21711 fd 5: prog_id 5 kprobe func __x64_sys_write offset 0
+ pid 21765 fd 5: prog_id 7 kretprobe func __x64_sys_nanosleep offset 0
+ pid 21767 fd 5: prog_id 8 tracepoint sys_enter_nanosleep
+ pid 21800 fd 5: prog_id 9 uprobe filename /home/yhs/a.out offset 1159
+
+|
+| **# bpftool -j perf**
+
+::
+
+ [{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \
+ {"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
+ {"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
+ {"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]
+
+
+SEE ALSO
+========
+ **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 564cb0d..b6f5d56 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -16,7 +16,7 @@ SYNOPSIS
**bpftool** **version**
- *OBJECT* := { **map** | **program** | **cgroup** }
+ *OBJECT* := { **map** | **program** | **cgroup** | **perf** }
*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
@@ -30,6 +30,8 @@ SYNOPSIS
*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
+ *PERF-COMMANDS* := { **show** | **list** | **help** }
+
DESCRIPTION
===========
*bpftool* allows for inspection and simple modification of BPF objects
@@ -56,3 +58,4 @@ OPTIONS
SEE ALSO
========
**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
+ **bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index b301c9b..7bc198d 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -448,6 +448,15 @@ _bpftool()
;;
esac
;;
+ perf)
+ case $command in
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'help \
+ show list' -- "$cur" ) )
+ ;;
+ esac
+ ;;
esac
} &&
complete -F _bpftool bpftool
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 1ec852d..eea7f14 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -87,7 +87,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
- " OBJECT := { prog | map | cgroup }\n"
+ " OBJECT := { prog | map | cgroup | perf }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, bin_name, bin_name);
@@ -216,6 +216,7 @@ static const struct cmd cmds[] = {
{ "prog", do_prog },
{ "map", do_map },
{ "cgroup", do_cgroup },
+ { "perf", do_perf },
{ "version", do_version },
{ 0 }
};
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 6173cd9..63fdb31 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -119,6 +119,7 @@ int do_prog(int argc, char **arg);
int do_map(int argc, char **arg);
int do_event_pipe(int argc, char **argv);
int do_cgroup(int argc, char **arg);
+int do_perf(int argc, char **arg);
int prog_parse_fd(int *argc, char ***argv);
int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
new file mode 100644
index 0000000..ac6b1a1
--- /dev/null
+++ b/tools/bpf/bpftool/perf.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+// Author: Yonghong Song <yhs@fb.com>
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <ftw.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+/* 0: undecided, 1: supported, 2: not supported */
+static int perf_query_supported;
+static bool has_perf_query_support(void)
+{
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ char buf[256];
+ int fd;
+
+ if (perf_query_supported)
+ goto out;
+
+ fd = open(bin_name, O_RDONLY);
+ if (fd < 0) {
+ p_err("perf_query_support: %s", strerror(errno));
+ goto out;
+ }
+
+ /* the following query will fail as no bpf attachment,
+ * the expected errno is ENOTSUPP
+ */
+ errno = 0;
+ len = sizeof(buf);
+ bpf_task_fd_query(getpid(), fd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+
+ if (errno == 524 /* ENOTSUPP */) {
+ perf_query_supported = 1;
+ goto close_fd;
+ }
+
+ perf_query_supported = 2;
+ p_err("perf_query_support: %s", strerror(errno));
+ fprintf(stderr,
+ "HINT: non root or kernel doesn't support TASK_FD_QUERY\n");
+
+close_fd:
+ close(fd);
+out:
+ return perf_query_supported == 1;
+}
+
+static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type,
+ char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+ jsonw_start_object(json_wtr);
+ jsonw_int_field(json_wtr, "pid", pid);
+ jsonw_int_field(json_wtr, "fd", fd);
+ jsonw_uint_field(json_wtr, "prog_id", prog_id);
+ switch (fd_type) {
+ case BPF_FD_TYPE_RAW_TRACEPOINT:
+ jsonw_string_field(json_wtr, "fd_type", "raw_tracepoint");
+ jsonw_string_field(json_wtr, "tracepoint", buf);
+ break;
+ case BPF_FD_TYPE_TRACEPOINT:
+ jsonw_string_field(json_wtr, "fd_type", "tracepoint");
+ jsonw_string_field(json_wtr, "tracepoint", buf);
+ break;
+ case BPF_FD_TYPE_KPROBE:
+ jsonw_string_field(json_wtr, "fd_type", "kprobe");
+ if (buf[0] != '\0') {
+ jsonw_string_field(json_wtr, "func", buf);
+ jsonw_lluint_field(json_wtr, "offset", probe_offset);
+ } else {
+ jsonw_lluint_field(json_wtr, "addr", probe_addr);
+ }
+ break;
+ case BPF_FD_TYPE_KRETPROBE:
+ jsonw_string_field(json_wtr, "fd_type", "kretprobe");
+ if (buf[0] != '\0') {
+ jsonw_string_field(json_wtr, "func", buf);
+ jsonw_lluint_field(json_wtr, "offset", probe_offset);
+ } else {
+ jsonw_lluint_field(json_wtr, "addr", probe_addr);
+ }
+ break;
+ case BPF_FD_TYPE_UPROBE:
+ jsonw_string_field(json_wtr, "fd_type", "uprobe");
+ jsonw_string_field(json_wtr, "filename", buf);
+ jsonw_lluint_field(json_wtr, "offset", probe_offset);
+ break;
+ case BPF_FD_TYPE_URETPROBE:
+ jsonw_string_field(json_wtr, "fd_type", "uretprobe");
+ jsonw_string_field(json_wtr, "filename", buf);
+ jsonw_lluint_field(json_wtr, "offset", probe_offset);
+ break;
+ }
+ jsonw_end_object(json_wtr);
+}
+
+static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type,
+ char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+ printf("pid %d fd %d: prog_id %u ", pid, fd, prog_id);
+ switch (fd_type) {
+ case BPF_FD_TYPE_RAW_TRACEPOINT:
+ printf("raw_tracepoint %s\n", buf);
+ break;
+ case BPF_FD_TYPE_TRACEPOINT:
+ printf("tracepoint %s\n", buf);
+ break;
+ case BPF_FD_TYPE_KPROBE:
+ if (buf[0] != '\0')
+ printf("kprobe func %s offset %llu\n", buf,
+ probe_offset);
+ else
+ printf("kprobe addr %llu\n", probe_addr);
+ break;
+ case BPF_FD_TYPE_KRETPROBE:
+ if (buf[0] != '\0')
+ printf("kretprobe func %s offset %llu\n", buf,
+ probe_offset);
+ else
+ printf("kretprobe addr %llu\n", probe_addr);
+ break;
+ case BPF_FD_TYPE_UPROBE:
+ printf("uprobe filename %s offset %llu\n", buf, probe_offset);
+ break;
+ case BPF_FD_TYPE_URETPROBE:
+ printf("uretprobe filename %s offset %llu\n", buf,
+ probe_offset);
+ break;
+ }
+}
+
+static int show_proc(const char *fpath, const struct stat *sb,
+ int tflag, struct FTW *ftwbuf)
+{
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ int err, pid = 0, fd = 0;
+ const char *pch;
+ char buf[4096];
+
+ /* prefix always /proc */
+ pch = fpath + 5;
+ if (*pch == '\0')
+ return 0;
+
+ /* pid should be all numbers */
+ pch++;
+ while (isdigit(*pch)) {
+ pid = pid * 10 + *pch - '0';
+ pch++;
+ }
+ if (*pch == '\0')
+ return 0;
+ if (*pch != '/')
+ return FTW_SKIP_SUBTREE;
+
+ /* check /proc/<pid>/fd directory */
+ pch++;
+ if (strncmp(pch, "fd", 2))
+ return FTW_SKIP_SUBTREE;
+ pch += 2;
+ if (*pch == '\0')
+ return 0;
+ if (*pch != '/')
+ return FTW_SKIP_SUBTREE;
+
+ /* check /proc/<pid>/fd/<fd_num> */
+ pch++;
+ while (isdigit(*pch)) {
+ fd = fd * 10 + *pch - '0';
+ pch++;
+ }
+ if (*pch != '\0')
+ return FTW_SKIP_SUBTREE;
+
+ /* query (pid, fd) for potential perf events */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type,
+ &probe_offset, &probe_addr);
+ if (err < 0)
+ return 0;
+
+ if (json_output)
+ print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset,
+ probe_addr);
+ else
+ print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset,
+ probe_addr);
+
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ int flags = FTW_ACTIONRETVAL | FTW_PHYS;
+ int err = 0, nopenfd = 16;
+
+ if (!has_perf_query_support())
+ return -1;
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+ if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
+ p_err("%s", strerror(errno));
+ err = -1;
+ }
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+ fprintf(stderr,
+ "Usage: %s %s { show | list | help }\n"
+ "",
+ bin_name, argv[-2]);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "list", do_show },
+ { "help", do_help },
+ { 0 }
+};
+
+int do_perf(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e95fec9..9b8c6e3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -97,6 +97,7 @@ enum bpf_cmd {
BPF_RAW_TRACEPOINT_OPEN,
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
+ BPF_TASK_FD_QUERY,
};
enum bpf_map_type {
@@ -380,6 +381,22 @@ union bpf_attr {
__u32 btf_log_size;
__u32 btf_log_level;
};
+
+ struct {
+ __u32 pid; /* input: pid */
+ __u32 fd; /* input: fd */
+ __u32 flags; /* input: flags */
+ __u32 buf_len; /* input/output: buf len */
+ __aligned_u64 buf; /* input/output:
+ * tp_name for tracepoint
+ * symbol for kprobe
+ * filename for uprobe
+ */
+ __u32 prog_id; /* output: prod_id */
+ __u32 fd_type; /* output: BPF_FD_TYPE_* */
+ __u64 probe_offset; /* output: probe_offset */
+ __u64 probe_addr; /* output: probe_addr */
+ } task_fd_query;
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -2557,4 +2574,13 @@ struct bpf_fib_lookup {
__u8 dmac[6]; /* ETH_ALEN */
};
+enum bpf_task_fd_type {
+ BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_UPROBE, /* filename + offset */
+ BPF_FD_TYPE_URETPROBE, /* filename + offset */
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 442b4cd..9ddc89d 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -643,3 +643,26 @@ retry:
return fd;
}
+
+int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
+ __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
+ __u64 *probe_addr)
+{
+ union bpf_attr attr = {};
+ int err;
+
+ attr.task_fd_query.pid = pid;
+ attr.task_fd_query.fd = fd;
+ attr.task_fd_query.flags = flags;
+ attr.task_fd_query.buf = ptr_to_u64(buf);
+ attr.task_fd_query.buf_len = *buf_len;
+
+ err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+ *buf_len = attr.task_fd_query.buf_len;
+ *prog_id = attr.task_fd_query.prog_id;
+ *fd_type = attr.task_fd_query.fd_type;
+ *probe_offset = attr.task_fd_query.probe_offset;
+ *probe_addr = attr.task_fd_query.probe_addr;
+
+ return err;
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index d12344f..0639a30 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -107,4 +107,7 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
int bpf_raw_tracepoint_open(const char *name, int prog_fd);
int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
bool do_log);
+int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
+ __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
+ __u64 *probe_addr);
#endif
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 3ecf733..0ef6820 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1542,6 +1542,162 @@ close_prog_noerr:
bpf_object__close(obj);
}
+static void test_task_fd_query_rawtp(void)
+{
+ const char *file = "./test_get_stack_rawtp.o";
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ struct bpf_object *obj;
+ int efd, err, prog_fd;
+ __u32 duration = 0;
+ char buf[256];
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+ return;
+
+ efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
+ if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+
+ /* query (getpid(), efd) */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
+ errno))
+ goto close_prog;
+
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ strcmp(buf, "sys_enter") == 0;
+ if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
+ fd_type, buf))
+ goto close_prog;
+
+ /* test zero len */
+ len = 0;
+ err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query (len = 0)", "err %d errno %d\n",
+ err, errno))
+ goto close_prog;
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ len == strlen("sys_enter");
+ if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+ goto close_prog;
+
+ /* test empty buffer */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), efd, 0, 0, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query (buf = 0)", "err %d errno %d\n",
+ err, errno))
+ goto close_prog;
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ len == strlen("sys_enter");
+ if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+ goto close_prog;
+
+ /* test smaller buffer */
+ len = 3;
+ err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err >= 0 || errno != ENOSPC, "bpf_task_fd_query (len = 3)",
+ "err %d errno %d\n", err, errno))
+ goto close_prog;
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ len == strlen("sys_enter") &&
+ strcmp(buf, "sy") == 0;
+ if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+ goto close_prog;
+
+ goto close_prog_noerr;
+close_prog:
+ error_cnt++;
+close_prog_noerr:
+ bpf_object__close(obj);
+}
+
+static void test_task_fd_query_tp_core(const char *probe_name,
+ const char *tp_name)
+{
+ const char *file = "./test_tracepoint.o";
+ int err, bytes, efd, prog_fd, pmu_fd;
+ struct perf_event_attr attr = {};
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ struct bpf_object *obj;
+ __u32 duration = 0;
+ char buf[256];
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
+ "bytes %d errno %d\n", bytes, errno))
+ goto close_prog;
+
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(err, "perf_event_open", "err %d errno %d\n", err, errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ /* query (getpid(), pmu_fd) */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), pmu_fd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ err = (fd_type == BPF_FD_TYPE_TRACEPOINT) && !strcmp(buf, tp_name);
+ if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
+ fd_type, buf))
+ goto close_pmu;
+
+ close(pmu_fd);
+ goto close_prog_noerr;
+
+close_pmu:
+ close(pmu_fd);
+close_prog:
+ error_cnt++;
+close_prog_noerr:
+ bpf_object__close(obj);
+}
+
+static void test_task_fd_query_tp(void)
+{
+ test_task_fd_query_tp_core("sched/sched_switch",
+ "sched_switch");
+ test_task_fd_query_tp_core("syscalls/sys_enter_read",
+ "sys_enter_read");
+}
+
int main(void)
{
jit_enabled = is_jit_enabled();
@@ -1561,6 +1717,8 @@ int main(void)
test_stacktrace_build_id_nmi();
test_stacktrace_map_raw_tp();
test_get_stack_raw_tp();
+ test_task_fd_query_rawtp();
+ test_task_fd_query_tp();
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 8fb4fe8..3868dcb 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -72,6 +72,18 @@ struct ksym *ksym_search(long key)
return &syms[0];
}
+long ksym_get_addr(const char *name)
+{
+ int i;
+
+ for (i = 0; i < sym_cnt; i++) {
+ if (strcmp(syms[i].name, name) == 0)
+ return syms[i].addr;
+ }
+
+ return 0;
+}
+
static int page_size;
static int page_cnt = 8;
static struct perf_event_mmap_page *header;
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 36d90e3..3b4bcf7 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -11,6 +11,7 @@ struct ksym {
int load_kallsyms(void);
struct ksym *ksym_search(long key);
+long ksym_get_addr(const char *name);
typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size);
OpenPOWER on IntegriCloud