summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/bpf.h20
-rw-r--r--include/linux/ftrace_event.h14
-rw-r--r--include/linux/perf_event.h121
-rw-r--r--include/linux/watchdog.h8
-rw-r--r--include/uapi/linux/bpf.h5
-rw-r--r--include/uapi/linux/perf_event.h115
6 files changed, 256 insertions, 27 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bbfceb7..c2e2111 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -113,8 +113,6 @@ struct bpf_prog_type_list {
enum bpf_prog_type type;
};
-void bpf_register_prog_type(struct bpf_prog_type_list *tl);
-
struct bpf_prog;
struct bpf_prog_aux {
@@ -129,11 +127,25 @@ struct bpf_prog_aux {
};
#ifdef CONFIG_BPF_SYSCALL
+void bpf_register_prog_type(struct bpf_prog_type_list *tl);
+
void bpf_prog_put(struct bpf_prog *prog);
+struct bpf_prog *bpf_prog_get(u32 ufd);
#else
-static inline void bpf_prog_put(struct bpf_prog *prog) {}
+static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl)
+{
+}
+
+static inline struct bpf_prog *bpf_prog_get(u32 ufd)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void bpf_prog_put(struct bpf_prog *prog)
+{
+}
#endif
-struct bpf_prog *bpf_prog_get(u32 ufd);
+
/* verify correctness of eBPF program */
int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 112cf49..46e83c2 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -13,6 +13,7 @@ struct trace_array;
struct trace_buffer;
struct tracer;
struct dentry;
+struct bpf_prog;
struct trace_print_flags {
unsigned long mask;
@@ -252,6 +253,7 @@ enum {
TRACE_EVENT_FL_WAS_ENABLED_BIT,
TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
TRACE_EVENT_FL_TRACEPOINT_BIT,
+ TRACE_EVENT_FL_KPROBE_BIT,
};
/*
@@ -265,6 +267,7 @@ enum {
* it is best to clear the buffers that used it).
* USE_CALL_FILTER - For ftrace internal events, don't use file filter
* TRACEPOINT - Event is a tracepoint
+ * KPROBE - Event is a kprobe
*/
enum {
TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
@@ -274,6 +277,7 @@ enum {
TRACE_EVENT_FL_WAS_ENABLED = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
+ TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT),
};
struct ftrace_event_call {
@@ -303,6 +307,7 @@ struct ftrace_event_call {
#ifdef CONFIG_PERF_EVENTS
int perf_refcount;
struct hlist_head __percpu *perf_events;
+ struct bpf_prog *prog;
int (*perf_perm)(struct ftrace_event_call *,
struct perf_event *);
@@ -548,6 +553,15 @@ event_trigger_unlock_commit_regs(struct ftrace_event_file *file,
event_triggers_post_call(file, tt);
}
+#ifdef CONFIG_BPF_SYSCALL
+unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
+#else
+static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
+{
+ return 1;
+}
+#endif
+
enum {
FILTER_OTHER = 0,
FILTER_STATIC_STRING,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2b62198..61992cf 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -53,6 +53,7 @@ struct perf_guest_info_callbacks {
#include <linux/sysfs.h>
#include <linux/perf_regs.h>
#include <linux/workqueue.h>
+#include <linux/cgroup.h>
#include <asm/local.h>
struct perf_callchain_entry {
@@ -118,10 +119,19 @@ struct hw_perf_event {
struct hrtimer hrtimer;
};
struct { /* tracepoint */
- struct task_struct *tp_target;
/* for tp_event->class */
struct list_head tp_list;
};
+ struct { /* intel_cqm */
+ int cqm_state;
+ int cqm_rmid;
+ struct list_head cqm_events_entry;
+ struct list_head cqm_groups_entry;
+ struct list_head cqm_group_entry;
+ };
+ struct { /* itrace */
+ int itrace_started;
+ };
#ifdef CONFIG_HAVE_HW_BREAKPOINT
struct { /* breakpoint */
/*
@@ -129,12 +139,12 @@ struct hw_perf_event {
* problem hw_breakpoint has with context
* creation and event initalization.
*/
- struct task_struct *bp_target;
struct arch_hw_breakpoint info;
struct list_head bp_list;
};
#endif
};
+ struct task_struct *target;
int state;
local64_t prev_count;
u64 sample_period;
@@ -166,6 +176,11 @@ struct perf_event;
* pmu::capabilities flags
*/
#define PERF_PMU_CAP_NO_INTERRUPT 0x01
+#define PERF_PMU_CAP_NO_NMI 0x02
+#define PERF_PMU_CAP_AUX_NO_SG 0x04
+#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x08
+#define PERF_PMU_CAP_EXCLUSIVE 0x10
+#define PERF_PMU_CAP_ITRACE 0x20
/**
* struct pmu - generic performance monitoring unit
@@ -186,6 +201,7 @@ struct pmu {
int * __percpu pmu_disable_count;
struct perf_cpu_context * __percpu pmu_cpu_context;
+ atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
int task_ctx_nr;
int hrtimer_interval_ms;
@@ -262,9 +278,32 @@ struct pmu {
int (*event_idx) (struct perf_event *event); /*optional */
/*
- * flush branch stack on context-switches (needed in cpu-wide mode)
+ * context-switches callback
+ */
+ void (*sched_task) (struct perf_event_context *ctx,
+ bool sched_in);
+ /*
+ * PMU specific data size
+ */
+ size_t task_ctx_size;
+
+
+ /*
+ * Return the count value for a counter.
+ */
+ u64 (*count) (struct perf_event *event); /*optional*/
+
+ /*
+ * Set up pmu-private data structures for an AUX area
*/
- void (*flush_branch_stack) (void);
+ void *(*setup_aux) (int cpu, void **pages,
+ int nr_pages, bool overwrite);
+ /* optional */
+
+ /*
+ * Free pmu-private AUX data structures
+ */
+ void (*free_aux) (void *aux); /* optional */
};
/**
@@ -300,6 +339,7 @@ struct swevent_hlist {
#define PERF_ATTACH_CONTEXT 0x01
#define PERF_ATTACH_GROUP 0x02
#define PERF_ATTACH_TASK 0x04
+#define PERF_ATTACH_TASK_DATA 0x08
struct perf_cgroup;
struct ring_buffer;
@@ -438,6 +478,7 @@ struct perf_event {
struct pid_namespace *ns;
u64 id;
+ u64 (*clock)(void);
perf_overflow_handler_t overflow_handler;
void *overflow_handler_context;
@@ -504,7 +545,7 @@ struct perf_event_context {
u64 generation;
int pin_count;
int nr_cgroups; /* cgroup evts */
- int nr_branch_stack; /* branch_stack evt */
+ void *task_ctx_data; /* pmu specific data */
struct rcu_head rcu_head;
struct delayed_work orphans_remove;
@@ -536,12 +577,52 @@ struct perf_output_handle {
struct ring_buffer *rb;
unsigned long wakeup;
unsigned long size;
- void *addr;
+ union {
+ void *addr;
+ unsigned long head;
+ };
int page;
};
+#ifdef CONFIG_CGROUP_PERF
+
+/*
+ * perf_cgroup_info keeps track of time_enabled for a cgroup.
+ * This is a per-cpu dynamically allocated data structure.
+ */
+struct perf_cgroup_info {
+ u64 time;
+ u64 timestamp;
+};
+
+struct perf_cgroup {
+ struct cgroup_subsys_state css;
+ struct perf_cgroup_info __percpu *info;
+};
+
+/*
+ * Must ensure cgroup is pinned (css_get) before calling
+ * this function. In other words, we cannot call this function
+ * if there is no cgroup event for the current CPU context.
+ */
+static inline struct perf_cgroup *
+perf_cgroup_from_task(struct task_struct *task)
+{
+ return container_of(task_css(task, perf_event_cgrp_id),
+ struct perf_cgroup, css);
+}
+#endif /* CONFIG_CGROUP_PERF */
+
#ifdef CONFIG_PERF_EVENTS
+extern void *perf_aux_output_begin(struct perf_output_handle *handle,
+ struct perf_event *event);
+extern void perf_aux_output_end(struct perf_output_handle *handle,
+ unsigned long size, bool truncated);
+extern int perf_aux_output_skip(struct perf_output_handle *handle,
+ unsigned long size);
+extern void *perf_get_aux(struct perf_output_handle *handle);
+
extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
extern void perf_pmu_unregister(struct pmu *pmu);
@@ -558,6 +639,8 @@ extern void perf_event_delayed_put(struct task_struct *task);
extern void perf_event_print_debug(void);
extern void perf_pmu_disable(struct pmu *pmu);
extern void perf_pmu_enable(struct pmu *pmu);
+extern void perf_sched_cb_dec(struct pmu *pmu);
+extern void perf_sched_cb_inc(struct pmu *pmu);
extern int perf_event_task_disable(void);
extern int perf_event_task_enable(void);
extern int perf_event_refresh(struct perf_event *event, int refresh);
@@ -731,6 +814,11 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
__perf_event_task_sched_out(prev, next);
}
+static inline u64 __perf_event_count(struct perf_event *event)
+{
+ return local64_read(&event->count) + atomic64_read(&event->child_count);
+}
+
extern void perf_event_mmap(struct vm_area_struct *vma);
extern struct perf_guest_info_callbacks *perf_guest_cbs;
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -800,6 +888,16 @@ static inline bool has_branch_stack(struct perf_event *event)
return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
}
+static inline bool needs_branch_stack(struct perf_event *event)
+{
+ return event->attr.branch_sample_type != 0;
+}
+
+static inline bool has_aux(struct perf_event *event)
+{
+ return event->pmu->setup_aux;
+}
+
extern int perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size);
extern void perf_output_end(struct perf_output_handle *handle);
@@ -815,6 +913,17 @@ extern void perf_event_disable(struct perf_event *event);
extern int __perf_event_disable(void *info);
extern void perf_event_task_tick(void);
#else /* !CONFIG_PERF_EVENTS: */
+static inline void *
+perf_aux_output_begin(struct perf_output_handle *handle,
+ struct perf_event *event) { return NULL; }
+static inline void
+perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
+ bool truncated) { }
+static inline int
+perf_aux_output_skip(struct perf_output_handle *handle,
+ unsigned long size) { return -EINVAL; }
+static inline void *
+perf_get_aux(struct perf_output_handle *handle) { return NULL; }
static inline void
perf_event_task_sched_in(struct task_struct *prev,
struct task_struct *task) { }
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 395b70e..a746bf5 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -137,4 +137,12 @@ extern int watchdog_init_timeout(struct watchdog_device *wdd,
extern int watchdog_register_device(struct watchdog_device *);
extern void watchdog_unregister_device(struct watchdog_device *);
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+void watchdog_nmi_disable_all(void);
+void watchdog_nmi_enable_all(void);
+#else
+static inline void watchdog_nmi_disable_all(void) {}
+static inline void watchdog_nmi_enable_all(void) {}
+#endif
+
#endif /* ifndef _LINUX_WATCHDOG_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 45da7ec..cc47ef4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -118,6 +118,7 @@ enum bpf_map_type {
enum bpf_prog_type {
BPF_PROG_TYPE_UNSPEC,
BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
};
/* flags for BPF_MAP_UPDATE_ELEM command */
@@ -151,6 +152,7 @@ union bpf_attr {
__u32 log_level; /* verbosity level of verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied buffer */
+ __u32 kern_version; /* checked when prog_type=kprobe */
};
} __attribute__((aligned(8)));
@@ -162,6 +164,9 @@ enum bpf_func_id {
BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
+ BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */
+ BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */
+ BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */
__BPF_FUNC_MAX_ID,
};
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 9b79abb..309211b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -152,21 +152,42 @@ enum perf_event_sample_format {
* The branch types can be combined, however BRANCH_ANY covers all types
* of branches and therefore it supersedes all the other types.
*/
+enum perf_branch_sample_type_shift {
+ PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */
+ PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */
+ PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */
+
+ PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */
+ PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */
+ PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */
+ PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */
+ PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */
+ PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */
+ PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */
+ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */
+
+ PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */
+
+ PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
+};
+
enum perf_branch_sample_type {
- PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */
- PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */
- PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */
-
- PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */
- PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */
- PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */
- PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */
- PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */
- PERF_SAMPLE_BRANCH_IN_TX = 1U << 8, /* in transaction */
- PERF_SAMPLE_BRANCH_NO_TX = 1U << 9, /* not in transaction */
- PERF_SAMPLE_BRANCH_COND = 1U << 10, /* conditional branches */
-
- PERF_SAMPLE_BRANCH_MAX = 1U << 11, /* non-ABI */
+ PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
+ PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
+ PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
+
+ PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
+
+ PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
+
+ PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
#define PERF_SAMPLE_BRANCH_PLM_ALL \
@@ -240,6 +261,7 @@ enum perf_event_read_format {
#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */
/* add: sample_stack_user */
#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
+#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
/*
* Hardware event_id to monitor via a performance monitoring event:
@@ -305,7 +327,8 @@ struct perf_event_attr {
exclude_callchain_user : 1, /* exclude user callchains */
mmap2 : 1, /* include mmap with inode data */
comm_exec : 1, /* flag comm events that are due to an exec */
- __reserved_1 : 39;
+ use_clockid : 1, /* use @clockid for time fields */
+ __reserved_1 : 38;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -334,8 +357,7 @@ struct perf_event_attr {
*/
__u32 sample_stack_user;
- /* Align to u64. */
- __u32 __reserved_2;
+ __s32 clockid;
/*
* Defines set of regs to dump for each sample
* state captured on:
@@ -345,6 +367,12 @@ struct perf_event_attr {
* See asm/perf_regs.h for details.
*/
__u64 sample_regs_intr;
+
+ /*
+ * Wakeup watermark for AUX area
+ */
+ __u32 aux_watermark;
+ __u32 __reserved_2; /* align to __u64 */
};
#define perf_flags(attr) (*(&(attr)->read_format + 1))
@@ -360,6 +388,7 @@ struct perf_event_attr {
#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
+#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
@@ -500,9 +529,30 @@ struct perf_event_mmap_page {
* In this case the kernel will not over-write unread data.
*
* See perf_output_put_handle() for the data ordering.
+ *
+ * data_{offset,size} indicate the location and size of the perf record
+ * buffer within the mmapped area.
*/
__u64 data_head; /* head in the data section */
__u64 data_tail; /* user-space written tail */
+ __u64 data_offset; /* where the buffer starts */
+ __u64 data_size; /* data buffer size */
+
+ /*
+ * AUX area is defined by aux_{offset,size} fields that should be set
+ * by the userspace, so that
+ *
+ * aux_offset >= data_offset + data_size
+ *
+ * prior to mmap()ing it. Size of the mmap()ed area should be aux_size.
+ *
+ * Ring buffer pointers aux_{head,tail} have the same semantics as
+ * data_{head,tail} and same ordering rules apply.
+ */
+ __u64 aux_head;
+ __u64 aux_tail;
+ __u64 aux_offset;
+ __u64 aux_size;
};
#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0)
@@ -725,6 +775,31 @@ enum perf_event_type {
*/
PERF_RECORD_MMAP2 = 10,
+ /*
+ * Records that new data landed in the AUX buffer part.
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u64 aux_offset;
+ * u64 aux_size;
+ * u64 flags;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_AUX = 11,
+
+ /*
+ * Indicates that instruction trace has started
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid;
+ * u32 tid;
+ * };
+ */
+ PERF_RECORD_ITRACE_START = 12,
+
PERF_RECORD_MAX, /* non-ABI */
};
@@ -742,6 +817,12 @@ enum perf_callchain_context {
PERF_CONTEXT_MAX = (__u64)-4095,
};
+/**
+ * PERF_RECORD_AUX::flags bits
+ */
+#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
+
#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
#define PERF_FLAG_FD_OUTPUT (1UL << 1)
#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
OpenPOWER on IntegriCloud