diff options
author | Ingo Molnar <mingo@kernel.org> | 2016-05-20 08:19:20 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-05-20 08:20:14 +0200 |
commit | 21f77d231fabd33c5de61fbff31818d93203353e (patch) | |
tree | 74bd85f1184b26409605884bf65ae1c1ba5d724c /include | |
parent | b0a434fb7412937d55f15b8897c5646c81497bbe (diff) | |
parent | a29d5c9b8167dbc21a7ca8c0302e3799f9063b4e (diff) | |
download | op-kernel-dev-21f77d231fabd33c5de61fbff31818d93203353e.zip op-kernel-dev-21f77d231fabd33c5de61fbff31818d93203353e.tar.gz |
Merge tag 'perf-core-for-mingo-20160516' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
User visible changes:
- Honour the kernel.perf_event_max_stack knob more precisely by not counting
PERF_CONTEXT_{KERNEL,USER} when deciding when to stop adding entries to
the perf_sample->ip_callchain[] array (Arnaldo Carvalho de Melo)
- Fix identation of 'stalled-backend-cycles' in 'perf stat' (Namhyung Kim)
- Update runtime using 'cpu-clock' event in 'perf stat' (Namhyung Kim)
- Use 'cpu-clock' for cpu targets in 'perf stat' (Namhyung Kim)
- Avoid fractional digits for integer scales in 'perf stat' (Andi Kleen)
- Store vdso buildid unconditionally, as it appears in callchains and
we're not checking those when creating the build-id table, so we
end up not being able to resolve VDSO symbols when doing analysis
on a different machine than the one where recording was done, possibly
of a different arch even (arm -> x86_64) (He Kuang)
Infrastructure changes:
- Generalize max_stack sysctl handler, will be used for configuring
multiple kernel knobs related to callchains (Arnaldo Carvalho de Melo)
Cleanups:
- Introduce DSO__NAME_KALLSYMS and DSO__NAME_KCORE, to stop using
open coded strings (Masami Hiramatsu)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/perf_event.h | 34 | ||||
-rw-r--r-- | include/uapi/linux/perf_event.h | 1 |
2 files changed, 30 insertions, 5 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9e1c3ad..6b87be9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -61,6 +61,14 @@ struct perf_callchain_entry { __u64 ip[0]; /* /proc/sys/kernel/perf_event_max_stack */ }; +struct perf_callchain_entry_ctx { + struct perf_callchain_entry *entry; + u32 max_stack; + u32 nr; + short contexts; + bool contexts_maxed; +}; + struct perf_raw_record { u32 size; void *data; @@ -1063,20 +1071,36 @@ extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); -extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); -extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); +extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); extern struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, - bool crosstask, bool add_mark); + u32 max_stack, bool crosstask, bool add_mark); extern int get_callchain_buffers(void); extern void put_callchain_buffers(void); extern int sysctl_perf_event_max_stack; +extern int sysctl_perf_event_max_contexts_per_stack; + +static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *ctx, u64 ip) +{ + if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) { + struct perf_callchain_entry *entry = ctx->entry; + entry->ip[entry->nr++] = ip; + ++ctx->contexts; + return 0; + } else { + ctx->contexts_maxed = true; + return -1; /* no more room, stop walking the stack */ + } +} -static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip) { - if (entry->nr < sysctl_perf_event_max_stack) { + if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) { + struct perf_callchain_entry *entry = ctx->entry; entry->ip[entry->nr++] = ip; + ++ctx->nr; return 0; } else { return -1; /* no more room, stop walking the stack */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 43fc8d2..36ce552 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -862,6 +862,7 @@ enum perf_event_type { }; #define PERF_MAX_STACK_DEPTH 127 +#define PERF_MAX_CONTEXTS_PER_STACK 8 enum perf_callchain_context { PERF_CONTEXT_HV = (__u64)-32, |