diff options
-rw-r--r-- | arch/x86/include/asm/hw_breakpoint.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/hw_breakpoint.c | 51 | ||||
-rw-r--r-- | include/linux/perf_event.h | 8 | ||||
-rw-r--r-- | kernel/hw_breakpoint.c | 78 | ||||
-rw-r--r-- | tools/perf/builtin-trace.c | 32 | ||||
-rw-r--r-- | tools/perf/util/parse-events.c | 11 |
6 files changed, 119 insertions, 63 deletions
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 9422553..528a11e 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -20,10 +20,10 @@ struct arch_hw_breakpoint { #include <linux/list.h> /* Available HW breakpoint length encodings */ +#define X86_BREAKPOINT_LEN_X 0x00 #define X86_BREAKPOINT_LEN_1 0x40 #define X86_BREAKPOINT_LEN_2 0x44 #define X86_BREAKPOINT_LEN_4 0x4c -#define X86_BREAKPOINT_LEN_EXECUTE 0x40 #ifdef CONFIG_X86_64 #define X86_BREAKPOINT_LEN_8 0x48 diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a8f1b80..a474ec3 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type, { /* Len */ switch (x86_len) { + case X86_BREAKPOINT_LEN_X: + *gen_len = sizeof(long); + break; case X86_BREAKPOINT_LEN_1: *gen_len = HW_BREAKPOINT_LEN_1; break; @@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp) info->address = bp->attr.bp_addr; + /* Type */ + switch (bp->attr.bp_type) { + case HW_BREAKPOINT_W: + info->type = X86_BREAKPOINT_WRITE; + break; + case HW_BREAKPOINT_W | HW_BREAKPOINT_R: + info->type = X86_BREAKPOINT_RW; + break; + case HW_BREAKPOINT_X: + info->type = X86_BREAKPOINT_EXECUTE; + /* + * x86 inst breakpoints need to have a specific undefined len. + * But we still need to check userspace is not trying to setup + * an unsupported length, to get a range breakpoint for example. + */ + if (bp->attr.bp_len == sizeof(long)) { + info->len = X86_BREAKPOINT_LEN_X; + return 0; + } + default: + return -EINVAL; + } + /* Len */ switch (bp->attr.bp_len) { case HW_BREAKPOINT_LEN_1: @@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp) return -EINVAL; } - /* Type */ - switch (bp->attr.bp_type) { - case HW_BREAKPOINT_W: - info->type = X86_BREAKPOINT_WRITE; - break; - case HW_BREAKPOINT_W | HW_BREAKPOINT_R: - info->type = X86_BREAKPOINT_RW; - break; - case HW_BREAKPOINT_X: - info->type = X86_BREAKPOINT_EXECUTE; - break; - default: - return -EINVAL; - } - return 0; } /* @@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) ret = -EINVAL; switch (info->len) { + case X86_BREAKPOINT_LEN_X: + align = sizeof(long) -1; + break; case X86_BREAKPOINT_LEN_1: align = 0; break; @@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) perf_bp_event(bp, args->regs); + /* + * Set up resume flag to avoid breakpoint recursion when + * returning back to origin. + */ + if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE) + args->regs->flags |= X86_EFLAGS_RF; + rcu_read_unlock(); } /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 63b5aa5..937495c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -533,8 +533,10 @@ struct hw_perf_event { struct hrtimer hrtimer; }; #ifdef CONFIG_HAVE_HW_BREAKPOINT - /* breakpoint */ - struct arch_hw_breakpoint info; + struct { /* breakpoint */ + struct arch_hw_breakpoint info; + struct list_head bp_list; + }; #endif }; local64_t prev_count; @@ -934,7 +936,7 @@ extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); #ifndef perf_arch_fetch_caller_regs static inline void -perf_arch_fetch_caller_regs(struct regs *regs, unsigned long ip) { } +perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } #endif /* diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 7a56b22..e34d94d 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -41,6 +41,7 @@ #include <linux/sched.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/list.h> #include <linux/cpu.h> #include <linux/smp.h> @@ -62,6 +63,9 @@ static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]); static int nr_slots[TYPE_MAX]; +/* Keep track of the breakpoints attached to tasks */ +static LIST_HEAD(bp_task_head); + static int constraints_initialized; /* Gather the number of total pinned and un-pinned bp in a cpuset */ @@ -103,33 +107,21 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type) return 0; } -static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type) +/* + * Count the number of breakpoints of the same type and same task. + * The given event must be not on the list. + */ +static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type) { - struct perf_event_context *ctx = tsk->perf_event_ctxp; - struct list_head *list; - struct perf_event *bp; - unsigned long flags; + struct perf_event_context *ctx = bp->ctx; + struct perf_event *iter; int count = 0; - if (WARN_ONCE(!ctx, "No perf context for this task")) - return 0; - - list = &ctx->event_list; - - raw_spin_lock_irqsave(&ctx->lock, flags); - - /* - * The current breakpoint counter is not included in the list - * at the open() callback time - */ - list_for_each_entry(bp, list, event_entry) { - if (bp->attr.type == PERF_TYPE_BREAKPOINT) - if (find_slot_idx(bp) == type) - count += hw_breakpoint_weight(bp); + list_for_each_entry(iter, &bp_task_head, hw.bp_list) { + if (iter->ctx == ctx && find_slot_idx(iter) == type) + count += hw_breakpoint_weight(iter); } - raw_spin_unlock_irqrestore(&ctx->lock, flags); - return count; } @@ -149,7 +141,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, if (!tsk) slots->pinned += max_task_bp_pinned(cpu, type); else - slots->pinned += task_bp_pinned(tsk, type); + slots->pinned += task_bp_pinned(bp, type); slots->flexible = per_cpu(nr_bp_flexible[type], cpu); return; @@ -162,7 +154,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, if (!tsk) nr += max_task_bp_pinned(cpu, type); else - nr += task_bp_pinned(tsk, type); + nr += task_bp_pinned(bp, type); if (nr > slots->pinned) slots->pinned = nr; @@ -188,7 +180,7 @@ fetch_this_slot(struct bp_busy_slots *slots, int weight) /* * Add a pinned breakpoint for the given task in our constraint table */ -static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable, +static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable, enum bp_type_idx type, int weight) { unsigned int *tsk_pinned; @@ -196,10 +188,11 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable, int old_idx = 0; int idx = 0; - old_count = task_bp_pinned(tsk, type); + old_count = task_bp_pinned(bp, type); old_idx = old_count - 1; idx = old_idx + weight; + /* tsk_pinned[n] is the number of tasks having n breakpoints */ tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu); if (enable) { tsk_pinned[idx]++; @@ -222,23 +215,30 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int cpu = bp->cpu; struct task_struct *tsk = bp->ctx->task; + /* Pinned counter cpu profiling */ + if (!tsk) { + + if (enable) + per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight; + else + per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight; + return; + } + /* Pinned counter task profiling */ - if (tsk) { - if (cpu >= 0) { - toggle_bp_task_slot(tsk, cpu, enable, type, weight); - return; - } + if (!enable) + list_del(&bp->hw.bp_list); + + if (cpu >= 0) { + toggle_bp_task_slot(bp, cpu, enable, type, weight); + } else { for_each_online_cpu(cpu) - toggle_bp_task_slot(tsk, cpu, enable, type, weight); - return; + toggle_bp_task_slot(bp, cpu, enable, type, weight); } - /* Pinned counter cpu profiling */ if (enable) - per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight; - else - per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight; + list_add_tail(&bp->hw.bp_list, &bp_task_head); } /* @@ -301,6 +301,10 @@ static int __reserve_bp_slot(struct perf_event *bp) weight = hw_breakpoint_weight(bp); fetch_bp_busy_slots(&slots, bp, type); + /* + * Simulate the addition of this breakpoint to the constraints + * and see the result. + */ fetch_this_slot(&slots, weight); /* Flexible counters need to keep at least one slot */ diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index dddf3f0..294da72 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -11,8 +11,9 @@ static char const *script_name; static char const *generate_script_lang; -static bool debug_ordering; +static bool debug_mode; static u64 last_timestamp; +static u64 nr_unordered; static int default_start_script(const char *script __unused, int argc __unused, @@ -91,13 +92,15 @@ static int process_sample_event(event_t *event, struct perf_session *session) } if (session->sample_type & PERF_SAMPLE_RAW) { - if (debug_ordering) { + if (debug_mode) { if (data.time < last_timestamp) { pr_err("Samples misordered, previous: %llu " "this: %llu\n", last_timestamp, data.time); + nr_unordered++; } last_timestamp = data.time; + return 0; } /* * FIXME: better resolve from pid from the struct trace_entry @@ -113,6 +116,15 @@ static int process_sample_event(event_t *event, struct perf_session *session) return 0; } +static u64 nr_lost; + +static int process_lost_event(event_t *event, struct perf_session *session __used) +{ + nr_lost += event->lost.lost; + + return 0; +} + static struct perf_event_ops event_ops = { .sample = process_sample_event, .comm = event__process_comm, @@ -120,6 +132,7 @@ static struct perf_event_ops event_ops = { .event_type = event__process_event_type, .tracing_data = event__process_tracing_data, .build_id = event__process_build_id, + .lost = process_lost_event, .ordered_samples = true, }; @@ -132,9 +145,18 @@ static void sig_handler(int sig __unused) static int __cmd_trace(struct perf_session *session) { + int ret; + signal(SIGINT, sig_handler); - return perf_session__process_events(session, &event_ops); + ret = perf_session__process_events(session, &event_ops); + + if (debug_mode) { + pr_err("Misordered timestamps: %llu\n", nr_unordered); + pr_err("Lost events: %llu\n", nr_lost); + } + + return ret; } struct script_spec { @@ -544,8 +566,8 @@ static const struct option options[] = { "generate perf-trace.xx script in specified language"), OPT_STRING('i', "input", &input_name, "file", "input file name"), - OPT_BOOLEAN('d', "debug-ordering", &debug_ordering, - "check that samples time ordering is monotonic"), + OPT_BOOLEAN('d', "debug-mode", &debug_mode, + "do various checks like samples ordering and lost events"), OPT_END() }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 9bf0f40..4af5bd5 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -602,8 +602,15 @@ parse_breakpoint_event(const char **strp, struct perf_event_attr *attr) return EVT_FAILED; } - /* We should find a nice way to override the access type */ - attr->bp_len = HW_BREAKPOINT_LEN_4; + /* + * We should find a nice way to override the access length + * Provide some defaults for now + */ + if (attr->bp_type == HW_BREAKPOINT_X) + attr->bp_len = sizeof(long); + else + attr->bp_len = HW_BREAKPOINT_LEN_4; + attr->type = PERF_TYPE_BREAKPOINT; return EVT_HANDLED; |