summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/hw_breakpoint.h2
-rw-r--r--arch/x86/kernel/hw_breakpoint.c51
-rw-r--r--include/linux/perf_event.h8
-rw-r--r--kernel/hw_breakpoint.c78
-rw-r--r--tools/perf/builtin-trace.c32
-rw-r--r--tools/perf/util/parse-events.c11
6 files changed, 119 insertions, 63 deletions
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 9422553..528a11e 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -20,10 +20,10 @@ struct arch_hw_breakpoint {
#include <linux/list.h>
/* Available HW breakpoint length encodings */
+#define X86_BREAKPOINT_LEN_X 0x00
#define X86_BREAKPOINT_LEN_1 0x40
#define X86_BREAKPOINT_LEN_2 0x44
#define X86_BREAKPOINT_LEN_4 0x4c
-#define X86_BREAKPOINT_LEN_EXECUTE 0x40
#ifdef CONFIG_X86_64
#define X86_BREAKPOINT_LEN_8 0x48
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index a8f1b80..a474ec3 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
{
/* Len */
switch (x86_len) {
+ case X86_BREAKPOINT_LEN_X:
+ *gen_len = sizeof(long);
+ break;
case X86_BREAKPOINT_LEN_1:
*gen_len = HW_BREAKPOINT_LEN_1;
break;
@@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp)
info->address = bp->attr.bp_addr;
+ /* Type */
+ switch (bp->attr.bp_type) {
+ case HW_BREAKPOINT_W:
+ info->type = X86_BREAKPOINT_WRITE;
+ break;
+ case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+ info->type = X86_BREAKPOINT_RW;
+ break;
+ case HW_BREAKPOINT_X:
+ info->type = X86_BREAKPOINT_EXECUTE;
+ /*
+ * x86 inst breakpoints need to have a specific undefined len.
+ * But we still need to check userspace is not trying to setup
+ * an unsupported length, to get a range breakpoint for example.
+ */
+ if (bp->attr.bp_len == sizeof(long)) {
+ info->len = X86_BREAKPOINT_LEN_X;
+ return 0;
+ }
+ default:
+ return -EINVAL;
+ }
+
/* Len */
switch (bp->attr.bp_len) {
case HW_BREAKPOINT_LEN_1:
@@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp)
return -EINVAL;
}
- /* Type */
- switch (bp->attr.bp_type) {
- case HW_BREAKPOINT_W:
- info->type = X86_BREAKPOINT_WRITE;
- break;
- case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
- info->type = X86_BREAKPOINT_RW;
- break;
- case HW_BREAKPOINT_X:
- info->type = X86_BREAKPOINT_EXECUTE;
- break;
- default:
- return -EINVAL;
- }
-
return 0;
}
/*
@@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
ret = -EINVAL;
switch (info->len) {
+ case X86_BREAKPOINT_LEN_X:
+ align = sizeof(long) -1;
+ break;
case X86_BREAKPOINT_LEN_1:
align = 0;
break;
@@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
perf_bp_event(bp, args->regs);
+ /*
+ * Set up resume flag to avoid breakpoint recursion when
+ * returning back to origin.
+ */
+ if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
+ args->regs->flags |= X86_EFLAGS_RF;
+
rcu_read_unlock();
}
/*
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 63b5aa5..937495c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -533,8 +533,10 @@ struct hw_perf_event {
struct hrtimer hrtimer;
};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
- /* breakpoint */
- struct arch_hw_breakpoint info;
+ struct { /* breakpoint */
+ struct arch_hw_breakpoint info;
+ struct list_head bp_list;
+ };
#endif
};
local64_t prev_count;
@@ -934,7 +936,7 @@ extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
#ifndef perf_arch_fetch_caller_regs
static inline void
-perf_arch_fetch_caller_regs(struct regs *regs, unsigned long ip) { }
+perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
#endif
/*
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 7a56b22..e34d94d 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -41,6 +41,7 @@
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/smp.h>
@@ -62,6 +63,9 @@ static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
static int nr_slots[TYPE_MAX];
+/* Keep track of the breakpoints attached to tasks */
+static LIST_HEAD(bp_task_head);
+
static int constraints_initialized;
/* Gather the number of total pinned and un-pinned bp in a cpuset */
@@ -103,33 +107,21 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
return 0;
}
-static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
+/*
+ * Count the number of breakpoints of the same type and same task.
+ * The given event must be not on the list.
+ */
+static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type)
{
- struct perf_event_context *ctx = tsk->perf_event_ctxp;
- struct list_head *list;
- struct perf_event *bp;
- unsigned long flags;
+ struct perf_event_context *ctx = bp->ctx;
+ struct perf_event *iter;
int count = 0;
- if (WARN_ONCE(!ctx, "No perf context for this task"))
- return 0;
-
- list = &ctx->event_list;
-
- raw_spin_lock_irqsave(&ctx->lock, flags);
-
- /*
- * The current breakpoint counter is not included in the list
- * at the open() callback time
- */
- list_for_each_entry(bp, list, event_entry) {
- if (bp->attr.type == PERF_TYPE_BREAKPOINT)
- if (find_slot_idx(bp) == type)
- count += hw_breakpoint_weight(bp);
+ list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
+ if (iter->ctx == ctx && find_slot_idx(iter) == type)
+ count += hw_breakpoint_weight(iter);
}
- raw_spin_unlock_irqrestore(&ctx->lock, flags);
-
return count;
}
@@ -149,7 +141,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
if (!tsk)
slots->pinned += max_task_bp_pinned(cpu, type);
else
- slots->pinned += task_bp_pinned(tsk, type);
+ slots->pinned += task_bp_pinned(bp, type);
slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
return;
@@ -162,7 +154,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
if (!tsk)
nr += max_task_bp_pinned(cpu, type);
else
- nr += task_bp_pinned(tsk, type);
+ nr += task_bp_pinned(bp, type);
if (nr > slots->pinned)
slots->pinned = nr;
@@ -188,7 +180,7 @@ fetch_this_slot(struct bp_busy_slots *slots, int weight)
/*
* Add a pinned breakpoint for the given task in our constraint table
*/
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
+static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable,
enum bp_type_idx type, int weight)
{
unsigned int *tsk_pinned;
@@ -196,10 +188,11 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
int old_idx = 0;
int idx = 0;
- old_count = task_bp_pinned(tsk, type);
+ old_count = task_bp_pinned(bp, type);
old_idx = old_count - 1;
idx = old_idx + weight;
+ /* tsk_pinned[n] is the number of tasks having n breakpoints */
tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
if (enable) {
tsk_pinned[idx]++;
@@ -222,23 +215,30 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
int cpu = bp->cpu;
struct task_struct *tsk = bp->ctx->task;
+ /* Pinned counter cpu profiling */
+ if (!tsk) {
+
+ if (enable)
+ per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
+ else
+ per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
+ return;
+ }
+
/* Pinned counter task profiling */
- if (tsk) {
- if (cpu >= 0) {
- toggle_bp_task_slot(tsk, cpu, enable, type, weight);
- return;
- }
+ if (!enable)
+ list_del(&bp->hw.bp_list);
+
+ if (cpu >= 0) {
+ toggle_bp_task_slot(bp, cpu, enable, type, weight);
+ } else {
for_each_online_cpu(cpu)
- toggle_bp_task_slot(tsk, cpu, enable, type, weight);
- return;
+ toggle_bp_task_slot(bp, cpu, enable, type, weight);
}
- /* Pinned counter cpu profiling */
if (enable)
- per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
- else
- per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
+ list_add_tail(&bp->hw.bp_list, &bp_task_head);
}
/*
@@ -301,6 +301,10 @@ static int __reserve_bp_slot(struct perf_event *bp)
weight = hw_breakpoint_weight(bp);
fetch_bp_busy_slots(&slots, bp, type);
+ /*
+ * Simulate the addition of this breakpoint to the constraints
+ * and see the result.
+ */
fetch_this_slot(&slots, weight);
/* Flexible counters need to keep at least one slot */
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index dddf3f0..294da72 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -11,8 +11,9 @@
static char const *script_name;
static char const *generate_script_lang;
-static bool debug_ordering;
+static bool debug_mode;
static u64 last_timestamp;
+static u64 nr_unordered;
static int default_start_script(const char *script __unused,
int argc __unused,
@@ -91,13 +92,15 @@ static int process_sample_event(event_t *event, struct perf_session *session)
}
if (session->sample_type & PERF_SAMPLE_RAW) {
- if (debug_ordering) {
+ if (debug_mode) {
if (data.time < last_timestamp) {
pr_err("Samples misordered, previous: %llu "
"this: %llu\n", last_timestamp,
data.time);
+ nr_unordered++;
}
last_timestamp = data.time;
+ return 0;
}
/*
* FIXME: better resolve from pid from the struct trace_entry
@@ -113,6 +116,15 @@ static int process_sample_event(event_t *event, struct perf_session *session)
return 0;
}
+static u64 nr_lost;
+
+static int process_lost_event(event_t *event, struct perf_session *session __used)
+{
+ nr_lost += event->lost.lost;
+
+ return 0;
+}
+
static struct perf_event_ops event_ops = {
.sample = process_sample_event,
.comm = event__process_comm,
@@ -120,6 +132,7 @@ static struct perf_event_ops event_ops = {
.event_type = event__process_event_type,
.tracing_data = event__process_tracing_data,
.build_id = event__process_build_id,
+ .lost = process_lost_event,
.ordered_samples = true,
};
@@ -132,9 +145,18 @@ static void sig_handler(int sig __unused)
static int __cmd_trace(struct perf_session *session)
{
+ int ret;
+
signal(SIGINT, sig_handler);
- return perf_session__process_events(session, &event_ops);
+ ret = perf_session__process_events(session, &event_ops);
+
+ if (debug_mode) {
+ pr_err("Misordered timestamps: %llu\n", nr_unordered);
+ pr_err("Lost events: %llu\n", nr_lost);
+ }
+
+ return ret;
}
struct script_spec {
@@ -544,8 +566,8 @@ static const struct option options[] = {
"generate perf-trace.xx script in specified language"),
OPT_STRING('i', "input", &input_name, "file",
"input file name"),
- OPT_BOOLEAN('d', "debug-ordering", &debug_ordering,
- "check that samples time ordering is monotonic"),
+ OPT_BOOLEAN('d', "debug-mode", &debug_mode,
+ "do various checks like samples ordering and lost events"),
OPT_END()
};
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 9bf0f40..4af5bd5 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -602,8 +602,15 @@ parse_breakpoint_event(const char **strp, struct perf_event_attr *attr)
return EVT_FAILED;
}
- /* We should find a nice way to override the access type */
- attr->bp_len = HW_BREAKPOINT_LEN_4;
+ /*
+ * We should find a nice way to override the access length
+ * Provide some defaults for now
+ */
+ if (attr->bp_type == HW_BREAKPOINT_X)
+ attr->bp_len = sizeof(long);
+ else
+ attr->bp_len = HW_BREAKPOINT_LEN_4;
+
attr->type = PERF_TYPE_BREAKPOINT;
return EVT_HANDLED;
OpenPOWER on IntegriCloud