From 67516844625f45f0ce148a01c27bf41f591872b2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 9 Jul 2013 18:56:31 +0200 Subject: perf: Remove the 'match' callback for auxiliary events processing It gives the following benefits: - only one function pointer is passed along the way - the 'match' function is called within output function and could be inlined by the compiler Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1373388991-9711-1-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 79 ++++++++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 40 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index eba8fb5..708ab70 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4680,12 +4680,10 @@ perf_event_read_event(struct perf_event *event, perf_output_end(&handle); } -typedef int (perf_event_aux_match_cb)(struct perf_event *event, void *data); typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data); static void perf_event_aux_ctx(struct perf_event_context *ctx, - perf_event_aux_match_cb match, perf_event_aux_output_cb output, void *data) { @@ -4696,15 +4694,12 @@ perf_event_aux_ctx(struct perf_event_context *ctx, continue; if (!event_filter_match(event)) continue; - if (match(event, data)) - output(event, data); + output(event, data); } } static void -perf_event_aux(perf_event_aux_match_cb match, - perf_event_aux_output_cb output, - void *data, +perf_event_aux(perf_event_aux_output_cb output, void *data, struct perf_event_context *task_ctx) { struct perf_cpu_context *cpuctx; @@ -4717,7 +4712,7 @@ perf_event_aux(perf_event_aux_match_cb match, cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); if (cpuctx->unique_pmu != pmu) goto next; - perf_event_aux_ctx(&cpuctx->ctx, match, output, data); + perf_event_aux_ctx(&cpuctx->ctx, output, data); if (task_ctx) goto next; ctxn = pmu->task_ctx_nr; @@ -4725,14 +4720,14 @@ perf_event_aux(perf_event_aux_match_cb match, goto next; ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); if (ctx) - perf_event_aux_ctx(ctx, match, output, data); + perf_event_aux_ctx(ctx, output, data); next: put_cpu_ptr(pmu->pmu_cpu_context); } if (task_ctx) { preempt_disable(); - perf_event_aux_ctx(task_ctx, match, output, data); + perf_event_aux_ctx(task_ctx, output, data); preempt_enable(); } rcu_read_unlock(); @@ -4759,6 +4754,12 @@ struct perf_task_event { } event_id; }; +static int perf_event_task_match(struct perf_event *event) +{ + return event->attr.comm || event->attr.mmap || + event->attr.mmap_data || event->attr.task; +} + static void perf_event_task_output(struct perf_event *event, void *data) { @@ -4768,6 +4769,9 @@ static void perf_event_task_output(struct perf_event *event, struct task_struct *task = task_event->task; int ret, size = task_event->event_id.header.size; + if (!perf_event_task_match(event)) + return; + perf_event_header__init_id(&task_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, event, @@ -4790,13 +4794,6 @@ out: task_event->event_id.header.size = size; } -static int perf_event_task_match(struct perf_event *event, - void *data __maybe_unused) -{ - return event->attr.comm || event->attr.mmap || - event->attr.mmap_data || event->attr.task; -} - static void perf_event_task(struct task_struct 
*task, struct perf_event_context *task_ctx, int new) @@ -4825,8 +4822,7 @@ static void perf_event_task(struct task_struct *task, }, }; - perf_event_aux(perf_event_task_match, - perf_event_task_output, + perf_event_aux(perf_event_task_output, &task_event, task_ctx); } @@ -4853,6 +4849,11 @@ struct perf_comm_event { } event_id; }; +static int perf_event_comm_match(struct perf_event *event) +{ + return event->attr.comm; +} + static void perf_event_comm_output(struct perf_event *event, void *data) { @@ -4862,6 +4863,9 @@ static void perf_event_comm_output(struct perf_event *event, int size = comm_event->event_id.header.size; int ret; + if (!perf_event_comm_match(event)) + return; + perf_event_header__init_id(&comm_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, event, comm_event->event_id.header.size); @@ -4883,12 +4887,6 @@ out: comm_event->event_id.header.size = size; } -static int perf_event_comm_match(struct perf_event *event, - void *data __maybe_unused) -{ - return event->attr.comm; -} - static void perf_event_comm_event(struct perf_comm_event *comm_event) { char comm[TASK_COMM_LEN]; @@ -4903,8 +4901,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; - perf_event_aux(perf_event_comm_match, - perf_event_comm_output, + perf_event_aux(perf_event_comm_output, comm_event, NULL); } @@ -4967,6 +4964,17 @@ struct perf_mmap_event { } event_id; }; +static int perf_event_mmap_match(struct perf_event *event, + void *data) +{ + struct perf_mmap_event *mmap_event = data; + struct vm_area_struct *vma = mmap_event->vma; + int executable = vma->vm_flags & VM_EXEC; + + return (!executable && event->attr.mmap_data) || + (executable && event->attr.mmap); +} + static void perf_event_mmap_output(struct perf_event *event, void *data) { @@ -4976,6 +4984,9 @@ static void perf_event_mmap_output(struct perf_event *event, int size = mmap_event->event_id.header.size; int ret; + if (!perf_event_mmap_match(event, data)) + return; + perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, event, mmap_event->event_id.header.size); @@ -4996,17 +5007,6 @@ out: mmap_event->event_id.header.size = size; } -static int perf_event_mmap_match(struct perf_event *event, - void *data) -{ - struct perf_mmap_event *mmap_event = data; - struct vm_area_struct *vma = mmap_event->vma; - int executable = vma->vm_flags & VM_EXEC; - - return (!executable && event->attr.mmap_data) || - (executable && event->attr.mmap); -} - static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) { struct vm_area_struct *vma = mmap_event->vma; @@ -5070,8 +5070,7 @@ got_name: mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; - perf_event_aux(perf_event_mmap_match, - perf_event_mmap_output, + perf_event_aux(perf_event_mmap_output, mmap_event, NULL); -- cgit v1.1 From a5cdd40c9877e9aba704c020fd65d26b5cfecf18 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Jul 2013 17:09:07 +0200 Subject: perf: Update perf_event_type documentation Due to a discussion with Adrian I had a good look at the perf_event_type record layout and found the documentation to be somewhat unclear. 
Cc: Adrian Hunter Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20130716150907.GL23818@dyad.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 5e2bce9..1274114 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4462,20 +4462,6 @@ void perf_output_sample(struct perf_output_handle *handle, } } - if (!event->attr.watermark) { - int wakeup_events = event->attr.wakeup_events; - - if (wakeup_events) { - struct ring_buffer *rb = handle->rb; - int events = local_inc_return(&rb->events); - - if (events >= wakeup_events) { - local_sub(wakeup_events, &rb->events); - local_inc(&rb->wakeup); - } - } - } - if (sample_type & PERF_SAMPLE_BRANCH_STACK) { if (data->br_stack) { size_t size; @@ -4511,16 +4497,31 @@ void perf_output_sample(struct perf_output_handle *handle, } } - if (sample_type & PERF_SAMPLE_STACK_USER) + if (sample_type & PERF_SAMPLE_STACK_USER) { perf_output_sample_ustack(handle, data->stack_user_size, data->regs_user.regs); + } if (sample_type & PERF_SAMPLE_WEIGHT) perf_output_put(handle, data->weight); if (sample_type & PERF_SAMPLE_DATA_SRC) perf_output_put(handle, data->data_src.val); + + if (!event->attr.watermark) { + int wakeup_events = event->attr.wakeup_events; + + if (wakeup_events) { + struct ring_buffer *rb = handle->rb; + int events = local_inc_return(&rb->events); + + if (events >= wakeup_events) { + local_sub(wakeup_events, &rb->events); + local_inc(&rb->wakeup); + } + } + } } void perf_prepare_sample(struct perf_event_header *header, -- cgit v1.1 From 6050cb0b0b366092d1383bc23d7b16cd26db00f0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 23 Jul 2013 02:30:59 +0200 Subject: perf: Fix branch stack refcount leak on callchain init failure On callchain buffers allocation failure, free_event() is called and all the accounting performed in perf_event_alloc() for that event is cancelled. But if the event has branch stack sampling, it is unaccounted as well from the branch stack sampling events refcounts. This is a bug because this accounting is performed after the callchain buffer allocation. As a result, the branch stack sampling events refcount can become negative. To fix this, move the branch stack event accounting before the callchain buffer allocation. 
Reported-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1374539466-4799-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 1274114..f35aa7e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6567,6 +6567,12 @@ done: atomic_inc(&nr_comm_events); if (event->attr.task) atomic_inc(&nr_task_events); + if (has_branch_stack(event)) { + static_key_slow_inc(&perf_sched_events.key); + if (!(event->attach_state & PERF_ATTACH_TASK)) + atomic_inc(&per_cpu(perf_branch_stack_events, + event->cpu)); + } if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { err = get_callchain_buffers(); if (err) { @@ -6574,12 +6580,6 @@ done: return ERR_PTR(err); } } - if (has_branch_stack(event)) { - static_key_slow_inc(&perf_sched_events.key); - if (!(event->attach_state & PERF_ATTACH_TASK)) - atomic_inc(&per_cpu(perf_branch_stack_events, - event->cpu)); - } } return event; -- cgit v1.1 From 90983b16078ab0fdc58f0dab3e8e3da79c9579a2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 23 Jul 2013 02:31:00 +0200 Subject: perf: Sanitize get_callchain_buffer() In case of allocation failure, get_callchain_buffer() keeps the refcount incremented for the current event. As a result, when get_callchain_buffers() returns an error, we must cleanup what it did by cancelling its last refcount with a call to put_callchain_buffers(). This is a hack in order to be able to call free_event() after that failure. The original purpose of that was to simplify the failure path. But this error handling is actually counter intuitive, ugly and not very easy to follow because one expect to see the resources used to perform a service to be cleaned by the callee if case of failure, not by the caller. So lets clean this up by cancelling the refcount from get_callchain_buffer() in case of failure. And correctly free the event accordingly in perf_event_alloc(). 
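The failure path thus becomes symmetric: the callee undoes its own refcount, and the caller only undoes what it set up itself. A condensed sketch of the resulting shape of get_callchain_buffers() (not the literal kernel code; note that a later patch in this series moves the rollback under the mutex):

/*
 * Condensed sketch: on failure, cancel the refcount taken here instead of
 * leaving it for the caller to clean up via put_callchain_buffers().
 */
int get_callchain_buffers(void)
{
        int err = 0;

        mutex_lock(&callchain_mutex);

        if (atomic_inc_return(&nr_callchain_events) == 1)
                err = alloc_callchain_buffers();

        mutex_unlock(&callchain_mutex);

        if (err)
                atomic_dec(&nr_callchain_events);  /* undo our own accounting */

        return err;
}
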
Signed-off-by: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1374539466-4799-3-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/events/callchain.c | 2 ++ kernel/events/core.c | 41 +++++++++++++++++++++-------------------- 2 files changed, 23 insertions(+), 20 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index c772061..76a8bc5 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -117,6 +117,8 @@ int get_callchain_buffers(void) err = alloc_callchain_buffers(); exit: mutex_unlock(&callchain_mutex); + if (err) + atomic_dec(&nr_callchain_events); return err; } diff --git a/kernel/events/core.c b/kernel/events/core.c index f35aa7e..3b99862 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6457,7 +6457,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, struct pmu *pmu; struct perf_event *event; struct hw_perf_event *hwc; - long err; + long err = -EINVAL; if ((unsigned)cpu >= nr_cpu_ids) { if (!task || cpu != -1) @@ -6540,25 +6540,23 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, * we currently do not support PERF_FORMAT_GROUP on inherited events */ if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) - goto done; + goto err_ns; pmu = perf_init_event(event); - -done: - err = 0; if (!pmu) - err = -EINVAL; - else if (IS_ERR(pmu)) + goto err_ns; + else if (IS_ERR(pmu)) { err = PTR_ERR(pmu); - - if (err) { - if (event->ns) - put_pid_ns(event->ns); - kfree(event); - return ERR_PTR(err); + goto err_ns; } if (!event->parent) { + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { + err = get_callchain_buffers(); + if (err) + goto err_pmu; + } + if (event->attach_state & PERF_ATTACH_TASK) static_key_slow_inc(&perf_sched_events.key); if (event->attr.mmap || event->attr.mmap_data) @@ -6573,16 +6571,19 @@ done: atomic_inc(&per_cpu(perf_branch_stack_events, event->cpu)); } - if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { - err = get_callchain_buffers(); - if (err) { - free_event(event); - return ERR_PTR(err); - } - } } return event; + +err_pmu: + if (event->destroy) + event->destroy(event); +err_ns: + if (event->ns) + put_pid_ns(event->ns); + kfree(event); + + return ERR_PTR(err); } static int perf_copy_attr(struct perf_event_attr __user *uattr, -- cgit v1.1 From 766d6c076928191d75ad5b0d0f58f52b1e7682d8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 23 Jul 2013 02:31:01 +0200 Subject: perf: Factor out event accounting code to account_event()/__free_event() Gather all the event accounting code to a single place, once all the prerequisites are completed. This simplifies the refcounting. 
Original-patch-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1374539466-4799-4-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 79 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 32 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 3b99862..158fd57 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3128,6 +3128,21 @@ static void free_event_rcu(struct rcu_head *head) static void ring_buffer_put(struct ring_buffer *rb); static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb); +static void __free_event(struct perf_event *event) +{ + if (!event->parent) { + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + put_callchain_buffers(); + } + + if (event->destroy) + event->destroy(event); + + if (event->ctx) + put_ctx(event->ctx); + + call_rcu(&event->rcu_head, free_event_rcu); +} static void free_event(struct perf_event *event) { irq_work_sync(&event->pending); @@ -3141,8 +3156,6 @@ static void free_event(struct perf_event *event) atomic_dec(&nr_comm_events); if (event->attr.task) atomic_dec(&nr_task_events); - if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) - put_callchain_buffers(); if (is_cgroup_event(event)) { atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); static_key_slow_dec_deferred(&perf_sched_events); @@ -3180,13 +3193,8 @@ static void free_event(struct perf_event *event) if (is_cgroup_event(event)) perf_detach_cgroup(event); - if (event->destroy) - event->destroy(event); - - if (event->ctx) - put_ctx(event->ctx); - call_rcu(&event->rcu_head, free_event_rcu); + __free_event(event); } int perf_event_release_kernel(struct perf_event *event) @@ -6443,6 +6451,29 @@ unlock: return pmu; } +static void account_event(struct perf_event *event) +{ + if (event->attach_state & PERF_ATTACH_TASK) + static_key_slow_inc(&perf_sched_events.key); + if (event->attr.mmap || event->attr.mmap_data) + atomic_inc(&nr_mmap_events); + if (event->attr.comm) + atomic_inc(&nr_comm_events); + if (event->attr.task) + atomic_inc(&nr_task_events); + if (has_branch_stack(event)) { + static_key_slow_inc(&perf_sched_events.key); + if (!(event->attach_state & PERF_ATTACH_TASK)) + atomic_inc(&per_cpu(perf_branch_stack_events, + event->cpu)); + } + + if (is_cgroup_event(event)) { + atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); + static_key_slow_inc(&perf_sched_events.key); + } +} + /* * Allocate and initialize a event structure */ @@ -6556,21 +6587,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, if (err) goto err_pmu; } - - if (event->attach_state & PERF_ATTACH_TASK) - static_key_slow_inc(&perf_sched_events.key); - if (event->attr.mmap || event->attr.mmap_data) - atomic_inc(&nr_mmap_events); - if (event->attr.comm) - atomic_inc(&nr_comm_events); - if (event->attr.task) - atomic_inc(&nr_task_events); - if (has_branch_stack(event)) { - static_key_slow_inc(&perf_sched_events.key); - if (!(event->attach_state & PERF_ATTACH_TASK)) - atomic_inc(&per_cpu(perf_branch_stack_events, - event->cpu)); - } } return event; @@ -6865,17 +6881,14 @@ SYSCALL_DEFINE5(perf_event_open, if (flags & PERF_FLAG_PID_CGROUP) { err = perf_cgroup_connect(pid, event, &attr, group_leader); - if (err) - goto err_alloc; - /* - * one more event: - * - that has cgroup constraint on event->cpu - * - that may 
need work on context switch - */ - atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); - static_key_slow_inc(&perf_sched_events.key); + if (err) { + __free_event(event); + goto err_task; + } } + account_event(event); + /* * Special case software events and allow them to be part of * any hardware group. @@ -7071,6 +7084,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, goto err; } + account_event(event); + ctx = find_get_context(event->pmu, task, cpu); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); -- cgit v1.1 From 4beb31f3657348a8b702dd014d01c520e522012f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 23 Jul 2013 02:31:02 +0200 Subject: perf: Split the per-cpu accounting part of the event accounting code This way we can use the per-cpu handling seperately. This is going to be used by to fix the event migration code accounting. Original-patch-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1374539466-4799-5-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 87 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 32 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 158fd57..3a4b73a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3128,6 +3128,40 @@ static void free_event_rcu(struct rcu_head *head) static void ring_buffer_put(struct ring_buffer *rb); static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb); +static void unaccount_event_cpu(struct perf_event *event, int cpu) +{ + if (event->parent) + return; + + if (has_branch_stack(event)) { + if (!(event->attach_state & PERF_ATTACH_TASK)) + atomic_dec(&per_cpu(perf_branch_stack_events, cpu)); + } + if (is_cgroup_event(event)) + atomic_dec(&per_cpu(perf_cgroup_events, cpu)); +} + +static void unaccount_event(struct perf_event *event) +{ + if (event->parent) + return; + + if (event->attach_state & PERF_ATTACH_TASK) + static_key_slow_dec_deferred(&perf_sched_events); + if (event->attr.mmap || event->attr.mmap_data) + atomic_dec(&nr_mmap_events); + if (event->attr.comm) + atomic_dec(&nr_comm_events); + if (event->attr.task) + atomic_dec(&nr_task_events); + if (is_cgroup_event(event)) + static_key_slow_dec_deferred(&perf_sched_events); + if (has_branch_stack(event)) + static_key_slow_dec_deferred(&perf_sched_events); + + unaccount_event_cpu(event, event->cpu); +} + static void __free_event(struct perf_event *event) { if (!event->parent) { @@ -3147,29 +3181,7 @@ static void free_event(struct perf_event *event) { irq_work_sync(&event->pending); - if (!event->parent) { - if (event->attach_state & PERF_ATTACH_TASK) - static_key_slow_dec_deferred(&perf_sched_events); - if (event->attr.mmap || event->attr.mmap_data) - atomic_dec(&nr_mmap_events); - if (event->attr.comm) - atomic_dec(&nr_comm_events); - if (event->attr.task) - atomic_dec(&nr_task_events); - if (is_cgroup_event(event)) { - atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); - static_key_slow_dec_deferred(&perf_sched_events); - } - - if (has_branch_stack(event)) { - static_key_slow_dec_deferred(&perf_sched_events); - /* is system-wide event */ - if (!(event->attach_state & PERF_ATTACH_TASK)) { - atomic_dec(&per_cpu(perf_branch_stack_events, - event->cpu)); - } - } - } + unaccount_event(event); if (event->rb) { struct ring_buffer *rb; @@ 
-6451,8 +6463,24 @@ unlock: return pmu; } +static void account_event_cpu(struct perf_event *event, int cpu) +{ + if (event->parent) + return; + + if (has_branch_stack(event)) { + if (!(event->attach_state & PERF_ATTACH_TASK)) + atomic_inc(&per_cpu(perf_branch_stack_events, cpu)); + } + if (is_cgroup_event(event)) + atomic_inc(&per_cpu(perf_cgroup_events, cpu)); +} + static void account_event(struct perf_event *event) { + if (event->parent) + return; + if (event->attach_state & PERF_ATTACH_TASK) static_key_slow_inc(&perf_sched_events.key); if (event->attr.mmap || event->attr.mmap_data) @@ -6461,17 +6489,12 @@ static void account_event(struct perf_event *event) atomic_inc(&nr_comm_events); if (event->attr.task) atomic_inc(&nr_task_events); - if (has_branch_stack(event)) { + if (has_branch_stack(event)) static_key_slow_inc(&perf_sched_events.key); - if (!(event->attach_state & PERF_ATTACH_TASK)) - atomic_inc(&per_cpu(perf_branch_stack_events, - event->cpu)); - } - - if (is_cgroup_event(event)) { - atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); + if (is_cgroup_event(event)) static_key_slow_inc(&perf_sched_events.key); - } + + account_event_cpu(event, event->cpu); } /* -- cgit v1.1 From 9a545de019b536771feefb76f85e5038b65c2190 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 23 Jul 2013 02:31:03 +0200 Subject: perf: Migrate per cpu event accounting When an event is migrated, move the event per-cpu accounting accordingly so that branch stack and cgroup events work correctly on the new CPU. Original-patch-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1374539466-4799-6-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 3a4b73a..63bdec9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7145,6 +7145,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) list_for_each_entry_safe(event, tmp, &src_ctx->event_list, event_entry) { perf_remove_from_context(event); + unaccount_event_cpu(event, src_cpu); put_ctx(src_ctx); list_add(&event->event_entry, &events); } @@ -7157,6 +7158,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) list_del(&event->event_entry); if (event->state >= PERF_EVENT_STATE_OFF) event->state = PERF_EVENT_STATE_INACTIVE; + account_event_cpu(event, dst_cpu); perf_install_in_context(dst_ctx, event, dst_cpu); get_ctx(dst_ctx); } -- cgit v1.1 From ba8a75c16e292c0a3a87406a77508cbbc6cf4ee2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 23 Jul 2013 02:31:04 +0200 Subject: perf: Account freq events per cpu This is going to be used by the full dynticks subsystem as a finer-grained information to know when to keep and when to stop the tick. 
Original-patch-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1374539466-4799-7-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 63bdec9..3fe385a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -141,6 +141,7 @@ enum event_type_t { struct static_key_deferred perf_sched_events __read_mostly; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events); +static DEFINE_PER_CPU(atomic_t, perf_freq_events); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; @@ -3139,6 +3140,9 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu) } if (is_cgroup_event(event)) atomic_dec(&per_cpu(perf_cgroup_events, cpu)); + + if (event->attr.freq) + atomic_dec(&per_cpu(perf_freq_events, cpu)); } static void unaccount_event(struct perf_event *event) @@ -6474,6 +6478,9 @@ static void account_event_cpu(struct perf_event *event, int cpu) } if (is_cgroup_event(event)) atomic_inc(&per_cpu(perf_cgroup_events, cpu)); + + if (event->attr.freq) + atomic_inc(&per_cpu(perf_freq_events, cpu)); } static void account_event(struct perf_event *event) -- cgit v1.1 From d84153d6c96f61aa06429586284639f32debf03e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 23 Jul 2013 02:31:05 +0200 Subject: perf: Implement finer grained full dynticks kick Currently the full dynticks subsystem keep the tick alive as long as there are perf events running. This prevents the tick from being stopped as long as features such that the lockup detectors are running. As a temporary fix, the lockup detector is disabled by default when full dynticks is built but this is not a long term viable solution. To fix this, only keep the tick alive when an event configured with a frequency rather than a period is running on the CPU, or when an event throttles on the CPU. These are the only purposes of the perf tick, especially now that the rotation of flexible events is handled from a seperate hrtimer. The tick can be shutdown the rest of the time. 
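For context, the distinction that now decides whether the tick must stay alive is how the sampling event is configured from userspace. A minimal sketch of the two modes, using standard perf_event_attr fields (error handling omitted):

#include <linux/perf_event.h>
#include <string.h>

/* Sketch: only the frequency-based variant now forces the tick to run. */
static void init_attr(struct perf_event_attr *attr, int use_freq)
{
        memset(attr, 0, sizeof(*attr));
        attr->size = sizeof(*attr);
        attr->type = PERF_TYPE_HARDWARE;
        attr->config = PERF_COUNT_HW_CPU_CYCLES;

        if (use_freq) {
                /* Frequency mode: the kernel re-adjusts the period from the
                 * tick to approach the requested rate, so a CPU running this
                 * event keeps its tick. */
                attr->freq = 1;
                attr->sample_freq = 4000;       /* samples per second */
        } else {
                /* Period mode: a fixed event count between samples; no
                 * tick-based adjustment needed, so the tick may stop. */
                attr->sample_period = 100000;   /* events per sample */
        }
}
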
Original-patch-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1374539466-4799-8-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 3fe385a..916cf1f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -870,12 +870,8 @@ static void perf_pmu_rotate_start(struct pmu *pmu) WARN_ON(!irqs_disabled()); - if (list_empty(&cpuctx->rotation_list)) { - int was_empty = list_empty(head); + if (list_empty(&cpuctx->rotation_list)) list_add(&cpuctx->rotation_list, head); - if (was_empty) - tick_nohz_full_kick(); - } } static void get_ctx(struct perf_event_context *ctx) @@ -1875,6 +1871,9 @@ static int __perf_install_in_context(void *info) perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, task_ctx); + if (atomic_read(&__get_cpu_var(perf_freq_events))) + tick_nohz_full_kick(); + return 0; } @@ -2812,10 +2811,11 @@ done: #ifdef CONFIG_NO_HZ_FULL bool perf_event_can_stop_tick(void) { - if (list_empty(&__get_cpu_var(rotation_list))) - return true; - else + if (atomic_read(&__get_cpu_var(perf_freq_events)) || + __this_cpu_read(perf_throttled_count)) return false; + else + return true; } #endif @@ -5202,6 +5202,7 @@ static int __perf_event_overflow(struct perf_event *event, __this_cpu_inc(perf_throttled_count); hwc->interrupts = MAX_INTERRUPTS; perf_log_throttle(event, 0); + tick_nohz_full_kick(); ret = 1; } } -- cgit v1.1 From cf4957f17f2a89984915ea808876d9c82225b862 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 24 Oct 2012 13:37:58 +0200 Subject: perf: Add PERF_EVENT_IOC_ID ioctl to return event ID The only way to get the event ID is by reading the event fd, followed by parsing the ID value out of the returned data. While this is ok for current read format used by perf tool, it is not ok when we use PERF_FORMAT_GROUP format. With this format the data are returned for the whole group and there's no way to find out what ID belongs to our fd (if we are not group leader event). Adding a simple ioctl that returns event primary ID for given fd. 
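A minimal userspace sketch of the new ioctl (error handling trimmed; perf_event_open() is wrapped locally since glibc provides no wrapper, and the uapi perf_event.h must already define PERF_EVENT_IOC_ID):

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <inttypes.h>
#include <string.h>
#include <stdio.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
                           int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        uint64_t id = 0;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;

        fd = perf_event_open(&attr, 0, -1, -1, 0);  /* current task, any CPU */
        if (fd < 0)
                return 1;

        /* New with this patch: the primary event ID, no read()+parse needed. */
        if (ioctl(fd, PERF_EVENT_IOC_ID, &id) == 0)
                printf("event id: %" PRIu64 "\n", id);

        close(fd);
        return 0;
}
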
Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-v1bn5cto707jn0bon34afqr1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 916cf1f..5200b60 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3568,6 +3568,15 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PERF_EVENT_IOC_PERIOD: return perf_event_period(event, (u64 __user *)arg); + case PERF_EVENT_IOC_ID: + { + u64 id = primary_event_id(event); + + if (copy_to_user((void __user *)arg, &id, sizeof(id))) + return -EFAULT; + return 0; + } + case PERF_EVENT_IOC_SET_OUTPUT: { int ret; -- cgit v1.1 From 6f5ab0019fd328b50a8488c9e5193fc1dbd8d6ed Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 15 Oct 2012 20:13:45 +0200 Subject: perf: Do not get values from disabled counters in group format read It's possible some of the counters in the group could be disabled when sampling member of the event group is reading the rest via PERF_SAMPLE_READ sample type processing. Disabled counters could then produce wrong numbers. Fixing that by reading only enabled counters for PERF_SAMPLE_READ sample type processing. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-wwkjb0bbcuslnz0klrmqi26r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 5200b60..e82e700 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4388,7 +4388,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, list_for_each_entry(sub, &leader->sibling_list, group_entry) { n = 0; - if (sub != event) + if ((sub != event) && + (sub->state == PERF_EVENT_STATE_ACTIVE)) sub->pmu->read(sub); values[n++] = perf_event_count(sub); -- cgit v1.1 From fc3b86d673e41ac66b4ba5b75a90c2fcafb90089 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 2 Aug 2013 18:29:54 +0200 Subject: perf: Roll back callchain buffer refcount under the callchain mutex When we fail to allocate the callchain buffers, we roll back the refcount we did and return from get_callchain_buffers(). However we take the refcount and allocate under the callchain lock but the rollback is done outside the lock. As a result, while we roll back, some concurrent callchain user may call get_callchain_buffers(), see the non-zero refcount and give up because the buffers are NULL without itself retrying the allocation. The consequences aren't that bad but that behaviour looks weird enough and it's better to give their chances to the following callchain users where we failed. 
Reported-by: Jiri Olsa Signed-off-by: Frederic Weisbecker Acked-by: Jiri Olsa Cc: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1375460996-16329-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/events/callchain.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/events') diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 76a8bc5..97b67df 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -116,10 +116,11 @@ int get_callchain_buffers(void) err = alloc_callchain_buffers(); exit: - mutex_unlock(&callchain_mutex); if (err) atomic_dec(&nr_callchain_events); + mutex_unlock(&callchain_mutex); + return err; } -- cgit v1.1 From 948b26b6ddd08a57cb95ebb0dc96fde2edd5c383 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 2 Aug 2013 18:29:55 +0200 Subject: perf: Account freq events globally Freq events may not always be affine to a particular CPU. As such, account_event_cpu() may crash if we account per cpu a freq event that has event->cpu == -1. To solve this, lets account freq events globally. In practice this doesn't change much the picture because perf tools create per-task perf events with one event per CPU by default. Profiling a single CPU is usually a corner case so there is no much point in optimizing things that way. Reported-by: Jiri Olsa Suggested-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Tested-by: Jiri Olsa Cc: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1375460996-16329-3-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index e82e700..2e675e8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -141,11 +141,11 @@ enum event_type_t { struct static_key_deferred perf_sched_events __read_mostly; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events); -static DEFINE_PER_CPU(atomic_t, perf_freq_events); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; static atomic_t nr_task_events __read_mostly; +static atomic_t nr_freq_events __read_mostly; static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); @@ -1871,9 +1871,6 @@ static int __perf_install_in_context(void *info) perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, task_ctx); - if (atomic_read(&__get_cpu_var(perf_freq_events))) - tick_nohz_full_kick(); - return 0; } @@ -2811,7 +2808,7 @@ done: #ifdef CONFIG_NO_HZ_FULL bool perf_event_can_stop_tick(void) { - if (atomic_read(&__get_cpu_var(perf_freq_events)) || + if (atomic_read(&nr_freq_events) || __this_cpu_read(perf_throttled_count)) return false; else @@ -3140,9 +3137,6 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu) } if (is_cgroup_event(event)) atomic_dec(&per_cpu(perf_cgroup_events, cpu)); - - if (event->attr.freq) - atomic_dec(&per_cpu(perf_freq_events, cpu)); } static void unaccount_event(struct perf_event *event) @@ -3158,6 +3152,8 @@ static void unaccount_event(struct perf_event *event) atomic_dec(&nr_comm_events); if (event->attr.task) atomic_dec(&nr_task_events); + if (event->attr.freq) + atomic_dec(&nr_freq_events); if (is_cgroup_event(event)) 
static_key_slow_dec_deferred(&perf_sched_events); if (has_branch_stack(event)) @@ -6489,9 +6485,6 @@ static void account_event_cpu(struct perf_event *event, int cpu) } if (is_cgroup_event(event)) atomic_inc(&per_cpu(perf_cgroup_events, cpu)); - - if (event->attr.freq) - atomic_inc(&per_cpu(perf_freq_events, cpu)); } static void account_event(struct perf_event *event) @@ -6507,6 +6500,10 @@ static void account_event(struct perf_event *event) atomic_inc(&nr_comm_events); if (event->attr.task) atomic_inc(&nr_task_events); + if (event->attr.freq) { + if (atomic_inc_return(&nr_freq_events) == 1) + tick_nohz_full_kick_all(); + } if (has_branch_stack(event)) static_key_slow_inc(&perf_sched_events.key); if (is_cgroup_event(event)) -- cgit v1.1 From 5ec4c599a52362896c3e7c6a31ba6145dca9c6f5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 2 Aug 2013 21:16:30 +0200 Subject: perf: Do not compute time values unnecessarily We should not be calling calc_timer_values() for events that do not actually have an mmap()'ed userpage. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20130802191630.GT27162@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 2e675e8..928fae7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3670,6 +3670,10 @@ void perf_event_update_userpage(struct perf_event *event) u64 enabled, running, now; rcu_read_lock(); + rb = rcu_dereference(event->rb); + if (!rb) + goto unlock; + /* * compute total_time_enabled, total_time_running * based on snapshot values taken when the event @@ -3680,12 +3684,8 @@ void perf_event_update_userpage(struct perf_event *event) * NMI context */ calc_timer_values(event, &now, &enabled, &running); - rb = rcu_dereference(event->rb); - if (!rb) - goto unlock; userpg = rb->user_page; - /* * Disable preemption so as to not let the corresponding user-space * spin too long if we get preempted. -- cgit v1.1 From ff3d527cebc1fa3707c617bfe9e74f53fcfb0955 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 27 Aug 2013 11:23:07 +0300 Subject: perf: make events stream always parsable The event stream is not always parsable because the format of a sample is dependent on the sample_type of the selected event. When there is more than one selected event and the sample_types are not the same then parsing becomes problematic. A sample can be matched to its selected event using the ID that is allocated when the event is opened. Unfortunately, to get the ID from the sample means first parsing it. This patch adds a new sample format bit PERF_SAMPLE_IDENTIFER that puts the ID at a fixed position so that the ID can be retrieved without parsing the sample. For sample events, that is the first position immediately after the header. For non-sample events, that is the last position. In this respect parsing samples requires that the sample_type and ID values are recorded. For example, perf tools records struct perf_event_attr and the IDs within the perf.data file. Those must be read first before it is possible to parse samples found later in the perf.data file. 
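A short consumer-side sketch of what the fixed position buys. It assumes PERF_SAMPLE_IDENTIFIER was set on every event in the session and, for non-sample records, that attr.sample_id_all is enabled:

#include <linux/perf_event.h>
#include <stddef.h>
#include <stdint.h>

/*
 * Sketch: pull the event ID out of a raw record without knowing the
 * originating event's full sample_type.
 */
static uint64_t record_id(const struct perf_event_header *hdr)
{
        const uint64_t *w = (const uint64_t *)(hdr + 1);
        size_t n = (hdr->size - sizeof(*hdr)) / sizeof(uint64_t);

        if (hdr->type == PERF_RECORD_SAMPLE)
                return w[0];     /* ID comes first, right after the header */

        return w[n - 1];         /* ID comes last, ending the sample_id block */
}
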
Signed-off-by: Adrian Hunter Tested-by: Stephane Eranian Acked-by: Peter Zijlstra Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1377591794-30553-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 928fae7..15d0f24 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1213,6 +1213,9 @@ static void perf_event__id_header_size(struct perf_event *event) if (sample_type & PERF_SAMPLE_TIME) size += sizeof(data->time); + if (sample_type & PERF_SAMPLE_IDENTIFIER) + size += sizeof(data->id); + if (sample_type & PERF_SAMPLE_ID) size += sizeof(data->id); @@ -4280,7 +4283,7 @@ static void __perf_event_header__init_id(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_TIME) data->time = perf_clock(); - if (sample_type & PERF_SAMPLE_ID) + if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) data->id = primary_event_id(event); if (sample_type & PERF_SAMPLE_STREAM_ID) @@ -4319,6 +4322,9 @@ static void __perf_event__output_id_sample(struct perf_output_handle *handle, if (sample_type & PERF_SAMPLE_CPU) perf_output_put(handle, data->cpu_entry); + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + perf_output_put(handle, data->id); } void perf_event__output_id_sample(struct perf_event *event, @@ -4432,6 +4438,9 @@ void perf_output_sample(struct perf_output_handle *handle, perf_output_put(handle, *header); + if (sample_type & PERF_SAMPLE_IDENTIFIER) + perf_output_put(handle, data->id); + if (sample_type & PERF_SAMPLE_IP) perf_output_put(handle, data->ip); -- cgit v1.1 From ae23bff1d71f8b416ed740bc458df67355c77c92 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 24 Aug 2013 16:45:54 +0200 Subject: perf: Prevent race in unthrottling code The current throttling code triggers WARN below via following workload (only hit on AMD machine with 48 CPUs): # while [ 1 ]; do perf record perf bench sched messaging; done WARNING: at arch/x86/kernel/cpu/perf_event.c:1054 x86_pmu_start+0xc6/0x100() SNIP Call Trace: [] dump_stack+0x19/0x1b [] warn_slowpath_common+0x61/0x80 [] warn_slowpath_null+0x1a/0x20 [] x86_pmu_start+0xc6/0x100 [] perf_adjust_freq_unthr_context.part.75+0x182/0x1a0 [] perf_event_task_tick+0xc8/0xf0 [] scheduler_tick+0xd1/0x140 [] update_process_times+0x66/0x80 [] tick_sched_handle.isra.15+0x25/0x60 [] tick_sched_timer+0x41/0x60 [] __run_hrtimer+0x74/0x1d0 [] ? tick_sched_handle.isra.15+0x60/0x60 [] hrtimer_interrupt+0xf7/0x240 [] smp_apic_timer_interrupt+0x69/0x9c [] apic_timer_interrupt+0x6d/0x80 [] ? __perf_event_task_sched_in+0x184/0x1a0 [] ? kfree_skbmem+0x37/0x90 [] ? __slab_free+0x1ac/0x30f [] ? kfree+0xfd/0x130 [] kmem_cache_free+0x1b2/0x1d0 [] kfree_skbmem+0x37/0x90 [] consume_skb+0x34/0x80 [] unix_stream_recvmsg+0x4e7/0x820 [] sock_aio_read.part.7+0x116/0x130 [] ? __perf_sw_event+0x19c/0x1e0 [] sock_aio_read+0x21/0x30 [] do_sync_read+0x80/0xb0 [] vfs_read+0x145/0x170 [] SyS_read+0x49/0xa0 [] ? __audit_syscall_exit+0x1f6/0x2a0 [] system_call_fastpath+0x16/0x1b ---[ end trace 622b7e226c4a766a ]--- The reason is a race in perf_event_task_tick() throttling code. 
The race flow (simplified code): - perf_throttled_count is per cpu variable and is CPU throttling flag, here starting with 0 - perf_throttled_seq is sequence/domain for allowed count of interrupts within the tick, gets increased each tick on single CPU (CPU bounded event): ... workload perf_event_task_tick: | | T0 inc(perf_throttled_seq) | T1 needs_unthr = xchg(perf_throttled_count, 0) == 0 tick gets interrupted: ... event gets throttled under new seq ... T2 last NMI comes, event is throttled - inc(perf_throttled_count) back to tick: | perf_adjust_freq_unthr_context: | | T3 unthrottling is skiped for event (needs_unthr == 0) | T4 event is stop and started via freq adjustment | tick ends ... workload ... no sample is hit for event ... perf_event_task_tick: | | T5 needs_unthr = xchg(perf_throttled_count, 0) != 0 (from T2) | T6 unthrottling is done on event (interrupts == MAX_INTERRUPTS) | event is already started (from T4) -> WARN Fixing this by not checking needs_unthr again and thus check all events for unthrottling. Signed-off-by: Jiri Olsa Reported-by: Jan Stancek Suggested-by: Peter Zijlstra Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1377355554-8934-1-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index f86599e..258eaaf 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2712,7 +2712,7 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx, hwc = &event->hw; - if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) { + if (hwc->interrupts == MAX_INTERRUPTS) { hwc->interrupts = 0; perf_log_throttle(event, 1); event->pmu->start(event, 0); -- cgit v1.1 From 13d7a2410fa637f450a29ecb515ac318ee40c741 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 21 Aug 2013 12:10:24 +0200 Subject: perf: Add attr->mmap2 attribute to an event Adds a new PERF_RECORD_MMAP2 record type which is essence an expanded version of PERF_RECORD_MMAP. Used to request mmap records with more information about the mapping, including device major, minor and the inode number and generation for mappings associated with files or shared memory segments. Works for code and data (with attr->mmap_data set). Existing PERF_RECORD_MMAP record is unmodified by this patch. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: Al Viro Link: http://lkml.kernel.org/r/1377079825-19057-2-git-send-email-eranian@google.com [ Added Al to the Cc:. Are the ino, maj/min exports of vma->vm_file OK? 
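For reference, how a tool opts in and the record body this patch emits; the layout below is inferred from the output code in the diff, with the usual sample_id fields (if requested) following the filename:

#include <linux/perf_event.h>
#include <stdint.h>

/* Opting in: request PERF_RECORD_MMAP2 instead of PERF_RECORD_MMAP. */
static void want_mmap2(struct perf_event_attr *attr)
{
        attr->mmap2 = 1;        /* needs a uapi header carrying the new bit */
}

/* Body of PERF_RECORD_MMAP2 as emitted by this patch: */
struct mmap2_record {
        struct perf_event_header header;
        uint32_t pid, tid;
        uint64_t addr, len, pgoff;
        uint32_t maj, min;              /* device of the backing file, 0 if anonymous */
        uint64_t ino, ino_generation;   /* inode number and generation, 0 if anonymous */
        char     filename[];            /* NUL-terminated, padded to 8 bytes */
};
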
] Signed-off-by: Ingo Molnar --- kernel/events/core.c | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 15d0f24..c7ee497 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4776,7 +4776,7 @@ next: /* * task tracking -- fork/exit * - * enabled by: attr.comm | attr.mmap | attr.mmap_data | attr.task + * enabled by: attr.comm | attr.mmap | attr.mmap2 | attr.mmap_data | attr.task */ struct perf_task_event { @@ -4796,8 +4796,9 @@ struct perf_task_event { static int perf_event_task_match(struct perf_event *event) { - return event->attr.comm || event->attr.mmap || - event->attr.mmap_data || event->attr.task; + return event->attr.comm || event->attr.mmap || + event->attr.mmap2 || event->attr.mmap_data || + event->attr.task; } static void perf_event_task_output(struct perf_event *event, @@ -4992,6 +4993,9 @@ struct perf_mmap_event { const char *file_name; int file_size; + int maj, min; + u64 ino; + u64 ino_generation; struct { struct perf_event_header header; @@ -5012,7 +5016,7 @@ static int perf_event_mmap_match(struct perf_event *event, int executable = vma->vm_flags & VM_EXEC; return (!executable && event->attr.mmap_data) || - (executable && event->attr.mmap); + (executable && (event->attr.mmap || event->attr.mmap2)); } static void perf_event_mmap_output(struct perf_event *event, @@ -5027,6 +5031,13 @@ static void perf_event_mmap_output(struct perf_event *event, if (!perf_event_mmap_match(event, data)) return; + if (event->attr.mmap2) { + mmap_event->event_id.header.type = PERF_RECORD_MMAP2; + mmap_event->event_id.header.size += sizeof(mmap_event->maj); + mmap_event->event_id.header.size += sizeof(mmap_event->min); + mmap_event->event_id.header.size += sizeof(mmap_event->ino); + } + perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, event, mmap_event->event_id.header.size); @@ -5037,6 +5048,14 @@ static void perf_event_mmap_output(struct perf_event *event, mmap_event->event_id.tid = perf_event_tid(event, current); perf_output_put(&handle, mmap_event->event_id); + + if (event->attr.mmap2) { + perf_output_put(&handle, mmap_event->maj); + perf_output_put(&handle, mmap_event->min); + perf_output_put(&handle, mmap_event->ino); + perf_output_put(&handle, mmap_event->ino_generation); + } + __output_copy(&handle, mmap_event->file_name, mmap_event->file_size); @@ -5051,6 +5070,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) { struct vm_area_struct *vma = mmap_event->vma; struct file *file = vma->vm_file; + int maj = 0, min = 0; + u64 ino = 0, gen = 0; unsigned int size; char tmp[16]; char *buf = NULL; @@ -5059,6 +5080,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) memset(tmp, 0, sizeof(tmp)); if (file) { + struct inode *inode; + dev_t dev; /* * d_path works from the end of the rb backwards, so we * need to add enough zero bytes after the string to handle @@ -5074,6 +5097,13 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) name = strncpy(tmp, "//toolong", sizeof(tmp)); goto got_name; } + inode = file_inode(vma->vm_file); + dev = inode->i_sb->s_dev; + ino = inode->i_ino; + gen = inode->i_generation; + maj = MAJOR(dev); + min = MINOR(dev); + } else { if (arch_vma_name(mmap_event->vma)) { name = strncpy(tmp, arch_vma_name(mmap_event->vma), @@ -5104,6 +5134,10 @@ got_name: mmap_event->file_name = name; mmap_event->file_size 
= size; + mmap_event->maj = maj; + mmap_event->min = min; + mmap_event->ino = ino; + mmap_event->ino_generation = gen; if (!(vma->vm_flags & VM_EXEC)) mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA; @@ -5140,6 +5174,10 @@ void perf_event_mmap(struct vm_area_struct *vma) .len = vma->vm_end - vma->vm_start, .pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT, }, + /* .maj (attr_mmap2 only) */ + /* .min (attr_mmap2 only) */ + /* .ino (attr_mmap2 only) */ + /* .ino_generation (attr_mmap2 only) */ }; perf_event_mmap_event(&mmap_event); -- cgit v1.1