From 516792e67c39d31701641ab355acdb9cbfec0643 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 31 Aug 2015 15:12:56 +0300 Subject: perf/core: Delete PF_EXITING checks from perf_cgroup_exit() callback cgroup_exit() is not called from copy_process() after commit: e8604cb43690 ("cgroup: fix spurious lockdep warning in cgroup_exit()") from do_exit(). So this check is useless and the comment is obsolete. Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/55E444C8.3020402@odin.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index f548f69..76e64be 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9297,14 +9297,6 @@ static void perf_cgroup_exit(struct cgroup_subsys_state *css, struct cgroup_subsys_state *old_css, struct task_struct *task) { - /* - * cgroup_exit() is called in the copy_process() failure path. - * Ignore this case since the task hasn't ran yet, this avoids - * trying to poke a half freed task state from generic code. - */ - if (!(task->flags & PF_EXITING)) - return; - task_function_call(task, __perf_cgroup_move, task); } -- cgit v1.1 From fbbe07011581990ef74dfac06dc8511b1a14badb Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:45 -0700 Subject: perf/core: Add a 'flags' parameter to the PMU transactional interfaces Currently, the PMU interface allows reading only one counter at a time. But some PMUs like the 24x7 counters in Power, support reading several counters at once. To leveage this functionality, extend the transaction interface to support a "transaction type". The first type, PERF_PMU_TXN_ADD, refers to the existing transactions, i.e. used to _schedule_ all the events on the PMU as a group. A second transaction type, PERF_PMU_TXN_READ, will be used in a follow-on patch, by the 24x7 counters to read several counters at once. Extend the transaction interfaces to the PMU to accept a 'txn_flags' parameter and use this parameter to ignore any transactions that are not of type PERF_PMU_TXN_ADD. Thanks to Peter Zijlstra for his input. Signed-off-by: Sukadev Bhattiprolu [peterz: s390 compile fix] Signed-off-by: Peter Zijlstra (Intel) Acked-by: Michael Ellerman Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-3-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 76e64be..c80cee8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1914,7 +1914,7 @@ group_sched_in(struct perf_event *group_event, if (group_event->state == PERF_EVENT_STATE_OFF) return 0; - pmu->start_txn(pmu); + pmu->start_txn(pmu, PERF_PMU_TXN_ADD); if (event_sched_in(group_event, cpuctx, ctx)) { pmu->cancel_txn(pmu); @@ -7267,24 +7267,49 @@ static void perf_pmu_nop_void(struct pmu *pmu) { } +static void perf_pmu_nop_txn(struct pmu *pmu, unsigned int flags) +{ +} + static int perf_pmu_nop_int(struct pmu *pmu) { return 0; } -static void perf_pmu_start_txn(struct pmu *pmu) +DEFINE_PER_CPU(unsigned int, nop_txn_flags); + +static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags) { + __this_cpu_write(nop_txn_flags, flags); + + if (flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); } static int perf_pmu_commit_txn(struct pmu *pmu) { + unsigned int flags = __this_cpu_read(nop_txn_flags); + + __this_cpu_write(nop_txn_flags, 0); + + if (flags & ~PERF_PMU_TXN_ADD) + return 0; + perf_pmu_enable(pmu); return 0; } static void perf_pmu_cancel_txn(struct pmu *pmu) { + unsigned int flags = __this_cpu_read(nop_txn_flags); + + __this_cpu_write(nop_txn_flags, 0); + + if (flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_enable(pmu); } @@ -7523,7 +7548,7 @@ got_cpu_context: pmu->commit_txn = perf_pmu_commit_txn; pmu->cancel_txn = perf_pmu_cancel_txn; } else { - pmu->start_txn = perf_pmu_nop_void; + pmu->start_txn = perf_pmu_nop_txn; pmu->commit_txn = perf_pmu_nop_int; pmu->cancel_txn = perf_pmu_nop_void; } -- cgit v1.1 From 01add3eaf1b25e497b14ca210f3bfe5f5dd2b112 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:46 -0700 Subject: perf/core: Split perf_event_read() and perf_event_count() perf_event_read() does two things: - call the PMU to read/update the counter value, and - compute the total count of the event and its children Not all callers need both. perf_event_reset() for instance needs the first piece but doesn't need the second. Similarly, when we implement the ability to read a group of events using the transaction interface, we would need the two pieces done independently. Break up perf_event_read() and have it just read/update the counter and have the callers compute the total count if necessary. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-4-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index c80cee8..260bf8c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3275,7 +3275,7 @@ u64 perf_event_read_local(struct perf_event *event) return val; } -static u64 perf_event_read(struct perf_event *event) +static void perf_event_read(struct perf_event *event) { /* * If event is enabled and currently active on a CPU, update the @@ -3301,8 +3301,6 @@ static u64 perf_event_read(struct perf_event *event) update_event_times(event); raw_spin_unlock_irqrestore(&ctx->lock, flags); } - - return perf_event_count(event); } /* @@ -3818,14 +3816,18 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) *running = 0; mutex_lock(&event->child_mutex); - total += perf_event_read(event); + + perf_event_read(event); + total += perf_event_count(event); + *enabled += event->total_time_enabled + atomic64_read(&event->child_total_time_enabled); *running += event->total_time_running + atomic64_read(&event->child_total_time_running); list_for_each_entry(child, &event->child_list, child_list) { - total += perf_event_read(child); + perf_event_read(child); + total += perf_event_count(child); *enabled += child->total_time_enabled; *running += child->total_time_running; } @@ -3985,7 +3987,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) static void _perf_event_reset(struct perf_event *event) { - (void)perf_event_read(event); + perf_event_read(event); local64_set(&event->count, 0); perf_event_update_userpage(event); } -- cgit v1.1 From b15f495b4e9295cf21065d8569835a2f18cfe41b Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Thu, 3 Sep 2015 20:07:47 -0700 Subject: perf/core: Rename perf_event_read_{one,group}, perf_read_hw In order to free up the perf_event_read_group() name: s/perf_event_read_\(one\|group\)/perf_read_\1/g s/perf_read_hw/__perf_read/g Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-5-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 260bf8c..67b7dba 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3742,7 +3742,7 @@ static void put_event(struct perf_event *event) * see the comment there. * * 2) there is a lock-inversion with mmap_sem through - * perf_event_read_group(), which takes faults while + * perf_read_group(), which takes faults while * holding ctx->mutex, however this is called after * the last filedesc died, so there is no possibility * to trigger the AB-BA case. @@ -3837,7 +3837,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) } EXPORT_SYMBOL_GPL(perf_event_read_value); -static int perf_event_read_group(struct perf_event *event, +static int perf_read_group(struct perf_event *event, u64 read_format, char __user *buf) { struct perf_event *leader = event->group_leader, *sub; @@ -3885,7 +3885,7 @@ static int perf_event_read_group(struct perf_event *event, return ret; } -static int perf_event_read_one(struct perf_event *event, +static int perf_read_one(struct perf_event *event, u64 read_format, char __user *buf) { u64 enabled, running; @@ -3923,7 +3923,7 @@ static bool is_event_hup(struct perf_event *event) * Read the performance event - simple non blocking version for now */ static ssize_t -perf_read_hw(struct perf_event *event, char __user *buf, size_t count) +__perf_read(struct perf_event *event, char __user *buf, size_t count) { u64 read_format = event->attr.read_format; int ret; @@ -3941,9 +3941,9 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) WARN_ON_ONCE(event->ctx->parent_ctx); if (read_format & PERF_FORMAT_GROUP) - ret = perf_event_read_group(event, read_format, buf); + ret = perf_read_group(event, read_format, buf); else - ret = perf_event_read_one(event, read_format, buf); + ret = perf_read_one(event, read_format, buf); return ret; } @@ -3956,7 +3956,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) int ret; ctx = perf_event_ctx_lock(event); - ret = perf_read_hw(event, buf, count); + ret = __perf_read(event, buf, count); perf_event_ctx_unlock(event, ctx); return ret; -- cgit v1.1 From 0492d4c5b8c4dc3a7591bf6fa0e35d117812cc85 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Sep 2015 20:07:48 -0700 Subject: perf/core: Add group reads to perf_event_read() Enable perf_event_read() to update entire groups at once, this will be useful for read transactions. Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Sukadev Bhattiprolu Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20150723080435.GE25159@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 67b7dba..4d89866 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3184,12 +3184,18 @@ void perf_event_exec(void) rcu_read_unlock(); } +struct perf_read_data { + struct perf_event *event; + bool group; +}; + /* * Cross CPU call to read the hardware event */ static void __perf_event_read(void *info) { - struct perf_event *event = info; + struct perf_read_data *data = info; + struct perf_event *sub, *event = data->event; struct perf_event_context *ctx = event->ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); @@ -3208,9 +3214,21 @@ static void __perf_event_read(void *info) update_context_time(ctx); update_cgrp_time_from_event(event); } + update_event_times(event); if (event->state == PERF_EVENT_STATE_ACTIVE) event->pmu->read(event); + + if (!data->group) + goto unlock; + + list_for_each_entry(sub, &event->sibling_list, group_entry) { + update_event_times(sub); + if (sub->state == PERF_EVENT_STATE_ACTIVE) + sub->pmu->read(sub); + } + +unlock: raw_spin_unlock(&ctx->lock); } @@ -3275,15 +3293,19 @@ u64 perf_event_read_local(struct perf_event *event) return val; } -static void perf_event_read(struct perf_event *event) +static void perf_event_read(struct perf_event *event, bool group) { /* * If event is enabled and currently active on a CPU, update the * value in the event structure: */ if (event->state == PERF_EVENT_STATE_ACTIVE) { + struct perf_read_data data = { + .event = event, + .group = group, + }; smp_call_function_single(event->oncpu, - __perf_event_read, event, 1); + __perf_event_read, &data, 1); } else if (event->state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event->ctx; unsigned long flags; @@ -3298,7 +3320,10 @@ static void perf_event_read(struct perf_event *event) update_context_time(ctx); update_cgrp_time_from_event(event); } - update_event_times(event); + if (group) + update_group_times(event); + else + update_event_times(event); raw_spin_unlock_irqrestore(&ctx->lock, flags); } } @@ -3817,7 +3842,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) mutex_lock(&event->child_mutex); - perf_event_read(event); + perf_event_read(event, false); total += perf_event_count(event); *enabled += event->total_time_enabled + @@ -3826,7 +3851,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) atomic64_read(&event->child_total_time_running); list_for_each_entry(child, &event->child_list, child_list) { - perf_event_read(child); + perf_event_read(child, false); total += perf_event_count(child); *enabled += child->total_time_enabled; *running += child->total_time_running; @@ -3987,7 +4012,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) static void _perf_event_reset(struct perf_event *event) { - perf_event_read(event); + perf_event_read(event, false); local64_set(&event->count, 0); perf_event_update_userpage(event); } -- cgit v1.1 From fa8c269353d560b7c28119ad7617029f92e40b15 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Sep 2015 20:07:49 -0700 Subject: perf/core: Invert perf_read_group() loops In order to enable the use of perf_event_read(.group = true), we need to invert the sibling-child loop nesting of perf_read_group(). Currently we iterate the child list for each sibling, this precludes using group reads. Flip things around so we iterate each group for each child. Signed-off-by: Peter Zijlstra (Intel) [ Made the patch compile and things. ] Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-7-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 85 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 30 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 4d89866..dd2a0b8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3862,50 +3862,75 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) } EXPORT_SYMBOL_GPL(perf_event_read_value); -static int perf_read_group(struct perf_event *event, - u64 read_format, char __user *buf) +static void __perf_read_group_add(struct perf_event *leader, + u64 read_format, u64 *values) { - struct perf_event *leader = event->group_leader, *sub; - struct perf_event_context *ctx = leader->ctx; - int n = 0, size = 0, ret; - u64 count, enabled, running; - u64 values[5]; + struct perf_event *sub; + int n = 1; /* skip @nr */ - lockdep_assert_held(&ctx->mutex); + perf_event_read(leader, true); + + /* + * Since we co-schedule groups, {enabled,running} times of siblings + * will be identical to those of the leader, so we only publish one + * set. + */ + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] += leader->total_time_enabled + + atomic64_read(&leader->child_total_time_enabled); + } - count = perf_event_read_value(leader, &enabled, &running); + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] += leader->total_time_running + + atomic64_read(&leader->child_total_time_running); + } - values[n++] = 1 + leader->nr_siblings; - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - values[n++] = enabled; - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - values[n++] = running; - values[n++] = count; + /* + * Write {count,id} tuples for every sibling. + */ + values[n++] += perf_event_count(leader); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); - size = n * sizeof(u64); + list_for_each_entry(sub, &leader->sibling_list, group_entry) { + values[n++] += perf_event_count(sub); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_event_id(sub); + } +} - if (copy_to_user(buf, values, size)) - return -EFAULT; +static int perf_read_group(struct perf_event *event, + u64 read_format, char __user *buf) +{ + struct perf_event *leader = event->group_leader, *child; + struct perf_event_context *ctx = leader->ctx; + int ret = event->read_size; + u64 *values; - ret = size; + lockdep_assert_held(&ctx->mutex); - list_for_each_entry(sub, &leader->sibling_list, group_entry) { - n = 0; + values = kzalloc(event->read_size, GFP_KERNEL); + if (!values) + return -ENOMEM; - values[n++] = perf_event_read_value(sub, &enabled, &running); - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_event_id(sub); + values[0] = 1 + leader->nr_siblings; + + /* + * By locking the child_mutex of the leader we effectively + * lock the child list of all siblings.. XXX explain how. + */ + mutex_lock(&leader->child_mutex); - size = n * sizeof(u64); + __perf_read_group_add(leader, read_format, values); + list_for_each_entry(child, &leader->child_list, child_list) + __perf_read_group_add(child, read_format, values); - if (copy_to_user(buf + ret, values, size)) { - return -EFAULT; - } + mutex_unlock(&leader->child_mutex); - ret += size; - } + if (copy_to_user(buf, values, event->read_size)) + ret = -EFAULT; + + kfree(values); return ret; } -- cgit v1.1 From 7d88962e230c8342080e7e2fe9dd5be43dc13b79 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:50 -0700 Subject: perf/core: Add return value for perf_event_read() When we implement the ability to read several counters at once (using the PERF_PMU_TXN_READ transaction interface), perf_event_read() can fail when the 'group' parameter is true (eg: trying to read too many events at once). For now, have perf_event_read() return an integer. Ignore the return value when the 'group' parameter is false. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-8-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index dd2a0b8..ade04df 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3187,6 +3187,7 @@ void perf_event_exec(void) struct perf_read_data { struct perf_event *event; bool group; + int ret; }; /* @@ -3227,6 +3228,7 @@ static void __perf_event_read(void *info) if (sub->state == PERF_EVENT_STATE_ACTIVE) sub->pmu->read(sub); } + data->ret = 0; unlock: raw_spin_unlock(&ctx->lock); @@ -3293,8 +3295,10 @@ u64 perf_event_read_local(struct perf_event *event) return val; } -static void perf_event_read(struct perf_event *event, bool group) +static int perf_event_read(struct perf_event *event, bool group) { + int ret = 0; + /* * If event is enabled and currently active on a CPU, update the * value in the event structure: @@ -3303,9 +3307,11 @@ static void perf_event_read(struct perf_event *event, bool group) struct perf_read_data data = { .event = event, .group = group, + .ret = 0, }; smp_call_function_single(event->oncpu, __perf_event_read, &data, 1); + ret = data.ret; } else if (event->state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event->ctx; unsigned long flags; @@ -3326,6 +3332,8 @@ static void perf_event_read(struct perf_event *event, bool group) update_event_times(event); raw_spin_unlock_irqrestore(&ctx->lock, flags); } + + return ret; } /* @@ -3842,7 +3850,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) mutex_lock(&event->child_mutex); - perf_event_read(event, false); + (void)perf_event_read(event, false); total += perf_event_count(event); *enabled += event->total_time_enabled + @@ -3851,7 +3859,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) atomic64_read(&event->child_total_time_running); list_for_each_entry(child, &event->child_list, child_list) { - perf_event_read(child, false); + (void)perf_event_read(child, false); total += perf_event_count(child); *enabled += child->total_time_enabled; *running += child->total_time_running; @@ -3862,13 +3870,16 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) } EXPORT_SYMBOL_GPL(perf_event_read_value); -static void __perf_read_group_add(struct perf_event *leader, +static int __perf_read_group_add(struct perf_event *leader, u64 read_format, u64 *values) { struct perf_event *sub; int n = 1; /* skip @nr */ + int ret; - perf_event_read(leader, true); + ret = perf_event_read(leader, true); + if (ret) + return ret; /* * Since we co-schedule groups, {enabled,running} times of siblings @@ -3897,6 +3908,8 @@ static void __perf_read_group_add(struct perf_event *leader, if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); } + + return 0; } static int perf_read_group(struct perf_event *event, @@ -3904,7 +3917,7 @@ static int perf_read_group(struct perf_event *event, { struct perf_event *leader = event->group_leader, *child; struct perf_event_context *ctx = leader->ctx; - int ret = event->read_size; + int ret; u64 *values; lockdep_assert_held(&ctx->mutex); @@ -3921,17 +3934,27 @@ static int perf_read_group(struct perf_event *event, */ mutex_lock(&leader->child_mutex); - __perf_read_group_add(leader, read_format, values); - list_for_each_entry(child, &leader->child_list, child_list) - __perf_read_group_add(child, read_format, values); + ret = __perf_read_group_add(leader, read_format, values); + if (ret) + goto unlock; + + list_for_each_entry(child, &leader->child_list, child_list) { + ret = __perf_read_group_add(child, read_format, values); + if (ret) + goto unlock; + } mutex_unlock(&leader->child_mutex); + ret = event->read_size; if (copy_to_user(buf, values, event->read_size)) ret = -EFAULT; + goto out; +unlock: + mutex_unlock(&leader->child_mutex); +out: kfree(values); - return ret; } @@ -4037,7 +4060,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) static void _perf_event_reset(struct perf_event *event) { - perf_event_read(event, false); + (void)perf_event_read(event, false); local64_set(&event->count, 0); perf_event_update_userpage(event); } -- cgit v1.1 From 4a00c16e552ea5e71756cd29cd2df7557ec9cac4 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:51 -0700 Subject: perf/core: Define PERF_PMU_TXN_READ interface Define a new PERF_PMU_TXN_READ interface to read a group of counters at once. pmu->start_txn() // Initialize before first event for each event in group pmu->read(event); // Queue each event to be read rc = pmu->commit_txn() // Read/update all queued counters Note that we use this interface with all PMUs. PMUs that implement this interface use the ->read() operation to _queue_ the counters to be read and use ->commit_txn() to actually read all the queued counters at once. PMUs that don't implement PERF_PMU_TXN_READ ignore ->start_txn() and ->commit_txn() and continue to read counters one at a time. Thanks to input from Peter Zijlstra. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-9-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index ade04df..55b0f7c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3199,6 +3199,7 @@ static void __perf_event_read(void *info) struct perf_event *sub, *event = data->event; struct perf_event_context *ctx = event->ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); + struct pmu *pmu = event->pmu; /* * If this is a task context, we need to check whether it is @@ -3217,18 +3218,31 @@ static void __perf_event_read(void *info) } update_event_times(event); - if (event->state == PERF_EVENT_STATE_ACTIVE) - event->pmu->read(event); + if (event->state != PERF_EVENT_STATE_ACTIVE) + goto unlock; - if (!data->group) + if (!data->group) { + pmu->read(event); + data->ret = 0; goto unlock; + } + + pmu->start_txn(pmu, PERF_PMU_TXN_READ); + + pmu->read(event); list_for_each_entry(sub, &event->sibling_list, group_entry) { update_event_times(sub); - if (sub->state == PERF_EVENT_STATE_ACTIVE) + if (sub->state == PERF_EVENT_STATE_ACTIVE) { + /* + * Use sibling's PMU rather than @event's since + * sibling could be on different (eg: software) PMU. + */ sub->pmu->read(sub); + } } - data->ret = 0; + + data->ret = pmu->commit_txn(pmu); unlock: raw_spin_unlock(&ctx->lock); -- cgit v1.1 From 18ab2cd3ee9d52dc64c5ae984146a261a328c4e8 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sun, 27 Sep 2015 23:25:50 +0800 Subject: perf/core, perf/x86: Change needlessly global functions and a variable to static Fixes various sparse warnings. Signed-off-by: Geliang Tang Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/70c14234da1bed6e3e67b9c419e2d5e376ab4f32.1443367286.git.geliangtang@163.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index f87b434..ea02109 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -196,7 +196,7 @@ static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS; static int perf_sample_allowed_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100; -void update_perf_cpu_limits(void) +static void update_perf_cpu_limits(void) { u64 tmp = perf_sample_period_ns; @@ -472,7 +472,7 @@ perf_cgroup_set_timestamp(struct task_struct *task, * mode SWOUT : schedule out everything * mode SWIN : schedule in based on cgroup for next */ -void perf_cgroup_switch(struct task_struct *task, int mode) +static void perf_cgroup_switch(struct task_struct *task, int mode) { struct perf_cpu_context *cpuctx; struct pmu *pmu; @@ -7390,7 +7390,7 @@ static int perf_pmu_nop_int(struct pmu *pmu) return 0; } -DEFINE_PER_CPU(unsigned int, nop_txn_flags); +static DEFINE_PER_CPU(unsigned int, nop_txn_flags); static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags) { @@ -7750,7 +7750,7 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) return ret; } -struct pmu *perf_init_event(struct perf_event *event) +static struct pmu *perf_init_event(struct perf_event *event) { struct pmu *pmu = NULL; int idx; -- cgit v1.1