summaryrefslogtreecommitdiffstats
path: root/kernel/perf_event.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--kernel/perf_event.c775
1 files changed, 439 insertions, 336 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 9ae4dbc..c772a3d4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -283,14 +283,15 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
static void
list_add_event(struct perf_event *event, struct perf_event_context *ctx)
{
- struct perf_event *group_leader = event->group_leader;
+ WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
+ event->attach_state |= PERF_ATTACH_CONTEXT;
/*
- * Depending on whether it is a standalone or sibling event,
- * add it straight to the context's event list, or to the group
- * leader's sibling list:
+ * If we're a stand alone event or group leader, we go to the context
+ * list, group events are kept attached to the group so that
+ * perf_group_detach can, at all times, locate all siblings.
*/
- if (group_leader == event) {
+ if (event->group_leader == event) {
struct list_head *list;
if (is_software_event(event))
@@ -298,13 +299,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
list = ctx_group_list(event, ctx);
list_add_tail(&event->group_entry, list);
- } else {
- if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
- !is_software_event(event))
- group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
-
- list_add_tail(&event->group_entry, &group_leader->sibling_list);
- group_leader->nr_siblings++;
}
list_add_rcu(&event->event_entry, &ctx->event_list);
@@ -313,6 +307,24 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
ctx->nr_stat++;
}
+static void perf_group_attach(struct perf_event *event)
+{
+ struct perf_event *group_leader = event->group_leader;
+
+ WARN_ON_ONCE(event->attach_state & PERF_ATTACH_GROUP);
+ event->attach_state |= PERF_ATTACH_GROUP;
+
+ if (group_leader == event)
+ return;
+
+ if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
+ !is_software_event(event))
+ group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
+
+ list_add_tail(&event->group_entry, &group_leader->sibling_list);
+ group_leader->nr_siblings++;
+}
+
/*
* Remove a event from the lists for its context.
* Must be called with ctx->mutex and ctx->lock held.
@@ -320,17 +332,22 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
static void
list_del_event(struct perf_event *event, struct perf_event_context *ctx)
{
- if (list_empty(&event->group_entry))
+ /*
+ * We can have double detach due to exit/hot-unplug + close.
+ */
+ if (!(event->attach_state & PERF_ATTACH_CONTEXT))
return;
+
+ event->attach_state &= ~PERF_ATTACH_CONTEXT;
+
ctx->nr_events--;
if (event->attr.inherit_stat)
ctx->nr_stat--;
- list_del_init(&event->group_entry);
list_del_rcu(&event->event_entry);
- if (event->group_leader != event)
- event->group_leader->nr_siblings--;
+ if (event->group_leader == event)
+ list_del_init(&event->group_entry);
update_group_times(event);
@@ -345,21 +362,39 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
event->state = PERF_EVENT_STATE_OFF;
}
-static void
-perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx)
+static void perf_group_detach(struct perf_event *event)
{
struct perf_event *sibling, *tmp;
+ struct list_head *list = NULL;
+
+ /*
+ * We can have double detach due to exit/hot-unplug + close.
+ */
+ if (!(event->attach_state & PERF_ATTACH_GROUP))
+ return;
+
+ event->attach_state &= ~PERF_ATTACH_GROUP;
+
+ /*
+ * If this is a sibling, remove it from its group.
+ */
+ if (event->group_leader != event) {
+ list_del_init(&event->group_entry);
+ event->group_leader->nr_siblings--;
+ return;
+ }
+
+ if (!list_empty(&event->group_entry))
+ list = &event->group_entry;
/*
* If this was a group event with sibling events then
* upgrade the siblings to singleton events by adding them
- * to the context list directly:
+ * to whatever list we are on.
*/
list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
- struct list_head *list;
-
- list = ctx_group_list(event, ctx);
- list_move_tail(&sibling->group_entry, list);
+ if (list)
+ list_move_tail(&sibling->group_entry, list);
sibling->group_leader = sibling;
/* Inherit group flags from the previous leader */
@@ -640,7 +675,6 @@ group_sched_in(struct perf_event *group_event,
struct perf_event *event, *partial_group = NULL;
const struct pmu *pmu = group_event->pmu;
bool txn = false;
- int ret;
if (group_event->state == PERF_EVENT_STATE_OFF)
return 0;
@@ -652,8 +686,11 @@ group_sched_in(struct perf_event *group_event,
if (txn)
pmu->start_txn(pmu);
- if (event_sched_in(group_event, cpuctx, ctx))
+ if (event_sched_in(group_event, cpuctx, ctx)) {
+ if (txn)
+ pmu->cancel_txn(pmu);
return -EAGAIN;
+ }
/*
* Schedule in siblings as one group (if any):
@@ -665,19 +702,10 @@ group_sched_in(struct perf_event *group_event,
}
}
- if (!txn)
- return 0;
-
- ret = pmu->commit_txn(pmu);
- if (!ret) {
- pmu->cancel_txn(pmu);
+ if (!txn || !pmu->commit_txn(pmu))
return 0;
- }
group_error:
- if (txn)
- pmu->cancel_txn(pmu);
-
/*
* Groups can be scheduled in as one unit only, so undo any
* partial group before returning:
@@ -689,6 +717,9 @@ group_error:
}
event_sched_out(group_event, cpuctx, ctx);
+ if (txn)
+ pmu->cancel_txn(pmu);
+
return -EAGAIN;
}
@@ -727,6 +758,7 @@ static void add_event_to_ctx(struct perf_event *event,
struct perf_event_context *ctx)
{
list_add_event(event, ctx);
+ perf_group_attach(event);
event->tstamp_enabled = ctx->time;
event->tstamp_running = ctx->time;
event->tstamp_stopped = ctx->time;
@@ -1116,9 +1148,9 @@ static void __perf_event_sync_stat(struct perf_event *event,
* In order to keep per-task stats reliable we need to flip the event
* values when we flip the contexts.
*/
- value = atomic64_read(&next_event->count);
- value = atomic64_xchg(&event->count, value);
- atomic64_set(&next_event->count, value);
+ value = local64_read(&next_event->count);
+ value = local64_xchg(&event->count, value);
+ local64_set(&next_event->count, value);
swap(event->total_time_enabled, next_event->total_time_enabled);
swap(event->total_time_running, next_event->total_time_running);
@@ -1468,6 +1500,9 @@ do { \
divisor = nsec * frequency;
}
+ if (!divisor)
+ return dividend;
+
return div64_u64(dividend, divisor);
}
@@ -1490,7 +1525,7 @@ static int perf_event_start(struct perf_event *event)
static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
{
struct hw_perf_event *hwc = &event->hw;
- u64 period, sample_period;
+ s64 period, sample_period;
s64 delta;
period = perf_calculate_period(event, nsec, count);
@@ -1505,10 +1540,10 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
hwc->sample_period = sample_period;
- if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+ if (local64_read(&hwc->period_left) > 8*sample_period) {
perf_disable();
perf_event_stop(event);
- atomic64_set(&hwc->period_left, 0);
+ local64_set(&hwc->period_left, 0);
perf_event_start(event);
perf_enable();
}
@@ -1549,7 +1584,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
perf_disable();
event->pmu->read(event);
- now = atomic64_read(&event->count);
+ now = local64_read(&event->count);
delta = now - hwc->freq_count_stamp;
hwc->freq_count_stamp = now;
@@ -1701,6 +1736,11 @@ static void __perf_event_read(void *info)
event->pmu->read(event);
}
+static inline u64 perf_event_count(struct perf_event *event)
+{
+ return local64_read(&event->count) + atomic64_read(&event->child_count);
+}
+
static u64 perf_event_read(struct perf_event *event)
{
/*
@@ -1720,7 +1760,7 @@ static u64 perf_event_read(struct perf_event *event)
raw_spin_unlock_irqrestore(&ctx->lock, flags);
}
- return atomic64_read(&event->count);
+ return perf_event_count(event);
}
/*
@@ -1841,6 +1881,7 @@ static void free_event_rcu(struct rcu_head *head)
}
static void perf_pending_sync(struct perf_event *event);
+static void perf_buffer_put(struct perf_buffer *buffer);
static void free_event(struct perf_event *event)
{
@@ -1848,7 +1889,7 @@ static void free_event(struct perf_event *event)
if (!event->parent) {
atomic_dec(&nr_events);
- if (event->attr.mmap)
+ if (event->attr.mmap || event->attr.mmap_data)
atomic_dec(&nr_mmap_events);
if (event->attr.comm)
atomic_dec(&nr_comm_events);
@@ -1856,9 +1897,9 @@ static void free_event(struct perf_event *event)
atomic_dec(&nr_task_events);
}
- if (event->output) {
- fput(event->output->filp);
- event->output = NULL;
+ if (event->buffer) {
+ perf_buffer_put(event->buffer);
+ event->buffer = NULL;
}
if (event->destroy)
@@ -1893,8 +1934,8 @@ int perf_event_release_kernel(struct perf_event *event)
*/
mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
raw_spin_lock_irq(&ctx->lock);
+ perf_group_detach(event);
list_del_event(event, ctx);
- perf_destroy_group(event, ctx);
raw_spin_unlock_irq(&ctx->lock);
mutex_unlock(&ctx->mutex);
@@ -2083,13 +2124,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
static unsigned int perf_poll(struct file *file, poll_table *wait)
{
struct perf_event *event = file->private_data;
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
unsigned int events = POLL_HUP;
rcu_read_lock();
- data = rcu_dereference(event->data);
- if (data)
- events = atomic_xchg(&data->poll, 0);
+ buffer = rcu_dereference(event->buffer);
+ if (buffer)
+ events = atomic_xchg(&buffer->poll, 0);
rcu_read_unlock();
poll_wait(file, &event->waitq, wait);
@@ -2100,7 +2141,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
static void perf_event_reset(struct perf_event *event)
{
(void)perf_event_read(event);
- atomic64_set(&event->count, 0);
+ local64_set(&event->count, 0);
perf_event_update_userpage(event);
}
@@ -2175,7 +2216,27 @@ unlock:
return ret;
}
-static int perf_event_set_output(struct perf_event *event, int output_fd);
+static const struct file_operations perf_fops;
+
+static struct perf_event *perf_fget_light(int fd, int *fput_needed)
+{
+ struct file *file;
+
+ file = fget_light(fd, fput_needed);
+ if (!file)
+ return ERR_PTR(-EBADF);
+
+ if (file->f_op != &perf_fops) {
+ fput_light(file, *fput_needed);
+ *fput_needed = 0;
+ return ERR_PTR(-EBADF);
+ }
+
+ return file->private_data;
+}
+
+static int perf_event_set_output(struct perf_event *event,
+ struct perf_event *output_event);
static int perf_event_set_filter(struct perf_event *event, void __user *arg);
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -2202,7 +2263,23 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return perf_event_period(event, (u64 __user *)arg);
case PERF_EVENT_IOC_SET_OUTPUT:
- return perf_event_set_output(event, arg);
+ {
+ struct perf_event *output_event = NULL;
+ int fput_needed = 0;
+ int ret;
+
+ if (arg != -1) {
+ output_event = perf_fget_light(arg, &fput_needed);
+ if (IS_ERR(output_event))
+ return PTR_ERR(output_event);
+ }
+
+ ret = perf_event_set_output(event, output_event);
+ if (output_event)
+ fput_light(output_event->filp, fput_needed);
+
+ return ret;
+ }
case PERF_EVENT_IOC_SET_FILTER:
return perf_event_set_filter(event, (void __user *)arg);
@@ -2263,14 +2340,14 @@ static int perf_event_index(struct perf_event *event)
void perf_event_update_userpage(struct perf_event *event)
{
struct perf_event_mmap_page *userpg;
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
rcu_read_lock();
- data = rcu_dereference(event->data);
- if (!data)
+ buffer = rcu_dereference(event->buffer);
+ if (!buffer)
goto unlock;
- userpg = data->user_page;
+ userpg = buffer->user_page;
/*
* Disable preemption so as to not let the corresponding user-space
@@ -2280,9 +2357,9 @@ void perf_event_update_userpage(struct perf_event *event)
++userpg->lock;
barrier();
userpg->index = perf_event_index(event);
- userpg->offset = atomic64_read(&event->count);
+ userpg->offset = perf_event_count(event);
if (event->state == PERF_EVENT_STATE_ACTIVE)
- userpg->offset -= atomic64_read(&event->hw.prev_count);
+ userpg->offset -= local64_read(&event->hw.prev_count);
userpg->time_enabled = event->total_time_enabled +
atomic64_read(&event->child_total_time_enabled);
@@ -2297,6 +2374,25 @@ unlock:
rcu_read_unlock();
}
+static unsigned long perf_data_size(struct perf_buffer *buffer);
+
+static void
+perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags)
+{
+ long max_size = perf_data_size(buffer);
+
+ if (watermark)
+ buffer->watermark = min(max_size, watermark);
+
+ if (!buffer->watermark)
+ buffer->watermark = max_size / 2;
+
+ if (flags & PERF_BUFFER_WRITABLE)
+ buffer->writable = 1;
+
+ atomic_set(&buffer->refcount, 1);
+}
+
#ifndef CONFIG_PERF_USE_VMALLOC
/*
@@ -2304,15 +2400,15 @@ unlock:
*/
static struct page *
-perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
{
- if (pgoff > data->nr_pages)
+ if (pgoff > buffer->nr_pages)
return NULL;
if (pgoff == 0)
- return virt_to_page(data->user_page);
+ return virt_to_page(buffer->user_page);
- return virt_to_page(data->data_pages[pgoff - 1]);
+ return virt_to_page(buffer->data_pages[pgoff - 1]);
}
static void *perf_mmap_alloc_page(int cpu)
@@ -2328,44 +2424,44 @@ static void *perf_mmap_alloc_page(int cpu)
return page_address(page);
}
-static struct perf_mmap_data *
-perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+static struct perf_buffer *
+perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
{
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
unsigned long size;
int i;
- WARN_ON(atomic_read(&event->mmap_count));
-
- size = sizeof(struct perf_mmap_data);
+ size = sizeof(struct perf_buffer);
size += nr_pages * sizeof(void *);
- data = kzalloc(size, GFP_KERNEL);
- if (!data)
+ buffer = kzalloc(size, GFP_KERNEL);
+ if (!buffer)
goto fail;
- data->user_page = perf_mmap_alloc_page(event->cpu);
- if (!data->user_page)
+ buffer->user_page = perf_mmap_alloc_page(cpu);
+ if (!buffer->user_page)
goto fail_user_page;
for (i = 0; i < nr_pages; i++) {
- data->data_pages[i] = perf_mmap_alloc_page(event->cpu);
- if (!data->data_pages[i])
+ buffer->data_pages[i] = perf_mmap_alloc_page(cpu);
+ if (!buffer->data_pages[i])
goto fail_data_pages;
}
- data->nr_pages = nr_pages;
+ buffer->nr_pages = nr_pages;
+
+ perf_buffer_init(buffer, watermark, flags);
- return data;
+ return buffer;
fail_data_pages:
for (i--; i >= 0; i--)
- free_page((unsigned long)data->data_pages[i]);
+ free_page((unsigned long)buffer->data_pages[i]);
- free_page((unsigned long)data->user_page);
+ free_page((unsigned long)buffer->user_page);
fail_user_page:
- kfree(data);
+ kfree(buffer);
fail:
return NULL;
@@ -2379,17 +2475,17 @@ static void perf_mmap_free_page(unsigned long addr)
__free_page(page);
}
-static void perf_mmap_data_free(struct perf_mmap_data *data)
+static void perf_buffer_free(struct perf_buffer *buffer)
{
int i;
- perf_mmap_free_page((unsigned long)data->user_page);
- for (i = 0; i < data->nr_pages; i++)
- perf_mmap_free_page((unsigned long)data->data_pages[i]);
- kfree(data);
+ perf_mmap_free_page((unsigned long)buffer->user_page);
+ for (i = 0; i < buffer->nr_pages; i++)
+ perf_mmap_free_page((unsigned long)buffer->data_pages[i]);
+ kfree(buffer);
}
-static inline int page_order(struct perf_mmap_data *data)
+static inline int page_order(struct perf_buffer *buffer)
{
return 0;
}
@@ -2402,18 +2498,18 @@ static inline int page_order(struct perf_mmap_data *data)
* Required for architectures that have d-cache aliasing issues.
*/
-static inline int page_order(struct perf_mmap_data *data)
+static inline int page_order(struct perf_buffer *buffer)
{
- return data->page_order;
+ return buffer->page_order;
}
static struct page *
-perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
{
- if (pgoff > (1UL << page_order(data)))
+ if (pgoff > (1UL << page_order(buffer)))
return NULL;
- return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
+ return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE);
}
static void perf_mmap_unmark_page(void *addr)
@@ -2423,59 +2519,59 @@ static void perf_mmap_unmark_page(void *addr)
page->mapping = NULL;
}
-static void perf_mmap_data_free_work(struct work_struct *work)
+static void perf_buffer_free_work(struct work_struct *work)
{
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
void *base;
int i, nr;
- data = container_of(work, struct perf_mmap_data, work);
- nr = 1 << page_order(data);
+ buffer = container_of(work, struct perf_buffer, work);
+ nr = 1 << page_order(buffer);
- base = data->user_page;
+ base = buffer->user_page;
for (i = 0; i < nr + 1; i++)
perf_mmap_unmark_page(base + (i * PAGE_SIZE));
vfree(base);
- kfree(data);
+ kfree(buffer);
}
-static void perf_mmap_data_free(struct perf_mmap_data *data)
+static void perf_buffer_free(struct perf_buffer *buffer)
{
- schedule_work(&data->work);
+ schedule_work(&buffer->work);
}
-static struct perf_mmap_data *
-perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+static struct perf_buffer *
+perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
{
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
unsigned long size;
void *all_buf;
- WARN_ON(atomic_read(&event->mmap_count));
-
- size = sizeof(struct perf_mmap_data);
+ size = sizeof(struct perf_buffer);
size += sizeof(void *);
- data = kzalloc(size, GFP_KERNEL);
- if (!data)
+ buffer = kzalloc(size, GFP_KERNEL);
+ if (!buffer)
goto fail;
- INIT_WORK(&data->work, perf_mmap_data_free_work);
+ INIT_WORK(&buffer->work, perf_buffer_free_work);
all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
if (!all_buf)
goto fail_all_buf;
- data->user_page = all_buf;
- data->data_pages[0] = all_buf + PAGE_SIZE;
- data->page_order = ilog2(nr_pages);
- data->nr_pages = 1;
+ buffer->user_page = all_buf;
+ buffer->data_pages[0] = all_buf + PAGE_SIZE;
+ buffer->page_order = ilog2(nr_pages);
+ buffer->nr_pages = 1;
+
+ perf_buffer_init(buffer, watermark, flags);
- return data;
+ return buffer;
fail_all_buf:
- kfree(data);
+ kfree(buffer);
fail:
return NULL;
@@ -2483,15 +2579,15 @@ fail:
#endif
-static unsigned long perf_data_size(struct perf_mmap_data *data)
+static unsigned long perf_data_size(struct perf_buffer *buffer)
{
- return data->nr_pages << (PAGE_SHIFT + page_order(data));
+ return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer));
}
static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct perf_event *event = vma->vm_file->private_data;
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
int ret = VM_FAULT_SIGBUS;
if (vmf->flags & FAULT_FLAG_MKWRITE) {
@@ -2501,14 +2597,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}
rcu_read_lock();
- data = rcu_dereference(event->data);
- if (!data)
+ buffer = rcu_dereference(event->buffer);
+ if (!buffer)
goto unlock;
if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
goto unlock;
- vmf->page = perf_mmap_to_page(data, vmf->pgoff);
+ vmf->page = perf_mmap_to_page(buffer, vmf->pgoff);
if (!vmf->page)
goto unlock;
@@ -2523,39 +2619,35 @@ unlock:
return ret;
}
-static void
-perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
+static void perf_buffer_free_rcu(struct rcu_head *rcu_head)
{
- long max_size = perf_data_size(data);
-
- if (event->attr.watermark) {
- data->watermark = min_t(long, max_size,
- event->attr.wakeup_watermark);
- }
+ struct perf_buffer *buffer;
- if (!data->watermark)
- data->watermark = max_size / 2;
-
-
- rcu_assign_pointer(event->data, data);
+ buffer = container_of(rcu_head, struct perf_buffer, rcu_head);
+ perf_buffer_free(buffer);
}
-static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
+static struct perf_buffer *perf_buffer_get(struct perf_event *event)
{
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
- data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
- perf_mmap_data_free(data);
+ rcu_read_lock();
+ buffer = rcu_dereference(event->buffer);
+ if (buffer) {
+ if (!atomic_inc_not_zero(&buffer->refcount))
+ buffer = NULL;
+ }
+ rcu_read_unlock();
+
+ return buffer;
}
-static void perf_mmap_data_release(struct perf_event *event)
+static void perf_buffer_put(struct perf_buffer *buffer)
{
- struct perf_mmap_data *data = event->data;
-
- WARN_ON(atomic_read(&event->mmap_count));
+ if (!atomic_dec_and_test(&buffer->refcount))
+ return;
- rcu_assign_pointer(event->data, NULL);
- call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
+ call_rcu(&buffer->rcu_head, perf_buffer_free_rcu);
}
static void perf_mmap_open(struct vm_area_struct *vma)
@@ -2569,15 +2661,18 @@ static void perf_mmap_close(struct vm_area_struct *vma)
{
struct perf_event *event = vma->vm_file->private_data;
- WARN_ON_ONCE(event->ctx->parent_ctx);
if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
- unsigned long size = perf_data_size(event->data);
- struct user_struct *user = current_user();
+ unsigned long size = perf_data_size(event->buffer);
+ struct user_struct *user = event->mmap_user;
+ struct perf_buffer *buffer = event->buffer;
atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
- vma->vm_mm->locked_vm -= event->data->nr_locked;
- perf_mmap_data_release(event);
+ vma->vm_mm->locked_vm -= event->mmap_locked;
+ rcu_assign_pointer(event->buffer, NULL);
mutex_unlock(&event->mmap_mutex);
+
+ perf_buffer_put(buffer);
+ free_uid(user);
}
}
@@ -2594,11 +2689,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
unsigned long user_locked, user_lock_limit;
struct user_struct *user = current_user();
unsigned long locked, lock_limit;
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
unsigned long vma_size;
unsigned long nr_pages;
long user_extra, extra;
- int ret = 0;
+ int ret = 0, flags = 0;
/*
* Don't allow mmap() of inherited per-task counters. This would
@@ -2615,7 +2710,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
nr_pages = (vma_size / PAGE_SIZE) - 1;
/*
- * If we have data pages ensure they're a power-of-two number, so we
+ * If we have buffer pages ensure they're a power-of-two number, so we
* can do bitmasks instead of modulo.
*/
if (nr_pages != 0 && !is_power_of_2(nr_pages))
@@ -2629,13 +2724,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
WARN_ON_ONCE(event->ctx->parent_ctx);
mutex_lock(&event->mmap_mutex);
- if (event->output) {
- ret = -EINVAL;
- goto unlock;
- }
-
- if (atomic_inc_not_zero(&event->mmap_count)) {
- if (nr_pages != event->data->nr_pages)
+ if (event->buffer) {
+ if (event->buffer->nr_pages == nr_pages)
+ atomic_inc(&event->buffer->refcount);
+ else
ret = -EINVAL;
goto unlock;
}
@@ -2664,24 +2756,27 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
goto unlock;
}
- WARN_ON(event->data);
+ WARN_ON(event->buffer);
- data = perf_mmap_data_alloc(event, nr_pages);
- ret = -ENOMEM;
- if (!data)
- goto unlock;
+ if (vma->vm_flags & VM_WRITE)
+ flags |= PERF_BUFFER_WRITABLE;
- ret = 0;
- perf_mmap_data_init(event, data);
+ buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark,
+ event->cpu, flags);
+ if (!buffer) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+ rcu_assign_pointer(event->buffer, buffer);
- atomic_set(&event->mmap_count, 1);
atomic_long_add(user_extra, &user->locked_vm);
- vma->vm_mm->locked_vm += extra;
- event->data->nr_locked = extra;
- if (vma->vm_flags & VM_WRITE)
- event->data->writable = 1;
+ event->mmap_locked = extra;
+ event->mmap_user = get_current_user();
+ vma->vm_mm->locked_vm += event->mmap_locked;
unlock:
+ if (!ret)
+ atomic_inc(&event->mmap_count);
mutex_unlock(&event->mmap_mutex);
vma->vm_flags |= VM_RESERVED;
@@ -2876,15 +2971,15 @@ EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
/*
* Output
*/
-static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
+static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail,
unsigned long offset, unsigned long head)
{
unsigned long mask;
- if (!data->writable)
+ if (!buffer->writable)
return true;
- mask = perf_data_size(data) - 1;
+ mask = perf_data_size(buffer) - 1;
offset = (offset - tail) & mask;
head = (head - tail) & mask;
@@ -2897,7 +2992,7 @@ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
static void perf_output_wakeup(struct perf_output_handle *handle)
{
- atomic_set(&handle->data->poll, POLL_IN);
+ atomic_set(&handle->buffer->poll, POLL_IN);
if (handle->nmi) {
handle->event->pending_wakeup = 1;
@@ -2917,45 +3012,45 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
*/
static void perf_output_get_handle(struct perf_output_handle *handle)
{
- struct perf_mmap_data *data = handle->data;
+ struct perf_buffer *buffer = handle->buffer;
preempt_disable();
- local_inc(&data->nest);
- handle->wakeup = local_read(&data->wakeup);
+ local_inc(&buffer->nest);
+ handle->wakeup = local_read(&buffer->wakeup);
}
static void perf_output_put_handle(struct perf_output_handle *handle)
{
- struct perf_mmap_data *data = handle->data;
+ struct perf_buffer *buffer = handle->buffer;
unsigned long head;
again:
- head = local_read(&data->head);
+ head = local_read(&buffer->head);
/*
* IRQ/NMI can happen here, which means we can miss a head update.
*/
- if (!local_dec_and_test(&data->nest))
+ if (!local_dec_and_test(&buffer->nest))
goto out;
/*
* Publish the known good head. Rely on the full barrier implied
- * by atomic_dec_and_test() order the data->head read and this
+ * by atomic_dec_and_test() order the buffer->head read and this
* write.
*/
- data->user_page->data_head = head;
+ buffer->user_page->data_head = head;
/*
* Now check if we missed an update, rely on the (compiler)
- * barrier in atomic_dec_and_test() to re-read data->head.
+ * barrier in atomic_dec_and_test() to re-read buffer->head.
*/
- if (unlikely(head != local_read(&data->head))) {
- local_inc(&data->nest);
+ if (unlikely(head != local_read(&buffer->head))) {
+ local_inc(&buffer->nest);
goto again;
}
- if (handle->wakeup != local_read(&data->wakeup))
+ if (handle->wakeup != local_read(&buffer->wakeup))
perf_output_wakeup(handle);
out:
@@ -2972,14 +3067,15 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
len -= size;
handle->addr += size;
+ buf += size;
handle->size -= size;
if (!handle->size) {
- struct perf_mmap_data *data = handle->data;
+ struct perf_buffer *buffer = handle->buffer;
handle->page++;
- handle->page &= data->nr_pages - 1;
- handle->addr = data->data_pages[handle->page];
- handle->size = PAGE_SIZE << page_order(data);
+ handle->page &= buffer->nr_pages - 1;
+ handle->addr = buffer->data_pages[handle->page];
+ handle->size = PAGE_SIZE << page_order(buffer);
}
} while (len);
}
@@ -2988,8 +3084,7 @@ int perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size,
int nmi, int sample)
{
- struct perf_event *output_event;
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
unsigned long tail, offset, head;
int have_lost;
struct {
@@ -3005,23 +3100,19 @@ int perf_output_begin(struct perf_output_handle *handle,
if (event->parent)
event = event->parent;
- output_event = rcu_dereference(event->output);
- if (output_event)
- event = output_event;
-
- data = rcu_dereference(event->data);
- if (!data)
+ buffer = rcu_dereference(event->buffer);
+ if (!buffer)
goto out;
- handle->data = data;
+ handle->buffer = buffer;
handle->event = event;
handle->nmi = nmi;
handle->sample = sample;
- if (!data->nr_pages)
+ if (!buffer->nr_pages)
goto out;
- have_lost = local_read(&data->lost);
+ have_lost = local_read(&buffer->lost);
if (have_lost)
size += sizeof(lost_event);
@@ -3033,30 +3124,30 @@ int perf_output_begin(struct perf_output_handle *handle,
* tail pointer. So that all reads will be completed before the
* write is issued.
*/
- tail = ACCESS_ONCE(data->user_page->data_tail);
+ tail = ACCESS_ONCE(buffer->user_page->data_tail);
smp_rmb();
- offset = head = local_read(&data->head);
+ offset = head = local_read(&buffer->head);
head += size;
- if (unlikely(!perf_output_space(data, tail, offset, head)))
+ if (unlikely(!perf_output_space(buffer, tail, offset, head)))
goto fail;
- } while (local_cmpxchg(&data->head, offset, head) != offset);
+ } while (local_cmpxchg(&buffer->head, offset, head) != offset);
- if (head - local_read(&data->wakeup) > data->watermark)
- local_add(data->watermark, &data->wakeup);
+ if (head - local_read(&buffer->wakeup) > buffer->watermark)
+ local_add(buffer->watermark, &buffer->wakeup);
- handle->page = offset >> (PAGE_SHIFT + page_order(data));
- handle->page &= data->nr_pages - 1;
- handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1);
- handle->addr = data->data_pages[handle->page];
+ handle->page = offset >> (PAGE_SHIFT + page_order(buffer));
+ handle->page &= buffer->nr_pages - 1;
+ handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1);
+ handle->addr = buffer->data_pages[handle->page];
handle->addr += handle->size;
- handle->size = (PAGE_SIZE << page_order(data)) - handle->size;
+ handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size;
if (have_lost) {
lost_event.header.type = PERF_RECORD_LOST;
lost_event.header.misc = 0;
lost_event.header.size = sizeof(lost_event);
lost_event.id = event->id;
- lost_event.lost = local_xchg(&data->lost, 0);
+ lost_event.lost = local_xchg(&buffer->lost, 0);
perf_output_put(handle, lost_event);
}
@@ -3064,7 +3155,7 @@ int perf_output_begin(struct perf_output_handle *handle,
return 0;
fail:
- local_inc(&data->lost);
+ local_inc(&buffer->lost);
perf_output_put_handle(handle);
out:
rcu_read_unlock();
@@ -3075,15 +3166,15 @@ out:
void perf_output_end(struct perf_output_handle *handle)
{
struct perf_event *event = handle->event;
- struct perf_mmap_data *data = handle->data;
+ struct perf_buffer *buffer = handle->buffer;
int wakeup_events = event->attr.wakeup_events;
if (handle->sample && wakeup_events) {
- int events = local_inc_return(&data->events);
+ int events = local_inc_return(&buffer->events);
if (events >= wakeup_events) {
- local_sub(wakeup_events, &data->events);
- local_inc(&data->wakeup);
+ local_sub(wakeup_events, &buffer->events);
+ local_inc(&buffer->wakeup);
}
}
@@ -3120,7 +3211,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
u64 values[4];
int n = 0;
- values[n++] = atomic64_read(&event->count);
+ values[n++] = perf_event_count(event);
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
values[n++] = event->total_time_enabled +
atomic64_read(&event->child_total_time_enabled);
@@ -3157,7 +3248,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
if (leader != event)
leader->pmu->read(leader);
- values[n++] = atomic64_read(&leader->count);
+ values[n++] = perf_event_count(leader);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(leader);
@@ -3169,7 +3260,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
if (sub != event)
sub->pmu->read(sub);
- values[n++] = atomic64_read(&sub->count);
+ values[n++] = perf_event_count(sub);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(sub);
@@ -3400,7 +3491,7 @@ perf_event_read_event(struct perf_event *event,
/*
* task tracking -- fork/exit
*
- * enabled by: attr.comm | attr.mmap | attr.task
+ * enabled by: attr.comm | attr.mmap | attr.mmap_data | attr.task
*/
struct perf_task_event {
@@ -3450,7 +3541,8 @@ static int perf_event_task_match(struct perf_event *event)
if (event->cpu != -1 && event->cpu != smp_processor_id())
return 0;
- if (event->attr.comm || event->attr.mmap || event->attr.task)
+ if (event->attr.comm || event->attr.mmap ||
+ event->attr.mmap_data || event->attr.task)
return 1;
return 0;
@@ -3675,7 +3767,8 @@ static void perf_event_mmap_output(struct perf_event *event,
}
static int perf_event_mmap_match(struct perf_event *event,
- struct perf_mmap_event *mmap_event)
+ struct perf_mmap_event *mmap_event,
+ int executable)
{
if (event->state < PERF_EVENT_STATE_INACTIVE)
return 0;
@@ -3683,19 +3776,21 @@ static int perf_event_mmap_match(struct perf_event *event,
if (event->cpu != -1 && event->cpu != smp_processor_id())
return 0;
- if (event->attr.mmap)
+ if ((!executable && event->attr.mmap_data) ||
+ (executable && event->attr.mmap))
return 1;
return 0;
}
static void perf_event_mmap_ctx(struct perf_event_context *ctx,
- struct perf_mmap_event *mmap_event)
+ struct perf_mmap_event *mmap_event,
+ int executable)
{
struct perf_event *event;
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
- if (perf_event_mmap_match(event, mmap_event))
+ if (perf_event_mmap_match(event, mmap_event, executable))
perf_event_mmap_output(event, mmap_event);
}
}
@@ -3739,6 +3834,14 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
if (!vma->vm_mm) {
name = strncpy(tmp, "[vdso]", sizeof(tmp));
goto got_name;
+ } else if (vma->vm_start <= vma->vm_mm->start_brk &&
+ vma->vm_end >= vma->vm_mm->brk) {
+ name = strncpy(tmp, "[heap]", sizeof(tmp));
+ goto got_name;
+ } else if (vma->vm_start <= vma->vm_mm->start_stack &&
+ vma->vm_end >= vma->vm_mm->start_stack) {
+ name = strncpy(tmp, "[stack]", sizeof(tmp));
+ goto got_name;
}
name = strncpy(tmp, "//anon", sizeof(tmp));
@@ -3755,17 +3858,17 @@ got_name:
rcu_read_lock();
cpuctx = &get_cpu_var(perf_cpu_context);
- perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
+ perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC);
ctx = rcu_dereference(current->perf_event_ctxp);
if (ctx)
- perf_event_mmap_ctx(ctx, mmap_event);
+ perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC);
put_cpu_var(perf_cpu_context);
rcu_read_unlock();
kfree(buf);
}
-void __perf_event_mmap(struct vm_area_struct *vma)
+void perf_event_mmap(struct vm_area_struct *vma)
{
struct perf_mmap_event mmap_event;
@@ -3927,14 +4030,14 @@ static u64 perf_swevent_set_period(struct perf_event *event)
hwc->last_period = hwc->sample_period;
again:
- old = val = atomic64_read(&hwc->period_left);
+ old = val = local64_read(&hwc->period_left);
if (val < 0)
return 0;
nr = div64_u64(period + val, period);
offset = nr * period;
val -= offset;
- if (atomic64_cmpxchg(&hwc->period_left, old, val) != old)
+ if (local64_cmpxchg(&hwc->period_left, old, val) != old)
goto again;
return nr;
@@ -3967,20 +4070,13 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
}
}
-static void perf_swevent_unthrottle(struct perf_event *event)
-{
- /*
- * Nothing to do, we already reset hwc->interrupts.
- */
-}
-
static void perf_swevent_add(struct perf_event *event, u64 nr,
int nmi, struct perf_sample_data *data,
struct pt_regs *regs)
{
struct hw_perf_event *hwc = &event->hw;
- atomic64_add(nr, &event->count);
+ local64_add(nr, &event->count);
if (!regs)
return;
@@ -3991,7 +4087,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
return perf_swevent_overflow(event, 1, nmi, data, regs);
- if (atomic64_add_negative(nr, &hwc->period_left))
+ if (local64_add_negative(nr, &hwc->period_left))
return;
perf_swevent_overflow(event, 0, nmi, data, regs);
@@ -4129,14 +4225,12 @@ int perf_swevent_get_recursion_context(void)
}
EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
-void perf_swevent_put_recursion_context(int rctx)
+void inline perf_swevent_put_recursion_context(int rctx)
{
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
barrier();
cpuctx->recursion[rctx]--;
}
-EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
-
void __perf_sw_event(u32 event_id, u64 nr, int nmi,
struct pt_regs *regs, u64 addr)
@@ -4188,11 +4282,22 @@ static void perf_swevent_disable(struct perf_event *event)
hlist_del_rcu(&event->hlist_entry);
}
+static void perf_swevent_void(struct perf_event *event)
+{
+}
+
+static int perf_swevent_int(struct perf_event *event)
+{
+ return 0;
+}
+
static const struct pmu perf_ops_generic = {
.enable = perf_swevent_enable,
.disable = perf_swevent_disable,
+ .start = perf_swevent_int,
+ .stop = perf_swevent_void,
.read = perf_swevent_read,
- .unthrottle = perf_swevent_unthrottle,
+ .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */
};
/*
@@ -4273,8 +4378,8 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
u64 now;
now = cpu_clock(cpu);
- prev = atomic64_xchg(&event->hw.prev_count, now);
- atomic64_add(now - prev, &event->count);
+ prev = local64_xchg(&event->hw.prev_count, now);
+ local64_add(now - prev, &event->count);
}
static int cpu_clock_perf_event_enable(struct perf_event *event)
@@ -4282,7 +4387,7 @@ static int cpu_clock_perf_event_enable(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
int cpu = raw_smp_processor_id();
- atomic64_set(&hwc->prev_count, cpu_clock(cpu));
+ local64_set(&hwc->prev_count, cpu_clock(cpu));
perf_swevent_start_hrtimer(event);
return 0;
@@ -4314,9 +4419,9 @@ static void task_clock_perf_event_update(struct perf_event *event, u64 now)
u64 prev;
s64 delta;
- prev = atomic64_xchg(&event->hw.prev_count, now);
+ prev = local64_xchg(&event->hw.prev_count, now);
delta = now - prev;
- atomic64_add(delta, &event->count);
+ local64_add(delta, &event->count);
}
static int task_clock_perf_event_enable(struct perf_event *event)
@@ -4326,7 +4431,7 @@ static int task_clock_perf_event_enable(struct perf_event *event)
now = event->ctx->time;
- atomic64_set(&hwc->prev_count, now);
+ local64_set(&hwc->prev_count, now);
perf_swevent_start_hrtimer(event);
@@ -4473,8 +4578,10 @@ static int swevent_hlist_get(struct perf_event *event)
static const struct pmu perf_ops_tracepoint = {
.enable = perf_trace_enable,
.disable = perf_trace_disable,
+ .start = perf_swevent_int,
+ .stop = perf_swevent_void,
.read = perf_swevent_read,
- .unthrottle = perf_swevent_unthrottle,
+ .unthrottle = perf_swevent_void,
};
static int perf_tp_filter_match(struct perf_event *event,
@@ -4504,7 +4611,7 @@ static int perf_tp_event_match(struct perf_event *event,
}
void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
- struct pt_regs *regs, struct hlist_head *head)
+ struct pt_regs *regs, struct hlist_head *head, int rctx)
{
struct perf_sample_data data;
struct perf_event *event;
@@ -4518,12 +4625,12 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
perf_sample_data_init(&data, addr);
data.raw = &raw;
- rcu_read_lock();
hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
if (perf_tp_event_match(event, &data, regs))
perf_swevent_add(event, count, 1, &data, regs);
}
- rcu_read_unlock();
+
+ perf_swevent_put_recursion_context(rctx);
}
EXPORT_SYMBOL_GPL(perf_tp_event);
@@ -4767,7 +4874,7 @@ perf_event_alloc(struct perf_event_attr *attr,
hwc->sample_period = 1;
hwc->last_period = hwc->sample_period;
- atomic64_set(&hwc->period_left, hwc->sample_period);
+ local64_set(&hwc->period_left, hwc->sample_period);
/*
* we currently do not support PERF_FORMAT_GROUP on inherited events
@@ -4816,7 +4923,7 @@ done:
if (!event->parent) {
atomic_inc(&nr_events);
- if (event->attr.mmap)
+ if (event->attr.mmap || event->attr.mmap_data)
atomic_inc(&nr_mmap_events);
if (event->attr.comm)
atomic_inc(&nr_comm_events);
@@ -4907,39 +5014,17 @@ err_size:
goto out;
}
-static int perf_event_set_output(struct perf_event *event, int output_fd)
+static int
+perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
{
- struct perf_event *output_event = NULL;
- struct file *output_file = NULL;
- struct perf_event *old_output;
- int fput_needed = 0;
+ struct perf_buffer *buffer = NULL, *old_buffer = NULL;
int ret = -EINVAL;
- /*
- * Don't allow output of inherited per-task events. This would
- * create performance issues due to cross cpu access.
- */
- if (event->cpu == -1 && event->attr.inherit)
- return -EINVAL;
-
- if (!output_fd)
+ if (!output_event)
goto set;
- output_file = fget_light(output_fd, &fput_needed);
- if (!output_file)
- return -EBADF;
-
- if (output_file->f_op != &perf_fops)
- goto out;
-
- output_event = output_file->private_data;
-
- /* Don't chain output fds */
- if (output_event->output)
- goto out;
-
- /* Don't set an output fd when we already have an output channel */
- if (event->data)
+ /* don't allow circular references */
+ if (event == output_event)
goto out;
/*
@@ -4954,26 +5039,28 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
if (output_event->cpu == -1 && output_event->ctx != event->ctx)
goto out;
- atomic_long_inc(&output_file->f_count);
-
set:
mutex_lock(&event->mmap_mutex);
- old_output = event->output;
- rcu_assign_pointer(event->output, output_event);
- mutex_unlock(&event->mmap_mutex);
+ /* Can't redirect output if we've got an active mmap() */
+ if (atomic_read(&event->mmap_count))
+ goto unlock;
- if (old_output) {
- /*
- * we need to make sure no existing perf_output_*()
- * is still referencing this event.
- */
- synchronize_rcu();
- fput(old_output->filp);
+ if (output_event) {
+ /* get the buffer we want to redirect to */
+ buffer = perf_buffer_get(output_event);
+ if (!buffer)
+ goto unlock;
}
+ old_buffer = event->buffer;
+ rcu_assign_pointer(event->buffer, buffer);
ret = 0;
+unlock:
+ mutex_unlock(&event->mmap_mutex);
+
+ if (old_buffer)
+ perf_buffer_put(old_buffer);
out:
- fput_light(output_file, fput_needed);
return ret;
}
@@ -4989,13 +5076,13 @@ SYSCALL_DEFINE5(perf_event_open,
struct perf_event_attr __user *, attr_uptr,
pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
{
- struct perf_event *event, *group_leader;
+ struct perf_event *event, *group_leader = NULL, *output_event = NULL;
struct perf_event_attr attr;
struct perf_event_context *ctx;
struct file *event_file = NULL;
struct file *group_file = NULL;
+ int event_fd;
int fput_needed = 0;
- int fput_needed2 = 0;
int err;
/* for future expandability... */
@@ -5016,26 +5103,38 @@ SYSCALL_DEFINE5(perf_event_open,
return -EINVAL;
}
+ event_fd = get_unused_fd_flags(O_RDWR);
+ if (event_fd < 0)
+ return event_fd;
+
/*
* Get the target context (task or percpu):
*/
ctx = find_get_context(pid, cpu);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto err_fd;
+ }
+
+ if (group_fd != -1) {
+ group_leader = perf_fget_light(group_fd, &fput_needed);
+ if (IS_ERR(group_leader)) {
+ err = PTR_ERR(group_leader);
+ goto err_put_context;
+ }
+ group_file = group_leader->filp;
+ if (flags & PERF_FLAG_FD_OUTPUT)
+ output_event = group_leader;
+ if (flags & PERF_FLAG_FD_NO_GROUP)
+ group_leader = NULL;
+ }
/*
* Look up the group leader (we will attach this event to it):
*/
- group_leader = NULL;
- if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
+ if (group_leader) {
err = -EINVAL;
- group_file = fget_light(group_fd, &fput_needed);
- if (!group_file)
- goto err_put_context;
- if (group_file->f_op != &perf_fops)
- goto err_put_context;
- group_leader = group_file->private_data;
/*
* Do not allow a recursive hierarchy (this new sibling
* becoming part of another group-sibling):
@@ -5057,22 +5156,21 @@ SYSCALL_DEFINE5(perf_event_open,
event = perf_event_alloc(&attr, cpu, ctx, group_leader,
NULL, NULL, GFP_KERNEL);
- err = PTR_ERR(event);
- if (IS_ERR(event))
+ if (IS_ERR(event)) {
+ err = PTR_ERR(event);
goto err_put_context;
+ }
- err = anon_inode_getfd("[perf_event]", &perf_fops, event, O_RDWR);
- if (err < 0)
- goto err_free_put_context;
+ if (output_event) {
+ err = perf_event_set_output(event, output_event);
+ if (err)
+ goto err_free_put_context;
+ }
- event_file = fget_light(err, &fput_needed2);
- if (!event_file)
+ event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR);
+ if (IS_ERR(event_file)) {
+ err = PTR_ERR(event_file);
goto err_free_put_context;
-
- if (flags & PERF_FLAG_FD_OUTPUT) {
- err = perf_event_set_output(event, group_fd);
- if (err)
- goto err_fput_free_put_context;
}
event->filp = event_file;
@@ -5088,19 +5186,23 @@ SYSCALL_DEFINE5(perf_event_open,
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);
-err_fput_free_put_context:
- fput_light(event_file, fput_needed2);
+ /*
+ * Drop the reference on the group_event after placing the
+ * new event on the sibling_list. This ensures destruction
+ * of the group leader will find the pointer to itself in
+ * perf_group_detach().
+ */
+ fput_light(group_file, fput_needed);
+ fd_install(event_fd, event_file);
+ return event_fd;
err_free_put_context:
- if (err < 0)
- free_event(event);
-
+ free_event(event);
err_put_context:
- if (err < 0)
- put_ctx(ctx);
-
fput_light(group_file, fput_needed);
-
+ put_ctx(ctx);
+err_fd:
+ put_unused_fd(event_fd);
return err;
}
@@ -5206,7 +5308,7 @@ inherit_event(struct perf_event *parent_event,
hwc->sample_period = sample_period;
hwc->last_period = sample_period;
- atomic64_set(&hwc->period_left, sample_period);
+ local64_set(&hwc->period_left, sample_period);
}
child_event->overflow_handler = parent_event->overflow_handler;
@@ -5267,12 +5369,12 @@ static void sync_child_event(struct perf_event *child_event,
if (child_event->attr.inherit_stat)
perf_event_read_event(child_event, child);
- child_val = atomic64_read(&child_event->count);
+ child_val = perf_event_count(child_event);
/*
* Add back the child's count to the parent's count:
*/
- atomic64_add(child_val, &parent_event->count);
+ atomic64_add(child_val, &parent_event->child_count);
atomic64_add(child_event->total_time_enabled,
&parent_event->child_total_time_enabled);
atomic64_add(child_event->total_time_running,
@@ -5411,6 +5513,7 @@ static void perf_free_event(struct perf_event *event,
fput(parent->filp);
+ perf_group_detach(event);
list_del_event(event, ctx);
free_event(event);
}
OpenPOWER on IntegriCloud