diff options
Diffstat (limited to 'kernel')
31 files changed, 1778 insertions, 2533 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 3d9c7e2..187c89b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -87,7 +87,6 @@ obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o -obj-$(CONFIG_MARKERS) += marker.o obj-$(CONFIG_TRACEPOINTS) += tracepoint.o obj-$(CONFIG_LATENCYTOP) += latencytop.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ @@ -96,7 +95,7 @@ obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c7ece8f..213b7f9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -596,7 +596,7 @@ void cgroup_unlock(void) static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); static int cgroup_populate_dir(struct cgroup *cgrp); -static struct inode_operations cgroup_dir_inode_operations; +static const struct inode_operations cgroup_dir_inode_operations; static struct file_operations proc_cgroupstats_operations; static struct backing_dev_info cgroup_backing_dev_info = { @@ -961,7 +961,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) return ret; } -static struct super_operations cgroup_ops = { +static const struct super_operations cgroup_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = cgroup_show_options, @@ -1711,7 +1711,7 @@ static struct file_operations cgroup_file_operations = { .release = cgroup_file_release, }; -static struct inode_operations cgroup_dir_inode_operations = { +static const struct inode_operations cgroup_dir_inode_operations = { .lookup = simple_lookup, .mkdir = cgroup_mkdir, .rmdir = cgroup_rmdir, diff --git a/kernel/exit.c b/kernel/exit.c index ae5d866..e47ee8a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -47,7 +47,7 @@ #include <linux/tracehook.h> #include <linux/fs_struct.h> #include <linux/init_task.h> -#include <linux/perf_counter.h> +#include <linux/perf_event.h> #include <trace/events/sched.h> #include <asm/uaccess.h> @@ -154,8 +154,8 @@ static void delayed_put_task_struct(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); -#ifdef CONFIG_PERF_COUNTERS - WARN_ON_ONCE(tsk->perf_counter_ctxp); +#ifdef CONFIG_PERF_EVENTS + WARN_ON_ONCE(tsk->perf_event_ctxp); #endif trace_sched_process_free(tsk); put_task_struct(tsk); @@ -981,7 +981,7 @@ NORET_TYPE void do_exit(long code) * Flush inherited counters to the parent - before the parent * gets woken up by child-exit notifications. */ - perf_counter_exit_task(tsk); + perf_event_exit_task(tsk); exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA diff --git a/kernel/fork.c b/kernel/fork.c index bfee931..1020977 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -49,6 +49,7 @@ #include <linux/ftrace.h> #include <linux/profile.h> #include <linux/rmap.h> +#include <linux/ksm.h> #include <linux/acct.h> #include <linux/tsacct_kern.h> #include <linux/cn_proc.h> @@ -61,7 +62,7 @@ #include <linux/blkdev.h> #include <linux/fs_struct.h> #include <linux/magic.h> -#include <linux/perf_counter.h> +#include <linux/perf_event.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -136,9 +137,17 @@ struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; +static void account_kernel_stack(struct thread_info *ti, int account) +{ + struct zone *zone = page_zone(virt_to_page(ti)); + + mod_zone_page_state(zone, NR_KERNEL_STACK, account); +} + void free_task(struct task_struct *tsk) { prop_local_destroy_single(&tsk->dirties); + account_kernel_stack(tsk->stack, -1); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); @@ -253,6 +262,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) tsk->btrace_seq = 0; #endif tsk->splice_pipe = NULL; + + account_kernel_stack(ti, 1); + return tsk; out: @@ -288,6 +300,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) rb_link = &mm->mm_rb.rb_node; rb_parent = NULL; pprev = &mm->mmap; + retval = ksm_fork(mm, oldmm); + if (retval) + goto out; for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { struct file *file; @@ -424,7 +439,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) atomic_set(&mm->mm_count, 1); init_rwsem(&mm->mmap_sem); INIT_LIST_HEAD(&mm->mmlist); - mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; + mm->flags = (current->mm) ? + (current->mm->flags & MMF_INIT_MASK) : default_dump_filter; mm->core_state = NULL; mm->nr_ptes = 0; set_mm_counter(mm, file_rss, 0); @@ -485,6 +501,7 @@ void mmput(struct mm_struct *mm) if (atomic_dec_and_test(&mm->mm_users)) { exit_aio(mm); + ksm_exit(mm); exit_mmap(mm); set_mm_exe_file(mm, NULL); if (!list_empty(&mm->mmlist)) { @@ -863,6 +880,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) tty_audit_fork(sig); + sig->oom_adj = current->signal->oom_adj; + return 0; } @@ -1078,7 +1097,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); - retval = perf_counter_init_task(p); + retval = perf_event_init_task(p); if (retval) goto bad_fork_cleanup_policy; @@ -1253,7 +1272,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); - perf_counter_fork(p); + perf_event_fork(p); return p; bad_fork_free_pid: @@ -1280,7 +1299,7 @@ bad_fork_cleanup_semundo: bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: - perf_counter_free_task(p); + perf_event_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: diff --git a/kernel/marker.c b/kernel/marker.c deleted file mode 100644 index ea54f26..0000000 --- a/kernel/marker.c +++ /dev/null @@ -1,930 +0,0 @@ -/* - * Copyright (C) 2007 Mathieu Desnoyers - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/types.h> -#include <linux/jhash.h> -#include <linux/list.h> -#include <linux/rcupdate.h> -#include <linux/marker.h> -#include <linux/err.h> -#include <linux/slab.h> - -extern struct marker __start___markers[]; -extern struct marker __stop___markers[]; - -/* Set to 1 to enable marker debug output */ -static const int marker_debug; - -/* - * markers_mutex nests inside module_mutex. Markers mutex protects the builtin - * and module markers and the hash table. - */ -static DEFINE_MUTEX(markers_mutex); - -/* - * Marker hash table, containing the active markers. - * Protected by module_mutex. - */ -#define MARKER_HASH_BITS 6 -#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) -static struct hlist_head marker_table[MARKER_TABLE_SIZE]; - -/* - * Note about RCU : - * It is used to make sure every handler has finished using its private data - * between two consecutive operation (add or remove) on a given marker. It is - * also used to delay the free of multiple probes array until a quiescent state - * is reached. - * marker entries modifications are protected by the markers_mutex. - */ -struct marker_entry { - struct hlist_node hlist; - char *format; - /* Probe wrapper */ - void (*call)(const struct marker *mdata, void *call_private, ...); - struct marker_probe_closure single; - struct marker_probe_closure *multi; - int refcount; /* Number of times armed. 0 if disarmed. */ - struct rcu_head rcu; - void *oldptr; - int rcu_pending; - unsigned char ptype:1; - unsigned char format_allocated:1; - char name[0]; /* Contains name'\0'format'\0' */ -}; - -/** - * __mark_empty_function - Empty probe callback - * @probe_private: probe private data - * @call_private: call site private data - * @fmt: format string - * @...: variable argument list - * - * Empty callback provided as a probe to the markers. By providing this to a - * disabled marker, we make sure the execution flow is always valid even - * though the function pointer change and the marker enabling are two distinct - * operations that modifies the execution flow of preemptible code. - */ -notrace void __mark_empty_function(void *probe_private, void *call_private, - const char *fmt, va_list *args) -{ -} -EXPORT_SYMBOL_GPL(__mark_empty_function); - -/* - * marker_probe_cb Callback that prepares the variable argument list for probes. - * @mdata: pointer of type struct marker - * @call_private: caller site private data - * @...: Variable argument list. - * - * Since we do not use "typical" pointer based RCU in the 1 argument case, we - * need to put a full smp_rmb() in this branch. This is why we do not use - * rcu_dereference() for the pointer read. - */ -notrace void marker_probe_cb(const struct marker *mdata, - void *call_private, ...) -{ - va_list args; - char ptype; - - /* - * rcu_read_lock_sched does two things : disabling preemption to make - * sure the teardown of the callbacks can be done correctly when they - * are in modules and they insure RCU read coherency. - */ - rcu_read_lock_sched_notrace(); - ptype = mdata->ptype; - if (likely(!ptype)) { - marker_probe_func *func; - /* Must read the ptype before ptr. They are not data dependant, - * so we put an explicit smp_rmb() here. */ - smp_rmb(); - func = mdata->single.func; - /* Must read the ptr before private data. They are not data - * dependant, so we put an explicit smp_rmb() here. */ - smp_rmb(); - va_start(args, call_private); - func(mdata->single.probe_private, call_private, mdata->format, - &args); - va_end(args); - } else { - struct marker_probe_closure *multi; - int i; - /* - * Read mdata->ptype before mdata->multi. - */ - smp_rmb(); - multi = mdata->multi; - /* - * multi points to an array, therefore accessing the array - * depends on reading multi. However, even in this case, - * we must insure that the pointer is read _before_ the array - * data. Same as rcu_dereference, but we need a full smp_rmb() - * in the fast path, so put the explicit barrier here. - */ - smp_read_barrier_depends(); - for (i = 0; multi[i].func; i++) { - va_start(args, call_private); - multi[i].func(multi[i].probe_private, call_private, - mdata->format, &args); - va_end(args); - } - } - rcu_read_unlock_sched_notrace(); -} -EXPORT_SYMBOL_GPL(marker_probe_cb); - -/* - * marker_probe_cb Callback that does not prepare the variable argument list. - * @mdata: pointer of type struct marker - * @call_private: caller site private data - * @...: Variable argument list. - * - * Should be connected to markers "MARK_NOARGS". - */ -static notrace void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, ...) -{ - va_list args; /* not initialized */ - char ptype; - - rcu_read_lock_sched_notrace(); - ptype = mdata->ptype; - if (likely(!ptype)) { - marker_probe_func *func; - /* Must read the ptype before ptr. They are not data dependant, - * so we put an explicit smp_rmb() here. */ - smp_rmb(); - func = mdata->single.func; - /* Must read the ptr before private data. They are not data - * dependant, so we put an explicit smp_rmb() here. */ - smp_rmb(); - func(mdata->single.probe_private, call_private, mdata->format, - &args); - } else { - struct marker_probe_closure *multi; - int i; - /* - * Read mdata->ptype before mdata->multi. - */ - smp_rmb(); - multi = mdata->multi; - /* - * multi points to an array, therefore accessing the array - * depends on reading multi. However, even in this case, - * we must insure that the pointer is read _before_ the array - * data. Same as rcu_dereference, but we need a full smp_rmb() - * in the fast path, so put the explicit barrier here. - */ - smp_read_barrier_depends(); - for (i = 0; multi[i].func; i++) - multi[i].func(multi[i].probe_private, call_private, - mdata->format, &args); - } - rcu_read_unlock_sched_notrace(); -} - -static void free_old_closure(struct rcu_head *head) -{ - struct marker_entry *entry = container_of(head, - struct marker_entry, rcu); - kfree(entry->oldptr); - /* Make sure we free the data before setting the pending flag to 0 */ - smp_wmb(); - entry->rcu_pending = 0; -} - -static void debug_print_probes(struct marker_entry *entry) -{ - int i; - - if (!marker_debug) - return; - - if (!entry->ptype) { - printk(KERN_DEBUG "Single probe : %p %p\n", - entry->single.func, - entry->single.probe_private); - } else { - for (i = 0; entry->multi[i].func; i++) - printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, - entry->multi[i].func, - entry->multi[i].probe_private); - } -} - -static struct marker_probe_closure * -marker_entry_add_probe(struct marker_entry *entry, - marker_probe_func *probe, void *probe_private) -{ - int nr_probes = 0; - struct marker_probe_closure *old, *new; - - WARN_ON(!probe); - - debug_print_probes(entry); - old = entry->multi; - if (!entry->ptype) { - if (entry->single.func == probe && - entry->single.probe_private == probe_private) - return ERR_PTR(-EBUSY); - if (entry->single.func == __mark_empty_function) { - /* 0 -> 1 probes */ - entry->single.func = probe; - entry->single.probe_private = probe_private; - entry->refcount = 1; - entry->ptype = 0; - debug_print_probes(entry); - return NULL; - } else { - /* 1 -> 2 probes */ - nr_probes = 1; - old = NULL; - } - } else { - /* (N -> N+1), (N != 0, 1) probes */ - for (nr_probes = 0; old[nr_probes].func; nr_probes++) - if (old[nr_probes].func == probe - && old[nr_probes].probe_private - == probe_private) - return ERR_PTR(-EBUSY); - } - /* + 2 : one for new probe, one for NULL func */ - new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), - GFP_KERNEL); - if (new == NULL) - return ERR_PTR(-ENOMEM); - if (!old) - new[0] = entry->single; - else - memcpy(new, old, - nr_probes * sizeof(struct marker_probe_closure)); - new[nr_probes].func = probe; - new[nr_probes].probe_private = probe_private; - entry->refcount = nr_probes + 1; - entry->multi = new; - entry->ptype = 1; - debug_print_probes(entry); - return old; -} - -static struct marker_probe_closure * -marker_entry_remove_probe(struct marker_entry *entry, - marker_probe_func *probe, void *probe_private) -{ - int nr_probes = 0, nr_del = 0, i; - struct marker_probe_closure *old, *new; - - old = entry->multi; - - debug_print_probes(entry); - if (!entry->ptype) { - /* 0 -> N is an error */ - WARN_ON(entry->single.func == __mark_empty_function); - /* 1 -> 0 probes */ - WARN_ON(probe && entry->single.func != probe); - WARN_ON(entry->single.probe_private != probe_private); - entry->single.func = __mark_empty_function; - entry->refcount = 0; - entry->ptype = 0; - debug_print_probes(entry); - return NULL; - } else { - /* (N -> M), (N > 1, M >= 0) probes */ - for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - if ((!probe || old[nr_probes].func == probe) - && old[nr_probes].probe_private - == probe_private) - nr_del++; - } - } - - if (nr_probes - nr_del == 0) { - /* N -> 0, (N > 1) */ - entry->single.func = __mark_empty_function; - entry->refcount = 0; - entry->ptype = 0; - } else if (nr_probes - nr_del == 1) { - /* N -> 1, (N > 1) */ - for (i = 0; old[i].func; i++) - if ((probe && old[i].func != probe) || - old[i].probe_private != probe_private) - entry->single = old[i]; - entry->refcount = 1; - entry->ptype = 0; - } else { - int j = 0; - /* N -> M, (N > 1, M > 1) */ - /* + 1 for NULL */ - new = kzalloc((nr_probes - nr_del + 1) - * sizeof(struct marker_probe_closure), GFP_KERNEL); - if (new == NULL) - return ERR_PTR(-ENOMEM); - for (i = 0; old[i].func; i++) - if ((probe && old[i].func != probe) || - old[i].probe_private != probe_private) - new[j++] = old[i]; - entry->refcount = nr_probes - nr_del; - entry->ptype = 1; - entry->multi = new; - } - debug_print_probes(entry); - return old; -} - -/* - * Get marker if the marker is present in the marker hash table. - * Must be called with markers_mutex held. - * Returns NULL if not present. - */ -static struct marker_entry *get_marker(const char *name) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - u32 hash = jhash(name, strlen(name), 0); - - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) - return e; - } - return NULL; -} - -/* - * Add the marker to the marker hash table. Must be called with markers_mutex - * held. - */ -static struct marker_entry *add_marker(const char *name, const char *format) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - size_t name_len = strlen(name) + 1; - size_t format_len = 0; - u32 hash = jhash(name, name_len-1, 0); - - if (format) - format_len = strlen(format) + 1; - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - printk(KERN_NOTICE - "Marker %s busy\n", name); - return ERR_PTR(-EBUSY); /* Already there */ - } - } - /* - * Using kmalloc here to allocate a variable length element. Could - * cause some memory fragmentation if overused. - */ - e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, - GFP_KERNEL); - if (!e) - return ERR_PTR(-ENOMEM); - memcpy(&e->name[0], name, name_len); - if (format) { - e->format = &e->name[name_len]; - memcpy(e->format, format, format_len); - if (strcmp(e->format, MARK_NOARGS) == 0) - e->call = marker_probe_cb_noarg; - else - e->call = marker_probe_cb; - trace_mark(core_marker_format, "name %s format %s", - e->name, e->format); - } else { - e->format = NULL; - e->call = marker_probe_cb; - } - e->single.func = __mark_empty_function; - e->single.probe_private = NULL; - e->multi = NULL; - e->ptype = 0; - e->format_allocated = 0; - e->refcount = 0; - e->rcu_pending = 0; - hlist_add_head(&e->hlist, head); - return e; -} - -/* - * Remove the marker from the marker hash table. Must be called with mutex_lock - * held. - */ -static int remove_marker(const char *name) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - int found = 0; - size_t len = strlen(name) + 1; - u32 hash = jhash(name, len-1, 0); - - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - found = 1; - break; - } - } - if (!found) - return -ENOENT; - if (e->single.func != __mark_empty_function) - return -EBUSY; - hlist_del(&e->hlist); - if (e->format_allocated) - kfree(e->format); - /* Make sure the call_rcu has been executed */ - if (e->rcu_pending) - rcu_barrier_sched(); - kfree(e); - return 0; -} - -/* - * Set the mark_entry format to the format found in the element. - */ -static int marker_set_format(struct marker_entry *entry, const char *format) -{ - entry->format = kstrdup(format, GFP_KERNEL); - if (!entry->format) - return -ENOMEM; - entry->format_allocated = 1; - - trace_mark(core_marker_format, "name %s format %s", - entry->name, entry->format); - return 0; -} - -/* - * Sets the probe callback corresponding to one marker. - */ -static int set_marker(struct marker_entry *entry, struct marker *elem, - int active) -{ - int ret = 0; - WARN_ON(strcmp(entry->name, elem->name) != 0); - - if (entry->format) { - if (strcmp(entry->format, elem->format) != 0) { - printk(KERN_NOTICE - "Format mismatch for probe %s " - "(%s), marker (%s)\n", - entry->name, - entry->format, - elem->format); - return -EPERM; - } - } else { - ret = marker_set_format(entry, elem->format); - if (ret) - return ret; - } - - /* - * probe_cb setup (statically known) is done here. It is - * asynchronous with the rest of execution, therefore we only - * pass from a "safe" callback (with argument) to an "unsafe" - * callback (does not set arguments). - */ - elem->call = entry->call; - /* - * Sanity check : - * We only update the single probe private data when the ptr is - * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) - */ - WARN_ON(elem->single.func != __mark_empty_function - && elem->single.probe_private != entry->single.probe_private - && !elem->ptype); - elem->single.probe_private = entry->single.probe_private; - /* - * Make sure the private data is valid when we update the - * single probe ptr. - */ - smp_wmb(); - elem->single.func = entry->single.func; - /* - * We also make sure that the new probe callbacks array is consistent - * before setting a pointer to it. - */ - rcu_assign_pointer(elem->multi, entry->multi); - /* - * Update the function or multi probe array pointer before setting the - * ptype. - */ - smp_wmb(); - elem->ptype = entry->ptype; - - if (elem->tp_name && (active ^ elem->state)) { - WARN_ON(!elem->tp_cb); - /* - * It is ok to directly call the probe registration because type - * checking has been done in the __trace_mark_tp() macro. - */ - - if (active) { - /* - * try_module_get should always succeed because we hold - * lock_module() to get the tp_cb address. - */ - ret = try_module_get(__module_text_address( - (unsigned long)elem->tp_cb)); - BUG_ON(!ret); - ret = tracepoint_probe_register_noupdate( - elem->tp_name, - elem->tp_cb); - } else { - ret = tracepoint_probe_unregister_noupdate( - elem->tp_name, - elem->tp_cb); - /* - * tracepoint_probe_update_all() must be called - * before the module containing tp_cb is unloaded. - */ - module_put(__module_text_address( - (unsigned long)elem->tp_cb)); - } - } - elem->state = active; - - return ret; -} - -/* - * Disable a marker and its probe callback. - * Note: only waiting an RCU period after setting elem->call to the empty - * function insures that the original callback is not used anymore. This insured - * by rcu_read_lock_sched around the call site. - */ -static void disable_marker(struct marker *elem) -{ - int ret; - - /* leave "call" as is. It is known statically. */ - if (elem->tp_name && elem->state) { - WARN_ON(!elem->tp_cb); - /* - * It is ok to directly call the probe registration because type - * checking has been done in the __trace_mark_tp() macro. - */ - ret = tracepoint_probe_unregister_noupdate(elem->tp_name, - elem->tp_cb); - WARN_ON(ret); - /* - * tracepoint_probe_update_all() must be called - * before the module containing tp_cb is unloaded. - */ - module_put(__module_text_address((unsigned long)elem->tp_cb)); - } - elem->state = 0; - elem->single.func = __mark_empty_function; - /* Update the function before setting the ptype */ - smp_wmb(); - elem->ptype = 0; /* single probe */ - /* - * Leave the private data and id there, because removal is racy and - * should be done only after an RCU period. These are never used until - * the next initialization anyway. - */ -} - -/** - * marker_update_probe_range - Update a probe range - * @begin: beginning of the range - * @end: end of the range - * - * Updates the probe callback corresponding to a range of markers. - */ -void marker_update_probe_range(struct marker *begin, - struct marker *end) -{ - struct marker *iter; - struct marker_entry *mark_entry; - - mutex_lock(&markers_mutex); - for (iter = begin; iter < end; iter++) { - mark_entry = get_marker(iter->name); - if (mark_entry) { - set_marker(mark_entry, iter, !!mark_entry->refcount); - /* - * ignore error, continue - */ - } else { - disable_marker(iter); - } - } - mutex_unlock(&markers_mutex); -} - -/* - * Update probes, removing the faulty probes. - * - * Internal callback only changed before the first probe is connected to it. - * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 - * transitions. All other transitions will leave the old private data valid. - * This makes the non-atomicity of the callback/private data updates valid. - * - * "special case" updates : - * 0 -> 1 callback - * 1 -> 0 callback - * 1 -> 2 callbacks - * 2 -> 1 callbacks - * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. - * Site effect : marker_set_format may delete the marker entry (creating a - * replacement). - */ -static void marker_update_probes(void) -{ - /* Core kernel markers */ - marker_update_probe_range(__start___markers, __stop___markers); - /* Markers in modules. */ - module_update_markers(); - tracepoint_probe_update_all(); -} - -/** - * marker_probe_register - Connect a probe to a marker - * @name: marker name - * @format: format string - * @probe: probe handler - * @probe_private: probe private data - * - * private data must be a valid allocated memory address, or NULL. - * Returns 0 if ok, error value on error. - * The probe address must at least be aligned on the architecture pointer size. - */ -int marker_probe_register(const char *name, const char *format, - marker_probe_func *probe, void *probe_private) -{ - struct marker_entry *entry; - int ret = 0; - struct marker_probe_closure *old; - - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) { - entry = add_marker(name, format); - if (IS_ERR(entry)) - ret = PTR_ERR(entry); - } else if (format) { - if (!entry->format) - ret = marker_set_format(entry, format); - else if (strcmp(entry->format, format)) - ret = -EPERM; - } - if (ret) - goto end; - - /* - * If we detect that a call_rcu is pending for this marker, - * make sure it's executed now. - */ - if (entry->rcu_pending) - rcu_barrier_sched(); - old = marker_entry_add_probe(entry, probe, probe_private); - if (IS_ERR(old)) { - ret = PTR_ERR(old); - goto end; - } - mutex_unlock(&markers_mutex); - marker_update_probes(); - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); -end: - mutex_unlock(&markers_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(marker_probe_register); - -/** - * marker_probe_unregister - Disconnect a probe from a marker - * @name: marker name - * @probe: probe function pointer - * @probe_private: probe private data - * - * Returns the private data given to marker_probe_register, or an ERR_PTR(). - * We do not need to call a synchronize_sched to make sure the probes have - * finished running before doing a module unload, because the module unload - * itself uses stop_machine(), which insures that every preempt disabled section - * have finished. - */ -int marker_probe_unregister(const char *name, - marker_probe_func *probe, void *probe_private) -{ - struct marker_entry *entry; - struct marker_probe_closure *old; - int ret = -ENOENT; - - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - old = marker_entry_remove_probe(entry, probe, probe_private); - mutex_unlock(&markers_mutex); - marker_update_probes(); - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); - remove_marker(name); /* Ignore busy error message */ - ret = 0; -end: - mutex_unlock(&markers_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(marker_probe_unregister); - -static struct marker_entry * -get_marker_from_private_data(marker_probe_func *probe, void *probe_private) -{ - struct marker_entry *entry; - unsigned int i; - struct hlist_head *head; - struct hlist_node *node; - - for (i = 0; i < MARKER_TABLE_SIZE; i++) { - head = &marker_table[i]; - hlist_for_each_entry(entry, node, head, hlist) { - if (!entry->ptype) { - if (entry->single.func == probe - && entry->single.probe_private - == probe_private) - return entry; - } else { - struct marker_probe_closure *closure; - closure = entry->multi; - for (i = 0; closure[i].func; i++) { - if (closure[i].func == probe && - closure[i].probe_private - == probe_private) - return entry; - } - } - } - } - return NULL; -} - -/** - * marker_probe_unregister_private_data - Disconnect a probe from a marker - * @probe: probe function - * @probe_private: probe private data - * - * Unregister a probe by providing the registered private data. - * Only removes the first marker found in hash table. - * Return 0 on success or error value. - * We do not need to call a synchronize_sched to make sure the probes have - * finished running before doing a module unload, because the module unload - * itself uses stop_machine(), which insures that every preempt disabled section - * have finished. - */ -int marker_probe_unregister_private_data(marker_probe_func *probe, - void *probe_private) -{ - struct marker_entry *entry; - int ret = 0; - struct marker_probe_closure *old; - - mutex_lock(&markers_mutex); - entry = get_marker_from_private_data(probe, probe_private); - if (!entry) { - ret = -ENOENT; - goto end; - } - if (entry->rcu_pending) - rcu_barrier_sched(); - old = marker_entry_remove_probe(entry, NULL, probe_private); - mutex_unlock(&markers_mutex); - marker_update_probes(); - mutex_lock(&markers_mutex); - entry = get_marker_from_private_data(probe, probe_private); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); - remove_marker(entry->name); /* Ignore busy error message */ -end: - mutex_unlock(&markers_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); - -/** - * marker_get_private_data - Get a marker's probe private data - * @name: marker name - * @probe: probe to match - * @num: get the nth matching probe's private data - * - * Returns the nth private data pointer (starting from 0) matching, or an - * ERR_PTR. - * Returns the private data pointer, or an ERR_PTR. - * The private data pointer should _only_ be dereferenced if the caller is the - * owner of the data, or its content could vanish. This is mostly used to - * confirm that a caller is the owner of a registered probe. - */ -void *marker_get_private_data(const char *name, marker_probe_func *probe, - int num) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - size_t name_len = strlen(name) + 1; - u32 hash = jhash(name, name_len-1, 0); - int i; - - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - if (!e->ptype) { - if (num == 0 && e->single.func == probe) - return e->single.probe_private; - } else { - struct marker_probe_closure *closure; - int match = 0; - closure = e->multi; - for (i = 0; closure[i].func; i++) { - if (closure[i].func != probe) - continue; - if (match++ == num) - return closure[i].probe_private; - } - } - break; - } - } - return ERR_PTR(-ENOENT); -} -EXPORT_SYMBOL_GPL(marker_get_private_data); - -#ifdef CONFIG_MODULES - -int marker_module_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct module *mod = data; - - switch (val) { - case MODULE_STATE_COMING: - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - break; - case MODULE_STATE_GOING: - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - break; - } - return 0; -} - -struct notifier_block marker_module_nb = { - .notifier_call = marker_module_notify, - .priority = 0, -}; - -static int init_markers(void) -{ - return register_module_notifier(&marker_module_nb); -} -__initcall(init_markers); - -#endif /* CONFIG_MODULES */ diff --git a/kernel/module.c b/kernel/module.c index 05ce49c..e6bc4b2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -47,6 +47,7 @@ #include <linux/rculist.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> +#include <asm/mmu_context.h> #include <linux/license.h> #include <asm/sections.h> #include <linux/tracepoint.h> @@ -1535,6 +1536,10 @@ static void free_module(struct module *mod) /* Finally, free the core (containing the module structure) */ module_free(mod, mod->module_core); + +#ifdef CONFIG_MPU + update_protections(current->mm); +#endif } void *__symbol_get(const char *symbol) @@ -2237,10 +2242,6 @@ static noinline struct module *load_module(void __user *umod, sizeof(*mod->ctors), &mod->num_ctors); #endif -#ifdef CONFIG_MARKERS - mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers", - sizeof(*mod->markers), &mod->num_markers); -#endif #ifdef CONFIG_TRACEPOINTS mod->tracepoints = section_objs(hdr, sechdrs, secstrings, "__tracepoints", @@ -2958,20 +2959,6 @@ void module_layout(struct module *mod, EXPORT_SYMBOL(module_layout); #endif -#ifdef CONFIG_MARKERS -void module_update_markers(void) -{ - struct module *mod; - - mutex_lock(&module_mutex); - list_for_each_entry(mod, &modules, list) - if (!mod->taints) - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - mutex_unlock(&module_mutex); -} -#endif - #ifdef CONFIG_TRACEPOINTS void module_update_tracepoints(void) { diff --git a/kernel/perf_counter.c b/kernel/perf_event.c index cc768ab..76ac4db 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_event.c @@ -1,12 +1,12 @@ /* - * Performance counter core code + * Performance events core code: * * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> * - * For licensing details see kernel-base/COPYING + * For licensing details see kernel-base/COPYING */ #include <linux/fs.h> @@ -26,66 +26,66 @@ #include <linux/syscalls.h> #include <linux/anon_inodes.h> #include <linux/kernel_stat.h> -#include <linux/perf_counter.h> +#include <linux/perf_event.h> #include <asm/irq_regs.h> /* - * Each CPU has a list of per CPU counters: + * Each CPU has a list of per CPU events: */ DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); -int perf_max_counters __read_mostly = 1; +int perf_max_events __read_mostly = 1; static int perf_reserved_percpu __read_mostly; static int perf_overcommit __read_mostly = 1; -static atomic_t nr_counters __read_mostly; -static atomic_t nr_mmap_counters __read_mostly; -static atomic_t nr_comm_counters __read_mostly; -static atomic_t nr_task_counters __read_mostly; +static atomic_t nr_events __read_mostly; +static atomic_t nr_mmap_events __read_mostly; +static atomic_t nr_comm_events __read_mostly; +static atomic_t nr_task_events __read_mostly; /* - * perf counter paranoia level: + * perf event paranoia level: * -1 - not paranoid at all * 0 - disallow raw tracepoint access for unpriv - * 1 - disallow cpu counters for unpriv + * 1 - disallow cpu events for unpriv * 2 - disallow kernel profiling for unpriv */ -int sysctl_perf_counter_paranoid __read_mostly = 1; +int sysctl_perf_event_paranoid __read_mostly = 1; static inline bool perf_paranoid_tracepoint_raw(void) { - return sysctl_perf_counter_paranoid > -1; + return sysctl_perf_event_paranoid > -1; } static inline bool perf_paranoid_cpu(void) { - return sysctl_perf_counter_paranoid > 0; + return sysctl_perf_event_paranoid > 0; } static inline bool perf_paranoid_kernel(void) { - return sysctl_perf_counter_paranoid > 1; + return sysctl_perf_event_paranoid > 1; } -int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */ +int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */ /* - * max perf counter sample rate + * max perf event sample rate */ -int sysctl_perf_counter_sample_rate __read_mostly = 100000; +int sysctl_perf_event_sample_rate __read_mostly = 100000; -static atomic64_t perf_counter_id; +static atomic64_t perf_event_id; /* - * Lock for (sysadmin-configurable) counter reservations: + * Lock for (sysadmin-configurable) event reservations: */ static DEFINE_SPINLOCK(perf_resource_lock); /* * Architecture provided APIs - weak aliases: */ -extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counter) +extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event) { return NULL; } @@ -93,18 +93,18 @@ extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counte void __weak hw_perf_disable(void) { barrier(); } void __weak hw_perf_enable(void) { barrier(); } -void __weak hw_perf_counter_setup(int cpu) { barrier(); } -void __weak hw_perf_counter_setup_online(int cpu) { barrier(); } +void __weak hw_perf_event_setup(int cpu) { barrier(); } +void __weak hw_perf_event_setup_online(int cpu) { barrier(); } int __weak -hw_perf_group_sched_in(struct perf_counter *group_leader, +hw_perf_group_sched_in(struct perf_event *group_leader, struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, int cpu) + struct perf_event_context *ctx, int cpu) { return 0; } -void __weak perf_counter_print_debug(void) { } +void __weak perf_event_print_debug(void) { } static DEFINE_PER_CPU(int, perf_disable_count); @@ -130,20 +130,20 @@ void perf_enable(void) hw_perf_enable(); } -static void get_ctx(struct perf_counter_context *ctx) +static void get_ctx(struct perf_event_context *ctx) { WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); } static void free_ctx(struct rcu_head *head) { - struct perf_counter_context *ctx; + struct perf_event_context *ctx; - ctx = container_of(head, struct perf_counter_context, rcu_head); + ctx = container_of(head, struct perf_event_context, rcu_head); kfree(ctx); } -static void put_ctx(struct perf_counter_context *ctx) +static void put_ctx(struct perf_event_context *ctx) { if (atomic_dec_and_test(&ctx->refcount)) { if (ctx->parent_ctx) @@ -154,7 +154,7 @@ static void put_ctx(struct perf_counter_context *ctx) } } -static void unclone_ctx(struct perf_counter_context *ctx) +static void unclone_ctx(struct perf_event_context *ctx) { if (ctx->parent_ctx) { put_ctx(ctx->parent_ctx); @@ -163,37 +163,37 @@ static void unclone_ctx(struct perf_counter_context *ctx) } /* - * If we inherit counters we want to return the parent counter id + * If we inherit events we want to return the parent event id * to userspace. */ -static u64 primary_counter_id(struct perf_counter *counter) +static u64 primary_event_id(struct perf_event *event) { - u64 id = counter->id; + u64 id = event->id; - if (counter->parent) - id = counter->parent->id; + if (event->parent) + id = event->parent->id; return id; } /* - * Get the perf_counter_context for a task and lock it. + * Get the perf_event_context for a task and lock it. * This has to cope with with the fact that until it is locked, * the context could get moved to another task. */ -static struct perf_counter_context * +static struct perf_event_context * perf_lock_task_context(struct task_struct *task, unsigned long *flags) { - struct perf_counter_context *ctx; + struct perf_event_context *ctx; rcu_read_lock(); retry: - ctx = rcu_dereference(task->perf_counter_ctxp); + ctx = rcu_dereference(task->perf_event_ctxp); if (ctx) { /* * If this context is a clone of another, it might * get swapped for another underneath us by - * perf_counter_task_sched_out, though the + * perf_event_task_sched_out, though the * rcu_read_lock() protects us from any context * getting freed. Lock the context and check if it * got swapped before we could get the lock, and retry @@ -201,7 +201,7 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags) * can't get swapped on us any more. */ spin_lock_irqsave(&ctx->lock, *flags); - if (ctx != rcu_dereference(task->perf_counter_ctxp)) { + if (ctx != rcu_dereference(task->perf_event_ctxp)) { spin_unlock_irqrestore(&ctx->lock, *flags); goto retry; } @@ -220,9 +220,9 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags) * can't get swapped to another task. This also increments its * reference count so that the context can't get freed. */ -static struct perf_counter_context *perf_pin_task_context(struct task_struct *task) +static struct perf_event_context *perf_pin_task_context(struct task_struct *task) { - struct perf_counter_context *ctx; + struct perf_event_context *ctx; unsigned long flags; ctx = perf_lock_task_context(task, &flags); @@ -233,7 +233,7 @@ static struct perf_counter_context *perf_pin_task_context(struct task_struct *ta return ctx; } -static void perf_unpin_context(struct perf_counter_context *ctx) +static void perf_unpin_context(struct perf_event_context *ctx) { unsigned long flags; @@ -244,123 +244,122 @@ static void perf_unpin_context(struct perf_counter_context *ctx) } /* - * Add a counter from the lists for its context. + * Add a event from the lists for its context. * Must be called with ctx->mutex and ctx->lock held. */ static void -list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) +list_add_event(struct perf_event *event, struct perf_event_context *ctx) { - struct perf_counter *group_leader = counter->group_leader; + struct perf_event *group_leader = event->group_leader; /* - * Depending on whether it is a standalone or sibling counter, - * add it straight to the context's counter list, or to the group + * Depending on whether it is a standalone or sibling event, + * add it straight to the context's event list, or to the group * leader's sibling list: */ - if (group_leader == counter) - list_add_tail(&counter->list_entry, &ctx->counter_list); + if (group_leader == event) + list_add_tail(&event->group_entry, &ctx->group_list); else { - list_add_tail(&counter->list_entry, &group_leader->sibling_list); + list_add_tail(&event->group_entry, &group_leader->sibling_list); group_leader->nr_siblings++; } - list_add_rcu(&counter->event_entry, &ctx->event_list); - ctx->nr_counters++; - if (counter->attr.inherit_stat) + list_add_rcu(&event->event_entry, &ctx->event_list); + ctx->nr_events++; + if (event->attr.inherit_stat) ctx->nr_stat++; } /* - * Remove a counter from the lists for its context. + * Remove a event from the lists for its context. * Must be called with ctx->mutex and ctx->lock held. */ static void -list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) +list_del_event(struct perf_event *event, struct perf_event_context *ctx) { - struct perf_counter *sibling, *tmp; + struct perf_event *sibling, *tmp; - if (list_empty(&counter->list_entry)) + if (list_empty(&event->group_entry)) return; - ctx->nr_counters--; - if (counter->attr.inherit_stat) + ctx->nr_events--; + if (event->attr.inherit_stat) ctx->nr_stat--; - list_del_init(&counter->list_entry); - list_del_rcu(&counter->event_entry); + list_del_init(&event->group_entry); + list_del_rcu(&event->event_entry); - if (counter->group_leader != counter) - counter->group_leader->nr_siblings--; + if (event->group_leader != event) + event->group_leader->nr_siblings--; /* - * If this was a group counter with sibling counters then - * upgrade the siblings to singleton counters by adding them + * If this was a group event with sibling events then + * upgrade the siblings to singleton events by adding them * to the context list directly: */ - list_for_each_entry_safe(sibling, tmp, - &counter->sibling_list, list_entry) { + list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { - list_move_tail(&sibling->list_entry, &ctx->counter_list); + list_move_tail(&sibling->group_entry, &ctx->group_list); sibling->group_leader = sibling; } } static void -counter_sched_out(struct perf_counter *counter, +event_sched_out(struct perf_event *event, struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx) + struct perf_event_context *ctx) { - if (counter->state != PERF_COUNTER_STATE_ACTIVE) + if (event->state != PERF_EVENT_STATE_ACTIVE) return; - counter->state = PERF_COUNTER_STATE_INACTIVE; - if (counter->pending_disable) { - counter->pending_disable = 0; - counter->state = PERF_COUNTER_STATE_OFF; + event->state = PERF_EVENT_STATE_INACTIVE; + if (event->pending_disable) { + event->pending_disable = 0; + event->state = PERF_EVENT_STATE_OFF; } - counter->tstamp_stopped = ctx->time; - counter->pmu->disable(counter); - counter->oncpu = -1; + event->tstamp_stopped = ctx->time; + event->pmu->disable(event); + event->oncpu = -1; - if (!is_software_counter(counter)) + if (!is_software_event(event)) cpuctx->active_oncpu--; ctx->nr_active--; - if (counter->attr.exclusive || !cpuctx->active_oncpu) + if (event->attr.exclusive || !cpuctx->active_oncpu) cpuctx->exclusive = 0; } static void -group_sched_out(struct perf_counter *group_counter, +group_sched_out(struct perf_event *group_event, struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx) + struct perf_event_context *ctx) { - struct perf_counter *counter; + struct perf_event *event; - if (group_counter->state != PERF_COUNTER_STATE_ACTIVE) + if (group_event->state != PERF_EVENT_STATE_ACTIVE) return; - counter_sched_out(group_counter, cpuctx, ctx); + event_sched_out(group_event, cpuctx, ctx); /* * Schedule out siblings (if any): */ - list_for_each_entry(counter, &group_counter->sibling_list, list_entry) - counter_sched_out(counter, cpuctx, ctx); + list_for_each_entry(event, &group_event->sibling_list, group_entry) + event_sched_out(event, cpuctx, ctx); - if (group_counter->attr.exclusive) + if (group_event->attr.exclusive) cpuctx->exclusive = 0; } /* - * Cross CPU call to remove a performance counter + * Cross CPU call to remove a performance event * - * We disable the counter on the hardware level first. After that we + * We disable the event on the hardware level first. After that we * remove it from the context list. */ -static void __perf_counter_remove_from_context(void *info) +static void __perf_event_remove_from_context(void *info) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter *counter = info; - struct perf_counter_context *ctx = counter->ctx; + struct perf_event *event = info; + struct perf_event_context *ctx = event->ctx; /* * If this is a task context, we need to check whether it is @@ -373,22 +372,22 @@ static void __perf_counter_remove_from_context(void *info) spin_lock(&ctx->lock); /* * Protect the list operation against NMI by disabling the - * counters on a global level. + * events on a global level. */ perf_disable(); - counter_sched_out(counter, cpuctx, ctx); + event_sched_out(event, cpuctx, ctx); - list_del_counter(counter, ctx); + list_del_event(event, ctx); if (!ctx->task) { /* - * Allow more per task counters with respect to the + * Allow more per task events with respect to the * reservation: */ cpuctx->max_pertask = - min(perf_max_counters - ctx->nr_counters, - perf_max_counters - perf_reserved_percpu); + min(perf_max_events - ctx->nr_events, + perf_max_events - perf_reserved_percpu); } perf_enable(); @@ -397,56 +396,56 @@ static void __perf_counter_remove_from_context(void *info) /* - * Remove the counter from a task's (or a CPU's) list of counters. + * Remove the event from a task's (or a CPU's) list of events. * * Must be called with ctx->mutex held. * - * CPU counters are removed with a smp call. For task counters we only + * CPU events are removed with a smp call. For task events we only * call when the task is on a CPU. * - * If counter->ctx is a cloned context, callers must make sure that - * every task struct that counter->ctx->task could possibly point to + * If event->ctx is a cloned context, callers must make sure that + * every task struct that event->ctx->task could possibly point to * remains valid. This is OK when called from perf_release since * that only calls us on the top-level context, which can't be a clone. - * When called from perf_counter_exit_task, it's OK because the + * When called from perf_event_exit_task, it's OK because the * context has been detached from its task. */ -static void perf_counter_remove_from_context(struct perf_counter *counter) +static void perf_event_remove_from_context(struct perf_event *event) { - struct perf_counter_context *ctx = counter->ctx; + struct perf_event_context *ctx = event->ctx; struct task_struct *task = ctx->task; if (!task) { /* - * Per cpu counters are removed via an smp call and + * Per cpu events are removed via an smp call and * the removal is always sucessful. */ - smp_call_function_single(counter->cpu, - __perf_counter_remove_from_context, - counter, 1); + smp_call_function_single(event->cpu, + __perf_event_remove_from_context, + event, 1); return; } retry: - task_oncpu_function_call(task, __perf_counter_remove_from_context, - counter); + task_oncpu_function_call(task, __perf_event_remove_from_context, + event); spin_lock_irq(&ctx->lock); /* * If the context is active we need to retry the smp call. */ - if (ctx->nr_active && !list_empty(&counter->list_entry)) { + if (ctx->nr_active && !list_empty(&event->group_entry)) { spin_unlock_irq(&ctx->lock); goto retry; } /* * The lock prevents that this context is scheduled in so we - * can remove the counter safely, if the call above did not + * can remove the event safely, if the call above did not * succeed. */ - if (!list_empty(&counter->list_entry)) { - list_del_counter(counter, ctx); + if (!list_empty(&event->group_entry)) { + list_del_event(event, ctx); } spin_unlock_irq(&ctx->lock); } @@ -459,7 +458,7 @@ static inline u64 perf_clock(void) /* * Update the record of the current time in a context. */ -static void update_context_time(struct perf_counter_context *ctx) +static void update_context_time(struct perf_event_context *ctx) { u64 now = perf_clock(); @@ -468,51 +467,51 @@ static void update_context_time(struct perf_counter_context *ctx) } /* - * Update the total_time_enabled and total_time_running fields for a counter. + * Update the total_time_enabled and total_time_running fields for a event. */ -static void update_counter_times(struct perf_counter *counter) +static void update_event_times(struct perf_event *event) { - struct perf_counter_context *ctx = counter->ctx; + struct perf_event_context *ctx = event->ctx; u64 run_end; - if (counter->state < PERF_COUNTER_STATE_INACTIVE || - counter->group_leader->state < PERF_COUNTER_STATE_INACTIVE) + if (event->state < PERF_EVENT_STATE_INACTIVE || + event->group_leader->state < PERF_EVENT_STATE_INACTIVE) return; - counter->total_time_enabled = ctx->time - counter->tstamp_enabled; + event->total_time_enabled = ctx->time - event->tstamp_enabled; - if (counter->state == PERF_COUNTER_STATE_INACTIVE) - run_end = counter->tstamp_stopped; + if (event->state == PERF_EVENT_STATE_INACTIVE) + run_end = event->tstamp_stopped; else run_end = ctx->time; - counter->total_time_running = run_end - counter->tstamp_running; + event->total_time_running = run_end - event->tstamp_running; } /* - * Update total_time_enabled and total_time_running for all counters in a group. + * Update total_time_enabled and total_time_running for all events in a group. */ -static void update_group_times(struct perf_counter *leader) +static void update_group_times(struct perf_event *leader) { - struct perf_counter *counter; + struct perf_event *event; - update_counter_times(leader); - list_for_each_entry(counter, &leader->sibling_list, list_entry) - update_counter_times(counter); + update_event_times(leader); + list_for_each_entry(event, &leader->sibling_list, group_entry) + update_event_times(event); } /* - * Cross CPU call to disable a performance counter + * Cross CPU call to disable a performance event */ -static void __perf_counter_disable(void *info) +static void __perf_event_disable(void *info) { - struct perf_counter *counter = info; + struct perf_event *event = info; struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter_context *ctx = counter->ctx; + struct perf_event_context *ctx = event->ctx; /* - * If this is a per-task counter, need to check whether this - * counter's task is the current task on this cpu. + * If this is a per-task event, need to check whether this + * event's task is the current task on this cpu. */ if (ctx->task && cpuctx->task_ctx != ctx) return; @@ -520,57 +519,57 @@ static void __perf_counter_disable(void *info) spin_lock(&ctx->lock); /* - * If the counter is on, turn it off. + * If the event is on, turn it off. * If it is in error state, leave it in error state. */ - if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { + if (event->state >= PERF_EVENT_STATE_INACTIVE) { update_context_time(ctx); - update_group_times(counter); - if (counter == counter->group_leader) - group_sched_out(counter, cpuctx, ctx); + update_group_times(event); + if (event == event->group_leader) + group_sched_out(event, cpuctx, ctx); else - counter_sched_out(counter, cpuctx, ctx); - counter->state = PERF_COUNTER_STATE_OFF; + event_sched_out(event, cpuctx, ctx); + event->state = PERF_EVENT_STATE_OFF; } spin_unlock(&ctx->lock); } /* - * Disable a counter. + * Disable a event. * - * If counter->ctx is a cloned context, callers must make sure that - * every task struct that counter->ctx->task could possibly point to + * If event->ctx is a cloned context, callers must make sure that + * every task struct that event->ctx->task could possibly point to * remains valid. This condition is satisifed when called through - * perf_counter_for_each_child or perf_counter_for_each because they - * hold the top-level counter's child_mutex, so any descendant that - * goes to exit will block in sync_child_counter. - * When called from perf_pending_counter it's OK because counter->ctx + * perf_event_for_each_child or perf_event_for_each because they + * hold the top-level event's child_mutex, so any descendant that + * goes to exit will block in sync_child_event. + * When called from perf_pending_event it's OK because event->ctx * is the current context on this CPU and preemption is disabled, - * hence we can't get into perf_counter_task_sched_out for this context. + * hence we can't get into perf_event_task_sched_out for this context. */ -static void perf_counter_disable(struct perf_counter *counter) +static void perf_event_disable(struct perf_event *event) { - struct perf_counter_context *ctx = counter->ctx; + struct perf_event_context *ctx = event->ctx; struct task_struct *task = ctx->task; if (!task) { /* - * Disable the counter on the cpu that it's on + * Disable the event on the cpu that it's on */ - smp_call_function_single(counter->cpu, __perf_counter_disable, - counter, 1); + smp_call_function_single(event->cpu, __perf_event_disable, + event, 1); return; } retry: - task_oncpu_function_call(task, __perf_counter_disable, counter); + task_oncpu_function_call(task, __perf_event_disable, event); spin_lock_irq(&ctx->lock); /* - * If the counter is still active, we need to retry the cross-call. + * If the event is still active, we need to retry the cross-call. */ - if (counter->state == PERF_COUNTER_STATE_ACTIVE) { + if (event->state == PERF_EVENT_STATE_ACTIVE) { spin_unlock_irq(&ctx->lock); goto retry; } @@ -579,73 +578,73 @@ static void perf_counter_disable(struct perf_counter *counter) * Since we have the lock this context can't be scheduled * in, so we can change the state safely. */ - if (counter->state == PERF_COUNTER_STATE_INACTIVE) { - update_group_times(counter); - counter->state = PERF_COUNTER_STATE_OFF; + if (event->state == PERF_EVENT_STATE_INACTIVE) { + update_group_times(event); + event->state = PERF_EVENT_STATE_OFF; } spin_unlock_irq(&ctx->lock); } static int -counter_sched_in(struct perf_counter *counter, +event_sched_in(struct perf_event *event, struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, + struct perf_event_context *ctx, int cpu) { - if (counter->state <= PERF_COUNTER_STATE_OFF) + if (event->state <= PERF_EVENT_STATE_OFF) return 0; - counter->state = PERF_COUNTER_STATE_ACTIVE; - counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ + event->state = PERF_EVENT_STATE_ACTIVE; + event->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ /* * The new state must be visible before we turn it on in the hardware: */ smp_wmb(); - if (counter->pmu->enable(counter)) { - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->oncpu = -1; + if (event->pmu->enable(event)) { + event->state = PERF_EVENT_STATE_INACTIVE; + event->oncpu = -1; return -EAGAIN; } - counter->tstamp_running += ctx->time - counter->tstamp_stopped; + event->tstamp_running += ctx->time - event->tstamp_stopped; - if (!is_software_counter(counter)) + if (!is_software_event(event)) cpuctx->active_oncpu++; ctx->nr_active++; - if (counter->attr.exclusive) + if (event->attr.exclusive) cpuctx->exclusive = 1; return 0; } static int -group_sched_in(struct perf_counter *group_counter, +group_sched_in(struct perf_event *group_event, struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, + struct perf_event_context *ctx, int cpu) { - struct perf_counter *counter, *partial_group; + struct perf_event *event, *partial_group; int ret; - if (group_counter->state == PERF_COUNTER_STATE_OFF) + if (group_event->state == PERF_EVENT_STATE_OFF) return 0; - ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu); + ret = hw_perf_group_sched_in(group_event, cpuctx, ctx, cpu); if (ret) return ret < 0 ? ret : 0; - if (counter_sched_in(group_counter, cpuctx, ctx, cpu)) + if (event_sched_in(group_event, cpuctx, ctx, cpu)) return -EAGAIN; /* * Schedule in siblings as one group (if any): */ - list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { - if (counter_sched_in(counter, cpuctx, ctx, cpu)) { - partial_group = counter; + list_for_each_entry(event, &group_event->sibling_list, group_entry) { + if (event_sched_in(event, cpuctx, ctx, cpu)) { + partial_group = event; goto group_error; } } @@ -657,57 +656,57 @@ group_error: * Groups can be scheduled in as one unit only, so undo any * partial group before returning: */ - list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { - if (counter == partial_group) + list_for_each_entry(event, &group_event->sibling_list, group_entry) { + if (event == partial_group) break; - counter_sched_out(counter, cpuctx, ctx); + event_sched_out(event, cpuctx, ctx); } - counter_sched_out(group_counter, cpuctx, ctx); + event_sched_out(group_event, cpuctx, ctx); return -EAGAIN; } /* - * Return 1 for a group consisting entirely of software counters, - * 0 if the group contains any hardware counters. + * Return 1 for a group consisting entirely of software events, + * 0 if the group contains any hardware events. */ -static int is_software_only_group(struct perf_counter *leader) +static int is_software_only_group(struct perf_event *leader) { - struct perf_counter *counter; + struct perf_event *event; - if (!is_software_counter(leader)) + if (!is_software_event(leader)) return 0; - list_for_each_entry(counter, &leader->sibling_list, list_entry) - if (!is_software_counter(counter)) + list_for_each_entry(event, &leader->sibling_list, group_entry) + if (!is_software_event(event)) return 0; return 1; } /* - * Work out whether we can put this counter group on the CPU now. + * Work out whether we can put this event group on the CPU now. */ -static int group_can_go_on(struct perf_counter *counter, +static int group_can_go_on(struct perf_event *event, struct perf_cpu_context *cpuctx, int can_add_hw) { /* - * Groups consisting entirely of software counters can always go on. + * Groups consisting entirely of software events can always go on. */ - if (is_software_only_group(counter)) + if (is_software_only_group(event)) return 1; /* * If an exclusive group is already on, no other hardware - * counters can go on. + * events can go on. */ if (cpuctx->exclusive) return 0; /* * If this group is exclusive and there are already - * counters on the CPU, it can't go on. + * events on the CPU, it can't go on. */ - if (counter->attr.exclusive && cpuctx->active_oncpu) + if (event->attr.exclusive && cpuctx->active_oncpu) return 0; /* * Otherwise, try to add it if all previous groups were able @@ -716,26 +715,26 @@ static int group_can_go_on(struct perf_counter *counter, return can_add_hw; } -static void add_counter_to_ctx(struct perf_counter *counter, - struct perf_counter_context *ctx) +static void add_event_to_ctx(struct perf_event *event, + struct perf_event_context *ctx) { - list_add_counter(counter, ctx); - counter->tstamp_enabled = ctx->time; - counter->tstamp_running = ctx->time; - counter->tstamp_stopped = ctx->time; + list_add_event(event, ctx); + event->tstamp_enabled = ctx->time; + event->tstamp_running = ctx->time; + event->tstamp_stopped = ctx->time; } /* - * Cross CPU call to install and enable a performance counter + * Cross CPU call to install and enable a performance event * * Must be called with ctx->mutex held */ static void __perf_install_in_context(void *info) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter *counter = info; - struct perf_counter_context *ctx = counter->ctx; - struct perf_counter *leader = counter->group_leader; + struct perf_event *event = info; + struct perf_event_context *ctx = event->ctx; + struct perf_event *leader = event->group_leader; int cpu = smp_processor_id(); int err; @@ -744,7 +743,7 @@ static void __perf_install_in_context(void *info) * the current task context of this cpu. If not it has been * scheduled out before the smp call arrived. * Or possibly this is the right context but it isn't - * on this cpu because it had no counters. + * on this cpu because it had no events. */ if (ctx->task && cpuctx->task_ctx != ctx) { if (cpuctx->task_ctx || ctx->task != current) @@ -758,41 +757,41 @@ static void __perf_install_in_context(void *info) /* * Protect the list operation against NMI by disabling the - * counters on a global level. NOP for non NMI based counters. + * events on a global level. NOP for non NMI based events. */ perf_disable(); - add_counter_to_ctx(counter, ctx); + add_event_to_ctx(event, ctx); /* - * Don't put the counter on if it is disabled or if + * Don't put the event on if it is disabled or if * it is in a group and the group isn't on. */ - if (counter->state != PERF_COUNTER_STATE_INACTIVE || - (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)) + if (event->state != PERF_EVENT_STATE_INACTIVE || + (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)) goto unlock; /* - * An exclusive counter can't go on if there are already active - * hardware counters, and no hardware counter can go on if there - * is already an exclusive counter on. + * An exclusive event can't go on if there are already active + * hardware events, and no hardware event can go on if there + * is already an exclusive event on. */ - if (!group_can_go_on(counter, cpuctx, 1)) + if (!group_can_go_on(event, cpuctx, 1)) err = -EEXIST; else - err = counter_sched_in(counter, cpuctx, ctx, cpu); + err = event_sched_in(event, cpuctx, ctx, cpu); if (err) { /* - * This counter couldn't go on. If it is in a group + * This event couldn't go on. If it is in a group * then we have to pull the whole group off. - * If the counter group is pinned then put it in error state. + * If the event group is pinned then put it in error state. */ - if (leader != counter) + if (leader != event) group_sched_out(leader, cpuctx, ctx); if (leader->attr.pinned) { update_group_times(leader); - leader->state = PERF_COUNTER_STATE_ERROR; + leader->state = PERF_EVENT_STATE_ERROR; } } @@ -806,92 +805,92 @@ static void __perf_install_in_context(void *info) } /* - * Attach a performance counter to a context + * Attach a performance event to a context * - * First we add the counter to the list with the hardware enable bit - * in counter->hw_config cleared. + * First we add the event to the list with the hardware enable bit + * in event->hw_config cleared. * - * If the counter is attached to a task which is on a CPU we use a smp + * If the event is attached to a task which is on a CPU we use a smp * call to enable it in the task context. The task might have been * scheduled away, but we check this in the smp call again. * * Must be called with ctx->mutex held. */ static void -perf_install_in_context(struct perf_counter_context *ctx, - struct perf_counter *counter, +perf_install_in_context(struct perf_event_context *ctx, + struct perf_event *event, int cpu) { struct task_struct *task = ctx->task; if (!task) { /* - * Per cpu counters are installed via an smp call and + * Per cpu events are installed via an smp call and * the install is always sucessful. */ smp_call_function_single(cpu, __perf_install_in_context, - counter, 1); + event, 1); return; } retry: task_oncpu_function_call(task, __perf_install_in_context, - counter); + event); spin_lock_irq(&ctx->lock); /* * we need to retry the smp call. */ - if (ctx->is_active && list_empty(&counter->list_entry)) { + if (ctx->is_active && list_empty(&event->group_entry)) { spin_unlock_irq(&ctx->lock); goto retry; } /* * The lock prevents that this context is scheduled in so we - * can add the counter safely, if it the call above did not + * can add the event safely, if it the call above did not * succeed. */ - if (list_empty(&counter->list_entry)) - add_counter_to_ctx(counter, ctx); + if (list_empty(&event->group_entry)) + add_event_to_ctx(event, ctx); spin_unlock_irq(&ctx->lock); } /* - * Put a counter into inactive state and update time fields. + * Put a event into inactive state and update time fields. * Enabling the leader of a group effectively enables all * the group members that aren't explicitly disabled, so we * have to update their ->tstamp_enabled also. * Note: this works for group members as well as group leaders * since the non-leader members' sibling_lists will be empty. */ -static void __perf_counter_mark_enabled(struct perf_counter *counter, - struct perf_counter_context *ctx) +static void __perf_event_mark_enabled(struct perf_event *event, + struct perf_event_context *ctx) { - struct perf_counter *sub; + struct perf_event *sub; - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = ctx->time - counter->total_time_enabled; - list_for_each_entry(sub, &counter->sibling_list, list_entry) - if (sub->state >= PERF_COUNTER_STATE_INACTIVE) + event->state = PERF_EVENT_STATE_INACTIVE; + event->tstamp_enabled = ctx->time - event->total_time_enabled; + list_for_each_entry(sub, &event->sibling_list, group_entry) + if (sub->state >= PERF_EVENT_STATE_INACTIVE) sub->tstamp_enabled = ctx->time - sub->total_time_enabled; } /* - * Cross CPU call to enable a performance counter + * Cross CPU call to enable a performance event */ -static void __perf_counter_enable(void *info) +static void __perf_event_enable(void *info) { - struct perf_counter *counter = info; + struct perf_event *event = info; struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter_context *ctx = counter->ctx; - struct perf_counter *leader = counter->group_leader; + struct perf_event_context *ctx = event->ctx; + struct perf_event *leader = event->group_leader; int err; /* - * If this is a per-task counter, need to check whether this - * counter's task is the current task on this cpu. + * If this is a per-task event, need to check whether this + * event's task is the current task on this cpu. */ if (ctx->task && cpuctx->task_ctx != ctx) { if (cpuctx->task_ctx || ctx->task != current) @@ -903,40 +902,40 @@ static void __perf_counter_enable(void *info) ctx->is_active = 1; update_context_time(ctx); - if (counter->state >= PERF_COUNTER_STATE_INACTIVE) + if (event->state >= PERF_EVENT_STATE_INACTIVE) goto unlock; - __perf_counter_mark_enabled(counter, ctx); + __perf_event_mark_enabled(event, ctx); /* - * If the counter is in a group and isn't the group leader, + * If the event is in a group and isn't the group leader, * then don't put it on unless the group is on. */ - if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE) + if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) goto unlock; - if (!group_can_go_on(counter, cpuctx, 1)) { + if (!group_can_go_on(event, cpuctx, 1)) { err = -EEXIST; } else { perf_disable(); - if (counter == leader) - err = group_sched_in(counter, cpuctx, ctx, + if (event == leader) + err = group_sched_in(event, cpuctx, ctx, smp_processor_id()); else - err = counter_sched_in(counter, cpuctx, ctx, + err = event_sched_in(event, cpuctx, ctx, smp_processor_id()); perf_enable(); } if (err) { /* - * If this counter can't go on and it's part of a + * If this event can't go on and it's part of a * group, then the whole group has to come off. */ - if (leader != counter) + if (leader != event) group_sched_out(leader, cpuctx, ctx); if (leader->attr.pinned) { update_group_times(leader); - leader->state = PERF_COUNTER_STATE_ERROR; + leader->state = PERF_EVENT_STATE_ERROR; } } @@ -945,98 +944,98 @@ static void __perf_counter_enable(void *info) } /* - * Enable a counter. + * Enable a event. * - * If counter->ctx is a cloned context, callers must make sure that - * every task struct that counter->ctx->task could possibly point to + * If event->ctx is a cloned context, callers must make sure that + * every task struct that event->ctx->task could possibly point to * remains valid. This condition is satisfied when called through - * perf_counter_for_each_child or perf_counter_for_each as described - * for perf_counter_disable. + * perf_event_for_each_child or perf_event_for_each as described + * for perf_event_disable. */ -static void perf_counter_enable(struct perf_counter *counter) +static void perf_event_enable(struct perf_event *event) { - struct perf_counter_context *ctx = counter->ctx; + struct perf_event_context *ctx = event->ctx; struct task_struct *task = ctx->task; if (!task) { /* - * Enable the counter on the cpu that it's on + * Enable the event on the cpu that it's on */ - smp_call_function_single(counter->cpu, __perf_counter_enable, - counter, 1); + smp_call_function_single(event->cpu, __perf_event_enable, + event, 1); return; } spin_lock_irq(&ctx->lock); - if (counter->state >= PERF_COUNTER_STATE_INACTIVE) + if (event->state >= PERF_EVENT_STATE_INACTIVE) goto out; /* - * If the counter is in error state, clear that first. - * That way, if we see the counter in error state below, we + * If the event is in error state, clear that first. + * That way, if we see the event in error state below, we * know that it has gone back into error state, as distinct * from the task having been scheduled away before the * cross-call arrived. */ - if (counter->state == PERF_COUNTER_STATE_ERROR) - counter->state = PERF_COUNTER_STATE_OFF; + if (event->state == PERF_EVENT_STATE_ERROR) + event->state = PERF_EVENT_STATE_OFF; retry: spin_unlock_irq(&ctx->lock); - task_oncpu_function_call(task, __perf_counter_enable, counter); + task_oncpu_function_call(task, __perf_event_enable, event); spin_lock_irq(&ctx->lock); /* - * If the context is active and the counter is still off, + * If the context is active and the event is still off, * we need to retry the cross-call. */ - if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF) + if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) goto retry; /* * Since we have the lock this context can't be scheduled * in, so we can change the state safely. */ - if (counter->state == PERF_COUNTER_STATE_OFF) - __perf_counter_mark_enabled(counter, ctx); + if (event->state == PERF_EVENT_STATE_OFF) + __perf_event_mark_enabled(event, ctx); out: spin_unlock_irq(&ctx->lock); } -static int perf_counter_refresh(struct perf_counter *counter, int refresh) +static int perf_event_refresh(struct perf_event *event, int refresh) { /* - * not supported on inherited counters + * not supported on inherited events */ - if (counter->attr.inherit) + if (event->attr.inherit) return -EINVAL; - atomic_add(refresh, &counter->event_limit); - perf_counter_enable(counter); + atomic_add(refresh, &event->event_limit); + perf_event_enable(event); return 0; } -void __perf_counter_sched_out(struct perf_counter_context *ctx, +void __perf_event_sched_out(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx) { - struct perf_counter *counter; + struct perf_event *event; spin_lock(&ctx->lock); ctx->is_active = 0; - if (likely(!ctx->nr_counters)) + if (likely(!ctx->nr_events)) goto out; update_context_time(ctx); perf_disable(); if (ctx->nr_active) { - list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (counter != counter->group_leader) - counter_sched_out(counter, cpuctx, ctx); + list_for_each_entry(event, &ctx->group_list, group_entry) { + if (event != event->group_leader) + event_sched_out(event, cpuctx, ctx); else - group_sched_out(counter, cpuctx, ctx); + group_sched_out(event, cpuctx, ctx); } } perf_enable(); @@ -1047,46 +1046,46 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, /* * Test whether two contexts are equivalent, i.e. whether they * have both been cloned from the same version of the same context - * and they both have the same number of enabled counters. - * If the number of enabled counters is the same, then the set - * of enabled counters should be the same, because these are both - * inherited contexts, therefore we can't access individual counters + * and they both have the same number of enabled events. + * If the number of enabled events is the same, then the set + * of enabled events should be the same, because these are both + * inherited contexts, therefore we can't access individual events * in them directly with an fd; we can only enable/disable all - * counters via prctl, or enable/disable all counters in a family + * events via prctl, or enable/disable all events in a family * via ioctl, which will have the same effect on both contexts. */ -static int context_equiv(struct perf_counter_context *ctx1, - struct perf_counter_context *ctx2) +static int context_equiv(struct perf_event_context *ctx1, + struct perf_event_context *ctx2) { return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx && ctx1->parent_gen == ctx2->parent_gen && !ctx1->pin_count && !ctx2->pin_count; } -static void __perf_counter_read(void *counter); +static void __perf_event_read(void *event); -static void __perf_counter_sync_stat(struct perf_counter *counter, - struct perf_counter *next_counter) +static void __perf_event_sync_stat(struct perf_event *event, + struct perf_event *next_event) { u64 value; - if (!counter->attr.inherit_stat) + if (!event->attr.inherit_stat) return; /* - * Update the counter value, we cannot use perf_counter_read() + * Update the event value, we cannot use perf_event_read() * because we're in the middle of a context switch and have IRQs * disabled, which upsets smp_call_function_single(), however - * we know the counter must be on the current CPU, therefore we + * we know the event must be on the current CPU, therefore we * don't need to use it. */ - switch (counter->state) { - case PERF_COUNTER_STATE_ACTIVE: - __perf_counter_read(counter); + switch (event->state) { + case PERF_EVENT_STATE_ACTIVE: + __perf_event_read(event); break; - case PERF_COUNTER_STATE_INACTIVE: - update_counter_times(counter); + case PERF_EVENT_STATE_INACTIVE: + update_event_times(event); break; default: @@ -1094,73 +1093,73 @@ static void __perf_counter_sync_stat(struct perf_counter *counter, } /* - * In order to keep per-task stats reliable we need to flip the counter + * In order to keep per-task stats reliable we need to flip the event * values when we flip the contexts. */ - value = atomic64_read(&next_counter->count); - value = atomic64_xchg(&counter->count, value); - atomic64_set(&next_counter->count, value); + value = atomic64_read(&next_event->count); + value = atomic64_xchg(&event->count, value); + atomic64_set(&next_event->count, value); - swap(counter->total_time_enabled, next_counter->total_time_enabled); - swap(counter->total_time_running, next_counter->total_time_running); + swap(event->total_time_enabled, next_event->total_time_enabled); + swap(event->total_time_running, next_event->total_time_running); /* * Since we swizzled the values, update the user visible data too. */ - perf_counter_update_userpage(counter); - perf_counter_update_userpage(next_counter); + perf_event_update_userpage(event); + perf_event_update_userpage(next_event); } #define list_next_entry(pos, member) \ list_entry(pos->member.next, typeof(*pos), member) -static void perf_counter_sync_stat(struct perf_counter_context *ctx, - struct perf_counter_context *next_ctx) +static void perf_event_sync_stat(struct perf_event_context *ctx, + struct perf_event_context *next_ctx) { - struct perf_counter *counter, *next_counter; + struct perf_event *event, *next_event; if (!ctx->nr_stat) return; - counter = list_first_entry(&ctx->event_list, - struct perf_counter, event_entry); + event = list_first_entry(&ctx->event_list, + struct perf_event, event_entry); - next_counter = list_first_entry(&next_ctx->event_list, - struct perf_counter, event_entry); + next_event = list_first_entry(&next_ctx->event_list, + struct perf_event, event_entry); - while (&counter->event_entry != &ctx->event_list && - &next_counter->event_entry != &next_ctx->event_list) { + while (&event->event_entry != &ctx->event_list && + &next_event->event_entry != &next_ctx->event_list) { - __perf_counter_sync_stat(counter, next_counter); + __perf_event_sync_stat(event, next_event); - counter = list_next_entry(counter, event_entry); - next_counter = list_next_entry(next_counter, event_entry); + event = list_next_entry(event, event_entry); + next_event = list_next_entry(next_event, event_entry); } } /* - * Called from scheduler to remove the counters of the current task, + * Called from scheduler to remove the events of the current task, * with interrupts disabled. * - * We stop each counter and update the counter value in counter->count. + * We stop each event and update the event value in event->count. * * This does not protect us against NMI, but disable() - * sets the disabled bit in the control field of counter _before_ - * accessing the counter control register. If a NMI hits, then it will - * not restart the counter. + * sets the disabled bit in the control field of event _before_ + * accessing the event control register. If a NMI hits, then it will + * not restart the event. */ -void perf_counter_task_sched_out(struct task_struct *task, +void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next, int cpu) { struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = task->perf_counter_ctxp; - struct perf_counter_context *next_ctx; - struct perf_counter_context *parent; + struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_event_context *next_ctx; + struct perf_event_context *parent; struct pt_regs *regs; int do_switch = 1; regs = task_pt_regs(task); - perf_swcounter_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0); + perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0); if (likely(!ctx || !cpuctx->task_ctx)) return; @@ -1169,7 +1168,7 @@ void perf_counter_task_sched_out(struct task_struct *task, rcu_read_lock(); parent = rcu_dereference(ctx->parent_ctx); - next_ctx = next->perf_counter_ctxp; + next_ctx = next->perf_event_ctxp; if (parent && next_ctx && rcu_dereference(next_ctx->parent_ctx) == parent) { /* @@ -1186,15 +1185,15 @@ void perf_counter_task_sched_out(struct task_struct *task, if (context_equiv(ctx, next_ctx)) { /* * XXX do we need a memory barrier of sorts - * wrt to rcu_dereference() of perf_counter_ctxp + * wrt to rcu_dereference() of perf_event_ctxp */ - task->perf_counter_ctxp = next_ctx; - next->perf_counter_ctxp = ctx; + task->perf_event_ctxp = next_ctx; + next->perf_event_ctxp = ctx; ctx->task = next; next_ctx->task = task; do_switch = 0; - perf_counter_sync_stat(ctx, next_ctx); + perf_event_sync_stat(ctx, next_ctx); } spin_unlock(&next_ctx->lock); spin_unlock(&ctx->lock); @@ -1202,7 +1201,7 @@ void perf_counter_task_sched_out(struct task_struct *task, rcu_read_unlock(); if (do_switch) { - __perf_counter_sched_out(ctx, cpuctx); + __perf_event_sched_out(ctx, cpuctx); cpuctx->task_ctx = NULL; } } @@ -1210,7 +1209,7 @@ void perf_counter_task_sched_out(struct task_struct *task, /* * Called with IRQs disabled */ -static void __perf_counter_task_sched_out(struct perf_counter_context *ctx) +static void __perf_event_task_sched_out(struct perf_event_context *ctx) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); @@ -1220,28 +1219,28 @@ static void __perf_counter_task_sched_out(struct perf_counter_context *ctx) if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) return; - __perf_counter_sched_out(ctx, cpuctx); + __perf_event_sched_out(ctx, cpuctx); cpuctx->task_ctx = NULL; } /* * Called with IRQs disabled */ -static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx) +static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx) { - __perf_counter_sched_out(&cpuctx->ctx, cpuctx); + __perf_event_sched_out(&cpuctx->ctx, cpuctx); } static void -__perf_counter_sched_in(struct perf_counter_context *ctx, +__perf_event_sched_in(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, int cpu) { - struct perf_counter *counter; + struct perf_event *event; int can_add_hw = 1; spin_lock(&ctx->lock); ctx->is_active = 1; - if (likely(!ctx->nr_counters)) + if (likely(!ctx->nr_events)) goto out; ctx->timestamp = perf_clock(); @@ -1252,52 +1251,52 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, * First go through the list and put on any pinned groups * in order to give them the best chance of going on. */ - list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (counter->state <= PERF_COUNTER_STATE_OFF || - !counter->attr.pinned) + list_for_each_entry(event, &ctx->group_list, group_entry) { + if (event->state <= PERF_EVENT_STATE_OFF || + !event->attr.pinned) continue; - if (counter->cpu != -1 && counter->cpu != cpu) + if (event->cpu != -1 && event->cpu != cpu) continue; - if (counter != counter->group_leader) - counter_sched_in(counter, cpuctx, ctx, cpu); + if (event != event->group_leader) + event_sched_in(event, cpuctx, ctx, cpu); else { - if (group_can_go_on(counter, cpuctx, 1)) - group_sched_in(counter, cpuctx, ctx, cpu); + if (group_can_go_on(event, cpuctx, 1)) + group_sched_in(event, cpuctx, ctx, cpu); } /* * If this pinned group hasn't been scheduled, * put it in error state. */ - if (counter->state == PERF_COUNTER_STATE_INACTIVE) { - update_group_times(counter); - counter->state = PERF_COUNTER_STATE_ERROR; + if (event->state == PERF_EVENT_STATE_INACTIVE) { + update_group_times(event); + event->state = PERF_EVENT_STATE_ERROR; } } - list_for_each_entry(counter, &ctx->counter_list, list_entry) { + list_for_each_entry(event, &ctx->group_list, group_entry) { /* - * Ignore counters in OFF or ERROR state, and - * ignore pinned counters since we did them already. + * Ignore events in OFF or ERROR state, and + * ignore pinned events since we did them already. */ - if (counter->state <= PERF_COUNTER_STATE_OFF || - counter->attr.pinned) + if (event->state <= PERF_EVENT_STATE_OFF || + event->attr.pinned) continue; /* * Listen to the 'cpu' scheduling filter constraint - * of counters: + * of events: */ - if (counter->cpu != -1 && counter->cpu != cpu) + if (event->cpu != -1 && event->cpu != cpu) continue; - if (counter != counter->group_leader) { - if (counter_sched_in(counter, cpuctx, ctx, cpu)) + if (event != event->group_leader) { + if (event_sched_in(event, cpuctx, ctx, cpu)) can_add_hw = 0; } else { - if (group_can_go_on(counter, cpuctx, can_add_hw)) { - if (group_sched_in(counter, cpuctx, ctx, cpu)) + if (group_can_go_on(event, cpuctx, can_add_hw)) { + if (group_sched_in(event, cpuctx, ctx, cpu)) can_add_hw = 0; } } @@ -1308,48 +1307,48 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, } /* - * Called from scheduler to add the counters of the current task + * Called from scheduler to add the events of the current task * with interrupts disabled. * - * We restore the counter value and then enable it. + * We restore the event value and then enable it. * * This does not protect us against NMI, but enable() - * sets the enabled bit in the control field of counter _before_ - * accessing the counter control register. If a NMI hits, then it will - * keep the counter running. + * sets the enabled bit in the control field of event _before_ + * accessing the event control register. If a NMI hits, then it will + * keep the event running. */ -void perf_counter_task_sched_in(struct task_struct *task, int cpu) +void perf_event_task_sched_in(struct task_struct *task, int cpu) { struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = task->perf_counter_ctxp; + struct perf_event_context *ctx = task->perf_event_ctxp; if (likely(!ctx)) return; if (cpuctx->task_ctx == ctx) return; - __perf_counter_sched_in(ctx, cpuctx, cpu); + __perf_event_sched_in(ctx, cpuctx, cpu); cpuctx->task_ctx = ctx; } -static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) +static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) { - struct perf_counter_context *ctx = &cpuctx->ctx; + struct perf_event_context *ctx = &cpuctx->ctx; - __perf_counter_sched_in(ctx, cpuctx, cpu); + __perf_event_sched_in(ctx, cpuctx, cpu); } #define MAX_INTERRUPTS (~0ULL) -static void perf_log_throttle(struct perf_counter *counter, int enable); +static void perf_log_throttle(struct perf_event *event, int enable); -static void perf_adjust_period(struct perf_counter *counter, u64 events) +static void perf_adjust_period(struct perf_event *event, u64 events) { - struct hw_perf_counter *hwc = &counter->hw; + struct hw_perf_event *hwc = &event->hw; u64 period, sample_period; s64 delta; events *= hwc->sample_period; - period = div64_u64(events, counter->attr.sample_freq); + period = div64_u64(events, event->attr.sample_freq); delta = (s64)(period - hwc->sample_period); delta = (delta + 7) / 8; /* low pass filter */ @@ -1362,39 +1361,39 @@ static void perf_adjust_period(struct perf_counter *counter, u64 events) hwc->sample_period = sample_period; } -static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) +static void perf_ctx_adjust_freq(struct perf_event_context *ctx) { - struct perf_counter *counter; - struct hw_perf_counter *hwc; + struct perf_event *event; + struct hw_perf_event *hwc; u64 interrupts, freq; spin_lock(&ctx->lock); - list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (counter->state != PERF_COUNTER_STATE_ACTIVE) + list_for_each_entry(event, &ctx->group_list, group_entry) { + if (event->state != PERF_EVENT_STATE_ACTIVE) continue; - hwc = &counter->hw; + hwc = &event->hw; interrupts = hwc->interrupts; hwc->interrupts = 0; /* - * unthrottle counters on the tick + * unthrottle events on the tick */ if (interrupts == MAX_INTERRUPTS) { - perf_log_throttle(counter, 1); - counter->pmu->unthrottle(counter); - interrupts = 2*sysctl_perf_counter_sample_rate/HZ; + perf_log_throttle(event, 1); + event->pmu->unthrottle(event); + interrupts = 2*sysctl_perf_event_sample_rate/HZ; } - if (!counter->attr.freq || !counter->attr.sample_freq) + if (!event->attr.freq || !event->attr.sample_freq) continue; /* * if the specified freq < HZ then we need to skip ticks */ - if (counter->attr.sample_freq < HZ) { - freq = counter->attr.sample_freq; + if (event->attr.sample_freq < HZ) { + freq = event->attr.sample_freq; hwc->freq_count += freq; hwc->freq_interrupts += interrupts; @@ -1408,7 +1407,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) } else freq = HZ; - perf_adjust_period(counter, freq * interrupts); + perf_adjust_period(event, freq * interrupts); /* * In order to avoid being stalled by an (accidental) huge @@ -1417,9 +1416,9 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) */ if (!interrupts) { perf_disable(); - counter->pmu->disable(counter); + event->pmu->disable(event); atomic64_set(&hwc->period_left, 0); - counter->pmu->enable(counter); + event->pmu->enable(event); perf_enable(); } } @@ -1427,22 +1426,22 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) } /* - * Round-robin a context's counters: + * Round-robin a context's events: */ -static void rotate_ctx(struct perf_counter_context *ctx) +static void rotate_ctx(struct perf_event_context *ctx) { - struct perf_counter *counter; + struct perf_event *event; - if (!ctx->nr_counters) + if (!ctx->nr_events) return; spin_lock(&ctx->lock); /* - * Rotate the first entry last (works just fine for group counters too): + * Rotate the first entry last (works just fine for group events too): */ perf_disable(); - list_for_each_entry(counter, &ctx->counter_list, list_entry) { - list_move_tail(&counter->list_entry, &ctx->counter_list); + list_for_each_entry(event, &ctx->group_list, group_entry) { + list_move_tail(&event->group_entry, &ctx->group_list); break; } perf_enable(); @@ -1450,93 +1449,93 @@ static void rotate_ctx(struct perf_counter_context *ctx) spin_unlock(&ctx->lock); } -void perf_counter_task_tick(struct task_struct *curr, int cpu) +void perf_event_task_tick(struct task_struct *curr, int cpu) { struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx; + struct perf_event_context *ctx; - if (!atomic_read(&nr_counters)) + if (!atomic_read(&nr_events)) return; cpuctx = &per_cpu(perf_cpu_context, cpu); - ctx = curr->perf_counter_ctxp; + ctx = curr->perf_event_ctxp; perf_ctx_adjust_freq(&cpuctx->ctx); if (ctx) perf_ctx_adjust_freq(ctx); - perf_counter_cpu_sched_out(cpuctx); + perf_event_cpu_sched_out(cpuctx); if (ctx) - __perf_counter_task_sched_out(ctx); + __perf_event_task_sched_out(ctx); rotate_ctx(&cpuctx->ctx); if (ctx) rotate_ctx(ctx); - perf_counter_cpu_sched_in(cpuctx, cpu); + perf_event_cpu_sched_in(cpuctx, cpu); if (ctx) - perf_counter_task_sched_in(curr, cpu); + perf_event_task_sched_in(curr, cpu); } /* - * Enable all of a task's counters that have been marked enable-on-exec. + * Enable all of a task's events that have been marked enable-on-exec. * This expects task == current. */ -static void perf_counter_enable_on_exec(struct task_struct *task) +static void perf_event_enable_on_exec(struct task_struct *task) { - struct perf_counter_context *ctx; - struct perf_counter *counter; + struct perf_event_context *ctx; + struct perf_event *event; unsigned long flags; int enabled = 0; local_irq_save(flags); - ctx = task->perf_counter_ctxp; - if (!ctx || !ctx->nr_counters) + ctx = task->perf_event_ctxp; + if (!ctx || !ctx->nr_events) goto out; - __perf_counter_task_sched_out(ctx); + __perf_event_task_sched_out(ctx); spin_lock(&ctx->lock); - list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (!counter->attr.enable_on_exec) + list_for_each_entry(event, &ctx->group_list, group_entry) { + if (!event->attr.enable_on_exec) continue; - counter->attr.enable_on_exec = 0; - if (counter->state >= PERF_COUNTER_STATE_INACTIVE) + event->attr.enable_on_exec = 0; + if (event->state >= PERF_EVENT_STATE_INACTIVE) continue; - __perf_counter_mark_enabled(counter, ctx); + __perf_event_mark_enabled(event, ctx); enabled = 1; } /* - * Unclone this context if we enabled any counter. + * Unclone this context if we enabled any event. */ if (enabled) unclone_ctx(ctx); spin_unlock(&ctx->lock); - perf_counter_task_sched_in(task, smp_processor_id()); + perf_event_task_sched_in(task, smp_processor_id()); out: local_irq_restore(flags); } /* - * Cross CPU call to read the hardware counter + * Cross CPU call to read the hardware event */ -static void __perf_counter_read(void *info) +static void __perf_event_read(void *info) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter *counter = info; - struct perf_counter_context *ctx = counter->ctx; + struct perf_event *event = info; + struct perf_event_context *ctx = event->ctx; unsigned long flags; /* * If this is a task context, we need to check whether it is * the current task context of this cpu. If not it has been * scheduled out before the smp call arrived. In that case - * counter->count would have been updated to a recent sample - * when the counter was scheduled out. + * event->count would have been updated to a recent sample + * when the event was scheduled out. */ if (ctx->task && cpuctx->task_ctx != ctx) return; @@ -1544,56 +1543,56 @@ static void __perf_counter_read(void *info) local_irq_save(flags); if (ctx->is_active) update_context_time(ctx); - counter->pmu->read(counter); - update_counter_times(counter); + event->pmu->read(event); + update_event_times(event); local_irq_restore(flags); } -static u64 perf_counter_read(struct perf_counter *counter) +static u64 perf_event_read(struct perf_event *event) { /* - * If counter is enabled and currently active on a CPU, update the - * value in the counter structure: + * If event is enabled and currently active on a CPU, update the + * value in the event structure: */ - if (counter->state == PERF_COUNTER_STATE_ACTIVE) { - smp_call_function_single(counter->oncpu, - __perf_counter_read, counter, 1); - } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { - update_counter_times(counter); + if (event->state == PERF_EVENT_STATE_ACTIVE) { + smp_call_function_single(event->oncpu, + __perf_event_read, event, 1); + } else if (event->state == PERF_EVENT_STATE_INACTIVE) { + update_event_times(event); } - return atomic64_read(&counter->count); + return atomic64_read(&event->count); } /* - * Initialize the perf_counter context in a task_struct: + * Initialize the perf_event context in a task_struct: */ static void -__perf_counter_init_context(struct perf_counter_context *ctx, +__perf_event_init_context(struct perf_event_context *ctx, struct task_struct *task) { memset(ctx, 0, sizeof(*ctx)); spin_lock_init(&ctx->lock); mutex_init(&ctx->mutex); - INIT_LIST_HEAD(&ctx->counter_list); + INIT_LIST_HEAD(&ctx->group_list); INIT_LIST_HEAD(&ctx->event_list); atomic_set(&ctx->refcount, 1); ctx->task = task; } -static struct perf_counter_context *find_get_context(pid_t pid, int cpu) +static struct perf_event_context *find_get_context(pid_t pid, int cpu) { - struct perf_counter_context *ctx; + struct perf_event_context *ctx; struct perf_cpu_context *cpuctx; struct task_struct *task; unsigned long flags; int err; /* - * If cpu is not a wildcard then this is a percpu counter: + * If cpu is not a wildcard then this is a percpu event: */ if (cpu != -1) { - /* Must be root to operate on a CPU counter: */ + /* Must be root to operate on a CPU event: */ if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EACCES); @@ -1601,7 +1600,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) return ERR_PTR(-EINVAL); /* - * We could be clever and allow to attach a counter to an + * We could be clever and allow to attach a event to an * offline CPU and activate it when the CPU comes up, but * that's for later. */ @@ -1628,7 +1627,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) return ERR_PTR(-ESRCH); /* - * Can't attach counters to a dying task. + * Can't attach events to a dying task. */ err = -ESRCH; if (task->flags & PF_EXITING) @@ -1647,13 +1646,13 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) } if (!ctx) { - ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); + ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); err = -ENOMEM; if (!ctx) goto errout; - __perf_counter_init_context(ctx, task); + __perf_event_init_context(ctx, task); get_ctx(ctx); - if (cmpxchg(&task->perf_counter_ctxp, NULL, ctx)) { + if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { /* * We raced with some other task; use * the context they set. @@ -1672,42 +1671,42 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) return ERR_PTR(err); } -static void free_counter_rcu(struct rcu_head *head) +static void free_event_rcu(struct rcu_head *head) { - struct perf_counter *counter; + struct perf_event *event; - counter = container_of(head, struct perf_counter, rcu_head); - if (counter->ns) - put_pid_ns(counter->ns); - kfree(counter); + event = container_of(head, struct perf_event, rcu_head); + if (event->ns) + put_pid_ns(event->ns); + kfree(event); } -static void perf_pending_sync(struct perf_counter *counter); +static void perf_pending_sync(struct perf_event *event); -static void free_counter(struct perf_counter *counter) +static void free_event(struct perf_event *event) { - perf_pending_sync(counter); + perf_pending_sync(event); - if (!counter->parent) { - atomic_dec(&nr_counters); - if (counter->attr.mmap) - atomic_dec(&nr_mmap_counters); - if (counter->attr.comm) - atomic_dec(&nr_comm_counters); - if (counter->attr.task) - atomic_dec(&nr_task_counters); + if (!event->parent) { + atomic_dec(&nr_events); + if (event->attr.mmap) + atomic_dec(&nr_mmap_events); + if (event->attr.comm) + atomic_dec(&nr_comm_events); + if (event->attr.task) + atomic_dec(&nr_task_events); } - if (counter->output) { - fput(counter->output->filp); - counter->output = NULL; + if (event->output) { + fput(event->output->filp); + event->output = NULL; } - if (counter->destroy) - counter->destroy(counter); + if (event->destroy) + event->destroy(event); - put_ctx(counter->ctx); - call_rcu(&counter->rcu_head, free_counter_rcu); + put_ctx(event->ctx); + call_rcu(&event->rcu_head, free_event_rcu); } /* @@ -1715,43 +1714,43 @@ static void free_counter(struct perf_counter *counter) */ static int perf_release(struct inode *inode, struct file *file) { - struct perf_counter *counter = file->private_data; - struct perf_counter_context *ctx = counter->ctx; + struct perf_event *event = file->private_data; + struct perf_event_context *ctx = event->ctx; file->private_data = NULL; WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); - perf_counter_remove_from_context(counter); + perf_event_remove_from_context(event); mutex_unlock(&ctx->mutex); - mutex_lock(&counter->owner->perf_counter_mutex); - list_del_init(&counter->owner_entry); - mutex_unlock(&counter->owner->perf_counter_mutex); - put_task_struct(counter->owner); + mutex_lock(&event->owner->perf_event_mutex); + list_del_init(&event->owner_entry); + mutex_unlock(&event->owner->perf_event_mutex); + put_task_struct(event->owner); - free_counter(counter); + free_event(event); return 0; } -static int perf_counter_read_size(struct perf_counter *counter) +static int perf_event_read_size(struct perf_event *event) { int entry = sizeof(u64); /* value */ int size = 0; int nr = 1; - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) size += sizeof(u64); - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) size += sizeof(u64); - if (counter->attr.read_format & PERF_FORMAT_ID) + if (event->attr.read_format & PERF_FORMAT_ID) entry += sizeof(u64); - if (counter->attr.read_format & PERF_FORMAT_GROUP) { - nr += counter->group_leader->nr_siblings; + if (event->attr.read_format & PERF_FORMAT_GROUP) { + nr += event->group_leader->nr_siblings; size += sizeof(u64); } @@ -1760,27 +1759,27 @@ static int perf_counter_read_size(struct perf_counter *counter) return size; } -static u64 perf_counter_read_value(struct perf_counter *counter) +static u64 perf_event_read_value(struct perf_event *event) { - struct perf_counter *child; + struct perf_event *child; u64 total = 0; - total += perf_counter_read(counter); - list_for_each_entry(child, &counter->child_list, child_list) - total += perf_counter_read(child); + total += perf_event_read(event); + list_for_each_entry(child, &event->child_list, child_list) + total += perf_event_read(child); return total; } -static int perf_counter_read_entry(struct perf_counter *counter, +static int perf_event_read_entry(struct perf_event *event, u64 read_format, char __user *buf) { int n = 0, count = 0; u64 values[2]; - values[n++] = perf_counter_read_value(counter); + values[n++] = perf_event_read_value(event); if (read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(counter); + values[n++] = primary_event_id(event); count = n * sizeof(u64); @@ -1790,10 +1789,10 @@ static int perf_counter_read_entry(struct perf_counter *counter, return count; } -static int perf_counter_read_group(struct perf_counter *counter, +static int perf_event_read_group(struct perf_event *event, u64 read_format, char __user *buf) { - struct perf_counter *leader = counter->group_leader, *sub; + struct perf_event *leader = event->group_leader, *sub; int n = 0, size = 0, err = -EFAULT; u64 values[3]; @@ -1812,14 +1811,14 @@ static int perf_counter_read_group(struct perf_counter *counter, if (copy_to_user(buf, values, size)) return -EFAULT; - err = perf_counter_read_entry(leader, read_format, buf + size); + err = perf_event_read_entry(leader, read_format, buf + size); if (err < 0) return err; size += err; - list_for_each_entry(sub, &leader->sibling_list, list_entry) { - err = perf_counter_read_entry(sub, read_format, + list_for_each_entry(sub, &leader->sibling_list, group_entry) { + err = perf_event_read_entry(sub, read_format, buf + size); if (err < 0) return err; @@ -1830,23 +1829,23 @@ static int perf_counter_read_group(struct perf_counter *counter, return size; } -static int perf_counter_read_one(struct perf_counter *counter, +static int perf_event_read_one(struct perf_event *event, u64 read_format, char __user *buf) { u64 values[4]; int n = 0; - values[n++] = perf_counter_read_value(counter); + values[n++] = perf_event_read_value(event); if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = counter->total_time_enabled + - atomic64_read(&counter->child_total_time_enabled); + values[n++] = event->total_time_enabled + + atomic64_read(&event->child_total_time_enabled); } if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = counter->total_time_running + - atomic64_read(&counter->child_total_time_running); + values[n++] = event->total_time_running + + atomic64_read(&event->child_total_time_running); } if (read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(counter); + values[n++] = primary_event_id(event); if (copy_to_user(buf, values, n * sizeof(u64))) return -EFAULT; @@ -1855,32 +1854,32 @@ static int perf_counter_read_one(struct perf_counter *counter, } /* - * Read the performance counter - simple non blocking version for now + * Read the performance event - simple non blocking version for now */ static ssize_t -perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) +perf_read_hw(struct perf_event *event, char __user *buf, size_t count) { - u64 read_format = counter->attr.read_format; + u64 read_format = event->attr.read_format; int ret; /* - * Return end-of-file for a read on a counter that is in + * Return end-of-file for a read on a event that is in * error state (i.e. because it was pinned but it couldn't be * scheduled on to the CPU at some point). */ - if (counter->state == PERF_COUNTER_STATE_ERROR) + if (event->state == PERF_EVENT_STATE_ERROR) return 0; - if (count < perf_counter_read_size(counter)) + if (count < perf_event_read_size(event)) return -ENOSPC; - WARN_ON_ONCE(counter->ctx->parent_ctx); - mutex_lock(&counter->child_mutex); + WARN_ON_ONCE(event->ctx->parent_ctx); + mutex_lock(&event->child_mutex); if (read_format & PERF_FORMAT_GROUP) - ret = perf_counter_read_group(counter, read_format, buf); + ret = perf_event_read_group(event, read_format, buf); else - ret = perf_counter_read_one(counter, read_format, buf); - mutex_unlock(&counter->child_mutex); + ret = perf_event_read_one(event, read_format, buf); + mutex_unlock(&event->child_mutex); return ret; } @@ -1888,79 +1887,79 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) static ssize_t perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct perf_counter *counter = file->private_data; + struct perf_event *event = file->private_data; - return perf_read_hw(counter, buf, count); + return perf_read_hw(event, buf, count); } static unsigned int perf_poll(struct file *file, poll_table *wait) { - struct perf_counter *counter = file->private_data; + struct perf_event *event = file->private_data; struct perf_mmap_data *data; unsigned int events = POLL_HUP; rcu_read_lock(); - data = rcu_dereference(counter->data); + data = rcu_dereference(event->data); if (data) events = atomic_xchg(&data->poll, 0); rcu_read_unlock(); - poll_wait(file, &counter->waitq, wait); + poll_wait(file, &event->waitq, wait); return events; } -static void perf_counter_reset(struct perf_counter *counter) +static void perf_event_reset(struct perf_event *event) { - (void)perf_counter_read(counter); - atomic64_set(&counter->count, 0); - perf_counter_update_userpage(counter); + (void)perf_event_read(event); + atomic64_set(&event->count, 0); + perf_event_update_userpage(event); } /* - * Holding the top-level counter's child_mutex means that any - * descendant process that has inherited this counter will block - * in sync_child_counter if it goes to exit, thus satisfying the - * task existence requirements of perf_counter_enable/disable. + * Holding the top-level event's child_mutex means that any + * descendant process that has inherited this event will block + * in sync_child_event if it goes to exit, thus satisfying the + * task existence requirements of perf_event_enable/disable. */ -static void perf_counter_for_each_child(struct perf_counter *counter, - void (*func)(struct perf_counter *)) +static void perf_event_for_each_child(struct perf_event *event, + void (*func)(struct perf_event *)) { - struct perf_counter *child; + struct perf_event *child; - WARN_ON_ONCE(counter->ctx->parent_ctx); - mutex_lock(&counter->child_mutex); - func(counter); - list_for_each_entry(child, &counter->child_list, child_list) + WARN_ON_ONCE(event->ctx->parent_ctx); + mutex_lock(&event->child_mutex); + func(event); + list_for_each_entry(child, &event->child_list, child_list) func(child); - mutex_unlock(&counter->child_mutex); + mutex_unlock(&event->child_mutex); } -static void perf_counter_for_each(struct perf_counter *counter, - void (*func)(struct perf_counter *)) +static void perf_event_for_each(struct perf_event *event, + void (*func)(struct perf_event *)) { - struct perf_counter_context *ctx = counter->ctx; - struct perf_counter *sibling; + struct perf_event_context *ctx = event->ctx; + struct perf_event *sibling; WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); - counter = counter->group_leader; + event = event->group_leader; - perf_counter_for_each_child(counter, func); - func(counter); - list_for_each_entry(sibling, &counter->sibling_list, list_entry) - perf_counter_for_each_child(counter, func); + perf_event_for_each_child(event, func); + func(event); + list_for_each_entry(sibling, &event->sibling_list, group_entry) + perf_event_for_each_child(event, func); mutex_unlock(&ctx->mutex); } -static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) +static int perf_event_period(struct perf_event *event, u64 __user *arg) { - struct perf_counter_context *ctx = counter->ctx; + struct perf_event_context *ctx = event->ctx; unsigned long size; int ret = 0; u64 value; - if (!counter->attr.sample_period) + if (!event->attr.sample_period) return -EINVAL; size = copy_from_user(&value, arg, sizeof(value)); @@ -1971,16 +1970,16 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) return -EINVAL; spin_lock_irq(&ctx->lock); - if (counter->attr.freq) { - if (value > sysctl_perf_counter_sample_rate) { + if (event->attr.freq) { + if (value > sysctl_perf_event_sample_rate) { ret = -EINVAL; goto unlock; } - counter->attr.sample_freq = value; + event->attr.sample_freq = value; } else { - counter->attr.sample_period = value; - counter->hw.sample_period = value; + event->attr.sample_period = value; + event->hw.sample_period = value; } unlock: spin_unlock_irq(&ctx->lock); @@ -1988,80 +1987,80 @@ unlock: return ret; } -int perf_counter_set_output(struct perf_counter *counter, int output_fd); +int perf_event_set_output(struct perf_event *event, int output_fd); static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct perf_counter *counter = file->private_data; - void (*func)(struct perf_counter *); + struct perf_event *event = file->private_data; + void (*func)(struct perf_event *); u32 flags = arg; switch (cmd) { - case PERF_COUNTER_IOC_ENABLE: - func = perf_counter_enable; + case PERF_EVENT_IOC_ENABLE: + func = perf_event_enable; break; - case PERF_COUNTER_IOC_DISABLE: - func = perf_counter_disable; + case PERF_EVENT_IOC_DISABLE: + func = perf_event_disable; break; - case PERF_COUNTER_IOC_RESET: - func = perf_counter_reset; + case PERF_EVENT_IOC_RESET: + func = perf_event_reset; break; - case PERF_COUNTER_IOC_REFRESH: - return perf_counter_refresh(counter, arg); + case PERF_EVENT_IOC_REFRESH: + return perf_event_refresh(event, arg); - case PERF_COUNTER_IOC_PERIOD: - return perf_counter_period(counter, (u64 __user *)arg); + case PERF_EVENT_IOC_PERIOD: + return perf_event_period(event, (u64 __user *)arg); - case PERF_COUNTER_IOC_SET_OUTPUT: - return perf_counter_set_output(counter, arg); + case PERF_EVENT_IOC_SET_OUTPUT: + return perf_event_set_output(event, arg); default: return -ENOTTY; } if (flags & PERF_IOC_FLAG_GROUP) - perf_counter_for_each(counter, func); + perf_event_for_each(event, func); else - perf_counter_for_each_child(counter, func); + perf_event_for_each_child(event, func); return 0; } -int perf_counter_task_enable(void) +int perf_event_task_enable(void) { - struct perf_counter *counter; + struct perf_event *event; - mutex_lock(¤t->perf_counter_mutex); - list_for_each_entry(counter, ¤t->perf_counter_list, owner_entry) - perf_counter_for_each_child(counter, perf_counter_enable); - mutex_unlock(¤t->perf_counter_mutex); + mutex_lock(¤t->perf_event_mutex); + list_for_each_entry(event, ¤t->perf_event_list, owner_entry) + perf_event_for_each_child(event, perf_event_enable); + mutex_unlock(¤t->perf_event_mutex); return 0; } -int perf_counter_task_disable(void) +int perf_event_task_disable(void) { - struct perf_counter *counter; + struct perf_event *event; - mutex_lock(¤t->perf_counter_mutex); - list_for_each_entry(counter, ¤t->perf_counter_list, owner_entry) - perf_counter_for_each_child(counter, perf_counter_disable); - mutex_unlock(¤t->perf_counter_mutex); + mutex_lock(¤t->perf_event_mutex); + list_for_each_entry(event, ¤t->perf_event_list, owner_entry) + perf_event_for_each_child(event, perf_event_disable); + mutex_unlock(¤t->perf_event_mutex); return 0; } -#ifndef PERF_COUNTER_INDEX_OFFSET -# define PERF_COUNTER_INDEX_OFFSET 0 +#ifndef PERF_EVENT_INDEX_OFFSET +# define PERF_EVENT_INDEX_OFFSET 0 #endif -static int perf_counter_index(struct perf_counter *counter) +static int perf_event_index(struct perf_event *event) { - if (counter->state != PERF_COUNTER_STATE_ACTIVE) + if (event->state != PERF_EVENT_STATE_ACTIVE) return 0; - return counter->hw.idx + 1 - PERF_COUNTER_INDEX_OFFSET; + return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET; } /* @@ -2069,13 +2068,13 @@ static int perf_counter_index(struct perf_counter *counter) * the seqlock logic goes bad. We can not serialize this because the arch * code calls this from NMI context. */ -void perf_counter_update_userpage(struct perf_counter *counter) +void perf_event_update_userpage(struct perf_event *event) { - struct perf_counter_mmap_page *userpg; + struct perf_event_mmap_page *userpg; struct perf_mmap_data *data; rcu_read_lock(); - data = rcu_dereference(counter->data); + data = rcu_dereference(event->data); if (!data) goto unlock; @@ -2088,16 +2087,16 @@ void perf_counter_update_userpage(struct perf_counter *counter) preempt_disable(); ++userpg->lock; barrier(); - userpg->index = perf_counter_index(counter); - userpg->offset = atomic64_read(&counter->count); - if (counter->state == PERF_COUNTER_STATE_ACTIVE) - userpg->offset -= atomic64_read(&counter->hw.prev_count); + userpg->index = perf_event_index(event); + userpg->offset = atomic64_read(&event->count); + if (event->state == PERF_EVENT_STATE_ACTIVE) + userpg->offset -= atomic64_read(&event->hw.prev_count); - userpg->time_enabled = counter->total_time_enabled + - atomic64_read(&counter->child_total_time_enabled); + userpg->time_enabled = event->total_time_enabled + + atomic64_read(&event->child_total_time_enabled); - userpg->time_running = counter->total_time_running + - atomic64_read(&counter->child_total_time_running); + userpg->time_running = event->total_time_running + + atomic64_read(&event->child_total_time_running); barrier(); ++userpg->lock; @@ -2108,7 +2107,7 @@ unlock: static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - struct perf_counter *counter = vma->vm_file->private_data; + struct perf_event *event = vma->vm_file->private_data; struct perf_mmap_data *data; int ret = VM_FAULT_SIGBUS; @@ -2119,7 +2118,7 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } rcu_read_lock(); - data = rcu_dereference(counter->data); + data = rcu_dereference(event->data); if (!data) goto unlock; @@ -2148,13 +2147,13 @@ unlock: return ret; } -static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages) +static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages) { struct perf_mmap_data *data; unsigned long size; int i; - WARN_ON(atomic_read(&counter->mmap_count)); + WARN_ON(atomic_read(&event->mmap_count)); size = sizeof(struct perf_mmap_data); size += nr_pages * sizeof(void *); @@ -2176,14 +2175,14 @@ static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages) data->nr_pages = nr_pages; atomic_set(&data->lock, -1); - if (counter->attr.watermark) { + if (event->attr.watermark) { data->watermark = min_t(long, PAGE_SIZE * nr_pages, - counter->attr.wakeup_watermark); + event->attr.wakeup_watermark); } if (!data->watermark) data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4); - rcu_assign_pointer(counter->data, data); + rcu_assign_pointer(event->data, data); return 0; @@ -2222,35 +2221,35 @@ static void __perf_mmap_data_free(struct rcu_head *rcu_head) kfree(data); } -static void perf_mmap_data_free(struct perf_counter *counter) +static void perf_mmap_data_free(struct perf_event *event) { - struct perf_mmap_data *data = counter->data; + struct perf_mmap_data *data = event->data; - WARN_ON(atomic_read(&counter->mmap_count)); + WARN_ON(atomic_read(&event->mmap_count)); - rcu_assign_pointer(counter->data, NULL); + rcu_assign_pointer(event->data, NULL); call_rcu(&data->rcu_head, __perf_mmap_data_free); } static void perf_mmap_open(struct vm_area_struct *vma) { - struct perf_counter *counter = vma->vm_file->private_data; + struct perf_event *event = vma->vm_file->private_data; - atomic_inc(&counter->mmap_count); + atomic_inc(&event->mmap_count); } static void perf_mmap_close(struct vm_area_struct *vma) { - struct perf_counter *counter = vma->vm_file->private_data; + struct perf_event *event = vma->vm_file->private_data; - WARN_ON_ONCE(counter->ctx->parent_ctx); - if (atomic_dec_and_mutex_lock(&counter->mmap_count, &counter->mmap_mutex)) { + WARN_ON_ONCE(event->ctx->parent_ctx); + if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { struct user_struct *user = current_user(); - atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm); - vma->vm_mm->locked_vm -= counter->data->nr_locked; - perf_mmap_data_free(counter); - mutex_unlock(&counter->mmap_mutex); + atomic_long_sub(event->data->nr_pages + 1, &user->locked_vm); + vma->vm_mm->locked_vm -= event->data->nr_locked; + perf_mmap_data_free(event); + mutex_unlock(&event->mmap_mutex); } } @@ -2263,7 +2262,7 @@ static struct vm_operations_struct perf_mmap_vmops = { static int perf_mmap(struct file *file, struct vm_area_struct *vma) { - struct perf_counter *counter = file->private_data; + struct perf_event *event = file->private_data; unsigned long user_locked, user_lock_limit; struct user_struct *user = current_user(); unsigned long locked, lock_limit; @@ -2291,21 +2290,21 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (vma->vm_pgoff != 0) return -EINVAL; - WARN_ON_ONCE(counter->ctx->parent_ctx); - mutex_lock(&counter->mmap_mutex); - if (counter->output) { + WARN_ON_ONCE(event->ctx->parent_ctx); + mutex_lock(&event->mmap_mutex); + if (event->output) { ret = -EINVAL; goto unlock; } - if (atomic_inc_not_zero(&counter->mmap_count)) { - if (nr_pages != counter->data->nr_pages) + if (atomic_inc_not_zero(&event->mmap_count)) { + if (nr_pages != event->data->nr_pages) ret = -EINVAL; goto unlock; } user_extra = nr_pages + 1; - user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10); + user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10); /* * Increase the limit linearly with more CPUs: @@ -2328,20 +2327,20 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) goto unlock; } - WARN_ON(counter->data); - ret = perf_mmap_data_alloc(counter, nr_pages); + WARN_ON(event->data); + ret = perf_mmap_data_alloc(event, nr_pages); if (ret) goto unlock; - atomic_set(&counter->mmap_count, 1); + atomic_set(&event->mmap_count, 1); atomic_long_add(user_extra, &user->locked_vm); vma->vm_mm->locked_vm += extra; - counter->data->nr_locked = extra; + event->data->nr_locked = extra; if (vma->vm_flags & VM_WRITE) - counter->data->writable = 1; + event->data->writable = 1; unlock: - mutex_unlock(&counter->mmap_mutex); + mutex_unlock(&event->mmap_mutex); vma->vm_flags |= VM_RESERVED; vma->vm_ops = &perf_mmap_vmops; @@ -2352,11 +2351,11 @@ unlock: static int perf_fasync(int fd, struct file *filp, int on) { struct inode *inode = filp->f_path.dentry->d_inode; - struct perf_counter *counter = filp->private_data; + struct perf_event *event = filp->private_data; int retval; mutex_lock(&inode->i_mutex); - retval = fasync_helper(fd, filp, on, &counter->fasync); + retval = fasync_helper(fd, filp, on, &event->fasync); mutex_unlock(&inode->i_mutex); if (retval < 0) @@ -2376,19 +2375,19 @@ static const struct file_operations perf_fops = { }; /* - * Perf counter wakeup + * Perf event wakeup * * If there's data, ensure we set the poll() state and publish everything * to user-space before waking everybody up. */ -void perf_counter_wakeup(struct perf_counter *counter) +void perf_event_wakeup(struct perf_event *event) { - wake_up_all(&counter->waitq); + wake_up_all(&event->waitq); - if (counter->pending_kill) { - kill_fasync(&counter->fasync, SIGIO, counter->pending_kill); - counter->pending_kill = 0; + if (event->pending_kill) { + kill_fasync(&event->fasync, SIGIO, event->pending_kill); + event->pending_kill = 0; } } @@ -2401,19 +2400,19 @@ void perf_counter_wakeup(struct perf_counter *counter) * single linked list and use cmpxchg() to add entries lockless. */ -static void perf_pending_counter(struct perf_pending_entry *entry) +static void perf_pending_event(struct perf_pending_entry *entry) { - struct perf_counter *counter = container_of(entry, - struct perf_counter, pending); + struct perf_event *event = container_of(entry, + struct perf_event, pending); - if (counter->pending_disable) { - counter->pending_disable = 0; - __perf_counter_disable(counter); + if (event->pending_disable) { + event->pending_disable = 0; + __perf_event_disable(event); } - if (counter->pending_wakeup) { - counter->pending_wakeup = 0; - perf_counter_wakeup(counter); + if (event->pending_wakeup) { + event->pending_wakeup = 0; + perf_event_wakeup(event); } } @@ -2439,7 +2438,7 @@ static void perf_pending_queue(struct perf_pending_entry *entry, entry->next = *head; } while (cmpxchg(head, entry->next, entry) != entry->next); - set_perf_counter_pending(); + set_perf_event_pending(); put_cpu_var(perf_pending_head); } @@ -2472,7 +2471,7 @@ static int __perf_pending_run(void) return nr; } -static inline int perf_not_pending(struct perf_counter *counter) +static inline int perf_not_pending(struct perf_event *event) { /* * If we flush on whatever cpu we run, there is a chance we don't @@ -2487,15 +2486,15 @@ static inline int perf_not_pending(struct perf_counter *counter) * so that we do not miss the wakeup. -- see perf_pending_handle() */ smp_rmb(); - return counter->pending.next == NULL; + return event->pending.next == NULL; } -static void perf_pending_sync(struct perf_counter *counter) +static void perf_pending_sync(struct perf_event *event) { - wait_event(counter->waitq, perf_not_pending(counter)); + wait_event(event->waitq, perf_not_pending(event)); } -void perf_counter_do_pending(void) +void perf_event_do_pending(void) { __perf_pending_run(); } @@ -2536,25 +2535,25 @@ static void perf_output_wakeup(struct perf_output_handle *handle) atomic_set(&handle->data->poll, POLL_IN); if (handle->nmi) { - handle->counter->pending_wakeup = 1; - perf_pending_queue(&handle->counter->pending, - perf_pending_counter); + handle->event->pending_wakeup = 1; + perf_pending_queue(&handle->event->pending, + perf_pending_event); } else - perf_counter_wakeup(handle->counter); + perf_event_wakeup(handle->event); } /* * Curious locking construct. * - * We need to ensure a later event doesn't publish a head when a former - * event isn't done writing. However since we need to deal with NMIs we + * We need to ensure a later event_id doesn't publish a head when a former + * event_id isn't done writing. However since we need to deal with NMIs we * cannot fully serialize things. * * What we do is serialize between CPUs so we only have to deal with NMI * nesting on a single CPU. * * We only publish the head (and generate a wakeup) when the outer-most - * event completes. + * event_id completes. */ static void perf_output_lock(struct perf_output_handle *handle) { @@ -2658,10 +2657,10 @@ void perf_output_copy(struct perf_output_handle *handle, } int perf_output_begin(struct perf_output_handle *handle, - struct perf_counter *counter, unsigned int size, + struct perf_event *event, unsigned int size, int nmi, int sample) { - struct perf_counter *output_counter; + struct perf_event *output_event; struct perf_mmap_data *data; unsigned long tail, offset, head; int have_lost; @@ -2673,21 +2672,21 @@ int perf_output_begin(struct perf_output_handle *handle, rcu_read_lock(); /* - * For inherited counters we send all the output towards the parent. + * For inherited events we send all the output towards the parent. */ - if (counter->parent) - counter = counter->parent; + if (event->parent) + event = event->parent; - output_counter = rcu_dereference(counter->output); - if (output_counter) - counter = output_counter; + output_event = rcu_dereference(event->output); + if (output_event) + event = output_event; - data = rcu_dereference(counter->data); + data = rcu_dereference(event->data); if (!data) goto out; handle->data = data; - handle->counter = counter; + handle->event = event; handle->nmi = nmi; handle->sample = sample; @@ -2721,10 +2720,10 @@ int perf_output_begin(struct perf_output_handle *handle, atomic_set(&data->wakeup, 1); if (have_lost) { - lost_event.header.type = PERF_EVENT_LOST; + lost_event.header.type = PERF_RECORD_LOST; lost_event.header.misc = 0; lost_event.header.size = sizeof(lost_event); - lost_event.id = counter->id; + lost_event.id = event->id; lost_event.lost = atomic_xchg(&data->lost, 0); perf_output_put(handle, lost_event); @@ -2743,10 +2742,10 @@ out: void perf_output_end(struct perf_output_handle *handle) { - struct perf_counter *counter = handle->counter; + struct perf_event *event = handle->event; struct perf_mmap_data *data = handle->data; - int wakeup_events = counter->attr.wakeup_events; + int wakeup_events = event->attr.wakeup_events; if (handle->sample && wakeup_events) { int events = atomic_inc_return(&data->events); @@ -2760,58 +2759,58 @@ void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } -static u32 perf_counter_pid(struct perf_counter *counter, struct task_struct *p) +static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) { /* - * only top level counters have the pid namespace they were created in + * only top level events have the pid namespace they were created in */ - if (counter->parent) - counter = counter->parent; + if (event->parent) + event = event->parent; - return task_tgid_nr_ns(p, counter->ns); + return task_tgid_nr_ns(p, event->ns); } -static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p) +static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) { /* - * only top level counters have the pid namespace they were created in + * only top level events have the pid namespace they were created in */ - if (counter->parent) - counter = counter->parent; + if (event->parent) + event = event->parent; - return task_pid_nr_ns(p, counter->ns); + return task_pid_nr_ns(p, event->ns); } static void perf_output_read_one(struct perf_output_handle *handle, - struct perf_counter *counter) + struct perf_event *event) { - u64 read_format = counter->attr.read_format; + u64 read_format = event->attr.read_format; u64 values[4]; int n = 0; - values[n++] = atomic64_read(&counter->count); + values[n++] = atomic64_read(&event->count); if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = counter->total_time_enabled + - atomic64_read(&counter->child_total_time_enabled); + values[n++] = event->total_time_enabled + + atomic64_read(&event->child_total_time_enabled); } if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = counter->total_time_running + - atomic64_read(&counter->child_total_time_running); + values[n++] = event->total_time_running + + atomic64_read(&event->child_total_time_running); } if (read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(counter); + values[n++] = primary_event_id(event); perf_output_copy(handle, values, n * sizeof(u64)); } /* - * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult. + * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. */ static void perf_output_read_group(struct perf_output_handle *handle, - struct perf_counter *counter) + struct perf_event *event) { - struct perf_counter *leader = counter->group_leader, *sub; - u64 read_format = counter->attr.read_format; + struct perf_event *leader = event->group_leader, *sub; + u64 read_format = event->attr.read_format; u64 values[5]; int n = 0; @@ -2823,42 +2822,42 @@ static void perf_output_read_group(struct perf_output_handle *handle, if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) values[n++] = leader->total_time_running; - if (leader != counter) + if (leader != event) leader->pmu->read(leader); values[n++] = atomic64_read(&leader->count); if (read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(leader); + values[n++] = primary_event_id(leader); perf_output_copy(handle, values, n * sizeof(u64)); - list_for_each_entry(sub, &leader->sibling_list, list_entry) { + list_for_each_entry(sub, &leader->sibling_list, group_entry) { n = 0; - if (sub != counter) + if (sub != event) sub->pmu->read(sub); values[n++] = atomic64_read(&sub->count); if (read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(sub); + values[n++] = primary_event_id(sub); perf_output_copy(handle, values, n * sizeof(u64)); } } static void perf_output_read(struct perf_output_handle *handle, - struct perf_counter *counter) + struct perf_event *event) { - if (counter->attr.read_format & PERF_FORMAT_GROUP) - perf_output_read_group(handle, counter); + if (event->attr.read_format & PERF_FORMAT_GROUP) + perf_output_read_group(handle, event); else - perf_output_read_one(handle, counter); + perf_output_read_one(handle, event); } void perf_output_sample(struct perf_output_handle *handle, struct perf_event_header *header, struct perf_sample_data *data, - struct perf_counter *counter) + struct perf_event *event) { u64 sample_type = data->type; @@ -2889,7 +2888,7 @@ void perf_output_sample(struct perf_output_handle *handle, perf_output_put(handle, data->period); if (sample_type & PERF_SAMPLE_READ) - perf_output_read(handle, counter); + perf_output_read(handle, event); if (sample_type & PERF_SAMPLE_CALLCHAIN) { if (data->callchain) { @@ -2927,14 +2926,14 @@ void perf_output_sample(struct perf_output_handle *handle, void perf_prepare_sample(struct perf_event_header *header, struct perf_sample_data *data, - struct perf_counter *counter, + struct perf_event *event, struct pt_regs *regs) { - u64 sample_type = counter->attr.sample_type; + u64 sample_type = event->attr.sample_type; data->type = sample_type; - header->type = PERF_EVENT_SAMPLE; + header->type = PERF_RECORD_SAMPLE; header->size = sizeof(*header); header->misc = 0; @@ -2948,8 +2947,8 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_TID) { /* namespace issues */ - data->tid_entry.pid = perf_counter_pid(counter, current); - data->tid_entry.tid = perf_counter_tid(counter, current); + data->tid_entry.pid = perf_event_pid(event, current); + data->tid_entry.tid = perf_event_tid(event, current); header->size += sizeof(data->tid_entry); } @@ -2964,13 +2963,13 @@ void perf_prepare_sample(struct perf_event_header *header, header->size += sizeof(data->addr); if (sample_type & PERF_SAMPLE_ID) { - data->id = primary_counter_id(counter); + data->id = primary_event_id(event); header->size += sizeof(data->id); } if (sample_type & PERF_SAMPLE_STREAM_ID) { - data->stream_id = counter->id; + data->stream_id = event->id; header->size += sizeof(data->stream_id); } @@ -2986,7 +2985,7 @@ void perf_prepare_sample(struct perf_event_header *header, header->size += sizeof(data->period); if (sample_type & PERF_SAMPLE_READ) - header->size += perf_counter_read_size(counter); + header->size += perf_event_read_size(event); if (sample_type & PERF_SAMPLE_CALLCHAIN) { int size = 1; @@ -3012,25 +3011,25 @@ void perf_prepare_sample(struct perf_event_header *header, } } -static void perf_counter_output(struct perf_counter *counter, int nmi, +static void perf_event_output(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { struct perf_output_handle handle; struct perf_event_header header; - perf_prepare_sample(&header, data, counter, regs); + perf_prepare_sample(&header, data, event, regs); - if (perf_output_begin(&handle, counter, header.size, nmi, 1)) + if (perf_output_begin(&handle, event, header.size, nmi, 1)) return; - perf_output_sample(&handle, &header, data, counter); + perf_output_sample(&handle, &header, data, event); perf_output_end(&handle); } /* - * read event + * read event_id */ struct perf_read_event { @@ -3041,27 +3040,27 @@ struct perf_read_event { }; static void -perf_counter_read_event(struct perf_counter *counter, +perf_event_read_event(struct perf_event *event, struct task_struct *task) { struct perf_output_handle handle; - struct perf_read_event event = { + struct perf_read_event read_event = { .header = { - .type = PERF_EVENT_READ, + .type = PERF_RECORD_READ, .misc = 0, - .size = sizeof(event) + perf_counter_read_size(counter), + .size = sizeof(read_event) + perf_event_read_size(event), }, - .pid = perf_counter_pid(counter, task), - .tid = perf_counter_tid(counter, task), + .pid = perf_event_pid(event, task), + .tid = perf_event_tid(event, task), }; int ret; - ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); + ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); if (ret) return; - perf_output_put(&handle, event); - perf_output_read(&handle, counter); + perf_output_put(&handle, read_event); + perf_output_read(&handle, event); perf_output_end(&handle); } @@ -3074,7 +3073,7 @@ perf_counter_read_event(struct perf_counter *counter, struct perf_task_event { struct task_struct *task; - struct perf_counter_context *task_ctx; + struct perf_event_context *task_ctx; struct { struct perf_event_header header; @@ -3084,10 +3083,10 @@ struct perf_task_event { u32 tid; u32 ptid; u64 time; - } event; + } event_id; }; -static void perf_counter_task_output(struct perf_counter *counter, +static void perf_event_task_output(struct perf_event *event, struct perf_task_event *task_event) { struct perf_output_handle handle; @@ -3095,85 +3094,85 @@ static void perf_counter_task_output(struct perf_counter *counter, struct task_struct *task = task_event->task; int ret; - size = task_event->event.header.size; - ret = perf_output_begin(&handle, counter, size, 0, 0); + size = task_event->event_id.header.size; + ret = perf_output_begin(&handle, event, size, 0, 0); if (ret) return; - task_event->event.pid = perf_counter_pid(counter, task); - task_event->event.ppid = perf_counter_pid(counter, current); + task_event->event_id.pid = perf_event_pid(event, task); + task_event->event_id.ppid = perf_event_pid(event, current); - task_event->event.tid = perf_counter_tid(counter, task); - task_event->event.ptid = perf_counter_tid(counter, current); + task_event->event_id.tid = perf_event_tid(event, task); + task_event->event_id.ptid = perf_event_tid(event, current); - task_event->event.time = perf_clock(); + task_event->event_id.time = perf_clock(); - perf_output_put(&handle, task_event->event); + perf_output_put(&handle, task_event->event_id); perf_output_end(&handle); } -static int perf_counter_task_match(struct perf_counter *counter) +static int perf_event_task_match(struct perf_event *event) { - if (counter->attr.comm || counter->attr.mmap || counter->attr.task) + if (event->attr.comm || event->attr.mmap || event->attr.task) return 1; return 0; } -static void perf_counter_task_ctx(struct perf_counter_context *ctx, +static void perf_event_task_ctx(struct perf_event_context *ctx, struct perf_task_event *task_event) { - struct perf_counter *counter; + struct perf_event *event; if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) return; rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_counter_task_match(counter)) - perf_counter_task_output(counter, task_event); + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + if (perf_event_task_match(event)) + perf_event_task_output(event, task_event); } rcu_read_unlock(); } -static void perf_counter_task_event(struct perf_task_event *task_event) +static void perf_event_task_event(struct perf_task_event *task_event) { struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx = task_event->task_ctx; + struct perf_event_context *ctx = task_event->task_ctx; cpuctx = &get_cpu_var(perf_cpu_context); - perf_counter_task_ctx(&cpuctx->ctx, task_event); + perf_event_task_ctx(&cpuctx->ctx, task_event); put_cpu_var(perf_cpu_context); rcu_read_lock(); if (!ctx) - ctx = rcu_dereference(task_event->task->perf_counter_ctxp); + ctx = rcu_dereference(task_event->task->perf_event_ctxp); if (ctx) - perf_counter_task_ctx(ctx, task_event); + perf_event_task_ctx(ctx, task_event); rcu_read_unlock(); } -static void perf_counter_task(struct task_struct *task, - struct perf_counter_context *task_ctx, +static void perf_event_task(struct task_struct *task, + struct perf_event_context *task_ctx, int new) { struct perf_task_event task_event; - if (!atomic_read(&nr_comm_counters) && - !atomic_read(&nr_mmap_counters) && - !atomic_read(&nr_task_counters)) + if (!atomic_read(&nr_comm_events) && + !atomic_read(&nr_mmap_events) && + !atomic_read(&nr_task_events)) return; task_event = (struct perf_task_event){ .task = task, .task_ctx = task_ctx, - .event = { + .event_id = { .header = { - .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT, + .type = new ? PERF_RECORD_FORK : PERF_RECORD_EXIT, .misc = 0, - .size = sizeof(task_event.event), + .size = sizeof(task_event.event_id), }, /* .pid */ /* .ppid */ @@ -3182,12 +3181,12 @@ static void perf_counter_task(struct task_struct *task, }, }; - perf_counter_task_event(&task_event); + perf_event_task_event(&task_event); } -void perf_counter_fork(struct task_struct *task) +void perf_event_fork(struct task_struct *task) { - perf_counter_task(task, NULL, 1); + perf_event_task(task, NULL, 1); } /* @@ -3204,56 +3203,56 @@ struct perf_comm_event { u32 pid; u32 tid; - } event; + } event_id; }; -static void perf_counter_comm_output(struct perf_counter *counter, +static void perf_event_comm_output(struct perf_event *event, struct perf_comm_event *comm_event) { struct perf_output_handle handle; - int size = comm_event->event.header.size; - int ret = perf_output_begin(&handle, counter, size, 0, 0); + int size = comm_event->event_id.header.size; + int ret = perf_output_begin(&handle, event, size, 0, 0); if (ret) return; - comm_event->event.pid = perf_counter_pid(counter, comm_event->task); - comm_event->event.tid = perf_counter_tid(counter, comm_event->task); + comm_event->event_id.pid = perf_event_pid(event, comm_event->task); + comm_event->event_id.tid = perf_event_tid(event, comm_event->task); - perf_output_put(&handle, comm_event->event); + perf_output_put(&handle, comm_event->event_id); perf_output_copy(&handle, comm_event->comm, comm_event->comm_size); perf_output_end(&handle); } -static int perf_counter_comm_match(struct perf_counter *counter) +static int perf_event_comm_match(struct perf_event *event) { - if (counter->attr.comm) + if (event->attr.comm) return 1; return 0; } -static void perf_counter_comm_ctx(struct perf_counter_context *ctx, +static void perf_event_comm_ctx(struct perf_event_context *ctx, struct perf_comm_event *comm_event) { - struct perf_counter *counter; + struct perf_event *event; if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) return; rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_counter_comm_match(counter)) - perf_counter_comm_output(counter, comm_event); + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + if (perf_event_comm_match(event)) + perf_event_comm_output(event, comm_event); } rcu_read_unlock(); } -static void perf_counter_comm_event(struct perf_comm_event *comm_event) +static void perf_event_comm_event(struct perf_comm_event *comm_event) { struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx; + struct perf_event_context *ctx; unsigned int size; char comm[TASK_COMM_LEN]; @@ -3264,10 +3263,10 @@ static void perf_counter_comm_event(struct perf_comm_event *comm_event) comm_event->comm = comm; comm_event->comm_size = size; - comm_event->event.header.size = sizeof(comm_event->event) + size; + comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; cpuctx = &get_cpu_var(perf_cpu_context); - perf_counter_comm_ctx(&cpuctx->ctx, comm_event); + perf_event_comm_ctx(&cpuctx->ctx, comm_event); put_cpu_var(perf_cpu_context); rcu_read_lock(); @@ -3275,29 +3274,29 @@ static void perf_counter_comm_event(struct perf_comm_event *comm_event) * doesn't really matter which of the child contexts the * events ends up in. */ - ctx = rcu_dereference(current->perf_counter_ctxp); + ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) - perf_counter_comm_ctx(ctx, comm_event); + perf_event_comm_ctx(ctx, comm_event); rcu_read_unlock(); } -void perf_counter_comm(struct task_struct *task) +void perf_event_comm(struct task_struct *task) { struct perf_comm_event comm_event; - if (task->perf_counter_ctxp) - perf_counter_enable_on_exec(task); + if (task->perf_event_ctxp) + perf_event_enable_on_exec(task); - if (!atomic_read(&nr_comm_counters)) + if (!atomic_read(&nr_comm_events)) return; comm_event = (struct perf_comm_event){ .task = task, /* .comm */ /* .comm_size */ - .event = { + .event_id = { .header = { - .type = PERF_EVENT_COMM, + .type = PERF_RECORD_COMM, .misc = 0, /* .size */ }, @@ -3306,7 +3305,7 @@ void perf_counter_comm(struct task_struct *task) }, }; - perf_counter_comm_event(&comm_event); + perf_event_comm_event(&comm_event); } /* @@ -3327,57 +3326,57 @@ struct perf_mmap_event { u64 start; u64 len; u64 pgoff; - } event; + } event_id; }; -static void perf_counter_mmap_output(struct perf_counter *counter, +static void perf_event_mmap_output(struct perf_event *event, struct perf_mmap_event *mmap_event) { struct perf_output_handle handle; - int size = mmap_event->event.header.size; - int ret = perf_output_begin(&handle, counter, size, 0, 0); + int size = mmap_event->event_id.header.size; + int ret = perf_output_begin(&handle, event, size, 0, 0); if (ret) return; - mmap_event->event.pid = perf_counter_pid(counter, current); - mmap_event->event.tid = perf_counter_tid(counter, current); + mmap_event->event_id.pid = perf_event_pid(event, current); + mmap_event->event_id.tid = perf_event_tid(event, current); - perf_output_put(&handle, mmap_event->event); + perf_output_put(&handle, mmap_event->event_id); perf_output_copy(&handle, mmap_event->file_name, mmap_event->file_size); perf_output_end(&handle); } -static int perf_counter_mmap_match(struct perf_counter *counter, +static int perf_event_mmap_match(struct perf_event *event, struct perf_mmap_event *mmap_event) { - if (counter->attr.mmap) + if (event->attr.mmap) return 1; return 0; } -static void perf_counter_mmap_ctx(struct perf_counter_context *ctx, +static void perf_event_mmap_ctx(struct perf_event_context *ctx, struct perf_mmap_event *mmap_event) { - struct perf_counter *counter; + struct perf_event *event; if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) return; rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_counter_mmap_match(counter, mmap_event)) - perf_counter_mmap_output(counter, mmap_event); + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + if (perf_event_mmap_match(event, mmap_event)) + perf_event_mmap_output(event, mmap_event); } rcu_read_unlock(); } -static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event) +static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) { struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx; + struct perf_event_context *ctx; struct vm_area_struct *vma = mmap_event->vma; struct file *file = vma->vm_file; unsigned int size; @@ -3425,10 +3424,10 @@ got_name: mmap_event->file_name = name; mmap_event->file_size = size; - mmap_event->event.header.size = sizeof(mmap_event->event) + size; + mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; cpuctx = &get_cpu_var(perf_cpu_context); - perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event); + perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); put_cpu_var(perf_cpu_context); rcu_read_lock(); @@ -3436,28 +3435,28 @@ got_name: * doesn't really matter which of the child contexts the * events ends up in. */ - ctx = rcu_dereference(current->perf_counter_ctxp); + ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) - perf_counter_mmap_ctx(ctx, mmap_event); + perf_event_mmap_ctx(ctx, mmap_event); rcu_read_unlock(); kfree(buf); } -void __perf_counter_mmap(struct vm_area_struct *vma) +void __perf_event_mmap(struct vm_area_struct *vma) { struct perf_mmap_event mmap_event; - if (!atomic_read(&nr_mmap_counters)) + if (!atomic_read(&nr_mmap_events)) return; mmap_event = (struct perf_mmap_event){ .vma = vma, /* .file_name */ /* .file_size */ - .event = { + .event_id = { .header = { - .type = PERF_EVENT_MMAP, + .type = PERF_RECORD_MMAP, .misc = 0, /* .size */ }, @@ -3469,14 +3468,14 @@ void __perf_counter_mmap(struct vm_area_struct *vma) }, }; - perf_counter_mmap_event(&mmap_event); + perf_event_mmap_event(&mmap_event); } /* * IRQ throttle logging */ -static void perf_log_throttle(struct perf_counter *counter, int enable) +static void perf_log_throttle(struct perf_event *event, int enable) { struct perf_output_handle handle; int ret; @@ -3488,19 +3487,19 @@ static void perf_log_throttle(struct perf_counter *counter, int enable) u64 stream_id; } throttle_event = { .header = { - .type = PERF_EVENT_THROTTLE, + .type = PERF_RECORD_THROTTLE, .misc = 0, .size = sizeof(throttle_event), }, .time = perf_clock(), - .id = primary_counter_id(counter), - .stream_id = counter->id, + .id = primary_event_id(event), + .stream_id = event->id, }; if (enable) - throttle_event.header.type = PERF_EVENT_UNTHROTTLE; + throttle_event.header.type = PERF_RECORD_UNTHROTTLE; - ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0); + ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0); if (ret) return; @@ -3509,18 +3508,18 @@ static void perf_log_throttle(struct perf_counter *counter, int enable) } /* - * Generic counter overflow handling, sampling. + * Generic event overflow handling, sampling. */ -static int __perf_counter_overflow(struct perf_counter *counter, int nmi, +static int __perf_event_overflow(struct perf_event *event, int nmi, int throttle, struct perf_sample_data *data, struct pt_regs *regs) { - int events = atomic_read(&counter->event_limit); - struct hw_perf_counter *hwc = &counter->hw; + int events = atomic_read(&event->event_limit); + struct hw_perf_event *hwc = &event->hw; int ret = 0; - throttle = (throttle && counter->pmu->unthrottle != NULL); + throttle = (throttle && event->pmu->unthrottle != NULL); if (!throttle) { hwc->interrupts++; @@ -3528,73 +3527,73 @@ static int __perf_counter_overflow(struct perf_counter *counter, int nmi, if (hwc->interrupts != MAX_INTERRUPTS) { hwc->interrupts++; if (HZ * hwc->interrupts > - (u64)sysctl_perf_counter_sample_rate) { + (u64)sysctl_perf_event_sample_rate) { hwc->interrupts = MAX_INTERRUPTS; - perf_log_throttle(counter, 0); + perf_log_throttle(event, 0); ret = 1; } } else { /* - * Keep re-disabling counters even though on the previous + * Keep re-disabling events even though on the previous * pass we disabled it - just in case we raced with a - * sched-in and the counter got enabled again: + * sched-in and the event got enabled again: */ ret = 1; } } - if (counter->attr.freq) { + if (event->attr.freq) { u64 now = perf_clock(); s64 delta = now - hwc->freq_stamp; hwc->freq_stamp = now; if (delta > 0 && delta < TICK_NSEC) - perf_adjust_period(counter, NSEC_PER_SEC / (int)delta); + perf_adjust_period(event, NSEC_PER_SEC / (int)delta); } /* * XXX event_limit might not quite work as expected on inherited - * counters + * events */ - counter->pending_kill = POLL_IN; - if (events && atomic_dec_and_test(&counter->event_limit)) { + event->pending_kill = POLL_IN; + if (events && atomic_dec_and_test(&event->event_limit)) { ret = 1; - counter->pending_kill = POLL_HUP; + event->pending_kill = POLL_HUP; if (nmi) { - counter->pending_disable = 1; - perf_pending_queue(&counter->pending, - perf_pending_counter); + event->pending_disable = 1; + perf_pending_queue(&event->pending, + perf_pending_event); } else - perf_counter_disable(counter); + perf_event_disable(event); } - perf_counter_output(counter, nmi, data, regs); + perf_event_output(event, nmi, data, regs); return ret; } -int perf_counter_overflow(struct perf_counter *counter, int nmi, +int perf_event_overflow(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { - return __perf_counter_overflow(counter, nmi, 1, data, regs); + return __perf_event_overflow(event, nmi, 1, data, regs); } /* - * Generic software counter infrastructure + * Generic software event infrastructure */ /* - * We directly increment counter->count and keep a second value in - * counter->hw.period_left to count intervals. This period counter + * We directly increment event->count and keep a second value in + * event->hw.period_left to count intervals. This period event * is kept in the range [-sample_period, 0] so that we can use the * sign as trigger. */ -static u64 perf_swcounter_set_period(struct perf_counter *counter) +static u64 perf_swevent_set_period(struct perf_event *event) { - struct hw_perf_counter *hwc = &counter->hw; + struct hw_perf_event *hwc = &event->hw; u64 period = hwc->last_period; u64 nr, offset; s64 old, val; @@ -3615,22 +3614,22 @@ again: return nr; } -static void perf_swcounter_overflow(struct perf_counter *counter, +static void perf_swevent_overflow(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { - struct hw_perf_counter *hwc = &counter->hw; + struct hw_perf_event *hwc = &event->hw; int throttle = 0; u64 overflow; - data->period = counter->hw.last_period; - overflow = perf_swcounter_set_period(counter); + data->period = event->hw.last_period; + overflow = perf_swevent_set_period(event); if (hwc->interrupts == MAX_INTERRUPTS) return; for (; overflow; overflow--) { - if (__perf_counter_overflow(counter, nmi, throttle, + if (__perf_event_overflow(event, nmi, throttle, data, regs)) { /* * We inhibit the overflow from happening when @@ -3642,20 +3641,20 @@ static void perf_swcounter_overflow(struct perf_counter *counter, } } -static void perf_swcounter_unthrottle(struct perf_counter *counter) +static void perf_swevent_unthrottle(struct perf_event *event) { /* * Nothing to do, we already reset hwc->interrupts. */ } -static void perf_swcounter_add(struct perf_counter *counter, u64 nr, +static void perf_swevent_add(struct perf_event *event, u64 nr, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { - struct hw_perf_counter *hwc = &counter->hw; + struct hw_perf_event *hwc = &event->hw; - atomic64_add(nr, &counter->count); + atomic64_add(nr, &event->count); if (!hwc->sample_period) return; @@ -3664,29 +3663,29 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr, return; if (!atomic64_add_negative(nr, &hwc->period_left)) - perf_swcounter_overflow(counter, nmi, data, regs); + perf_swevent_overflow(event, nmi, data, regs); } -static int perf_swcounter_is_counting(struct perf_counter *counter) +static int perf_swevent_is_counting(struct perf_event *event) { /* - * The counter is active, we're good! + * The event is active, we're good! */ - if (counter->state == PERF_COUNTER_STATE_ACTIVE) + if (event->state == PERF_EVENT_STATE_ACTIVE) return 1; /* - * The counter is off/error, not counting. + * The event is off/error, not counting. */ - if (counter->state != PERF_COUNTER_STATE_INACTIVE) + if (event->state != PERF_EVENT_STATE_INACTIVE) return 0; /* - * The counter is inactive, if the context is active + * The event is inactive, if the context is active * we're part of a group that didn't make it on the 'pmu', * not counting. */ - if (counter->ctx->is_active) + if (event->ctx->is_active) return 0; /* @@ -3697,49 +3696,49 @@ static int perf_swcounter_is_counting(struct perf_counter *counter) return 1; } -static int perf_swcounter_match(struct perf_counter *counter, +static int perf_swevent_match(struct perf_event *event, enum perf_type_id type, - u32 event, struct pt_regs *regs) + u32 event_id, struct pt_regs *regs) { - if (!perf_swcounter_is_counting(counter)) + if (!perf_swevent_is_counting(event)) return 0; - if (counter->attr.type != type) + if (event->attr.type != type) return 0; - if (counter->attr.config != event) + if (event->attr.config != event_id) return 0; if (regs) { - if (counter->attr.exclude_user && user_mode(regs)) + if (event->attr.exclude_user && user_mode(regs)) return 0; - if (counter->attr.exclude_kernel && !user_mode(regs)) + if (event->attr.exclude_kernel && !user_mode(regs)) return 0; } return 1; } -static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, +static void perf_swevent_ctx_event(struct perf_event_context *ctx, enum perf_type_id type, - u32 event, u64 nr, int nmi, + u32 event_id, u64 nr, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { - struct perf_counter *counter; + struct perf_event *event; if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) return; rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_swcounter_match(counter, type, event, regs)) - perf_swcounter_add(counter, nr, nmi, data, regs); + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + if (perf_swevent_match(event, type, event_id, regs)) + perf_swevent_add(event, nr, nmi, data, regs); } rcu_read_unlock(); } -static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) +static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) { if (in_nmi()) return &cpuctx->recursion[3]; @@ -3753,14 +3752,14 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) return &cpuctx->recursion[0]; } -static void do_perf_swcounter_event(enum perf_type_id type, u32 event, +static void do_perf_sw_event(enum perf_type_id type, u32 event_id, u64 nr, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); - int *recursion = perf_swcounter_recursion_context(cpuctx); - struct perf_counter_context *ctx; + int *recursion = perf_swevent_recursion_context(cpuctx); + struct perf_event_context *ctx; if (*recursion) goto out; @@ -3768,16 +3767,16 @@ static void do_perf_swcounter_event(enum perf_type_id type, u32 event, (*recursion)++; barrier(); - perf_swcounter_ctx_event(&cpuctx->ctx, type, event, + perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, nr, nmi, data, regs); rcu_read_lock(); /* * doesn't really matter which of the child contexts the * events ends up in. */ - ctx = rcu_dereference(current->perf_counter_ctxp); + ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) - perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data, regs); + perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); rcu_read_unlock(); barrier(); @@ -3787,57 +3786,57 @@ out: put_cpu_var(perf_cpu_context); } -void __perf_swcounter_event(u32 event, u64 nr, int nmi, +void __perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { struct perf_sample_data data = { .addr = addr, }; - do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, + do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); } -static void perf_swcounter_read(struct perf_counter *counter) +static void perf_swevent_read(struct perf_event *event) { } -static int perf_swcounter_enable(struct perf_counter *counter) +static int perf_swevent_enable(struct perf_event *event) { - struct hw_perf_counter *hwc = &counter->hw; + struct hw_perf_event *hwc = &event->hw; if (hwc->sample_period) { hwc->last_period = hwc->sample_period; - perf_swcounter_set_period(counter); + perf_swevent_set_period(event); } return 0; } -static void perf_swcounter_disable(struct perf_counter *counter) +static void perf_swevent_disable(struct perf_event *event) { } static const struct pmu perf_ops_generic = { - .enable = perf_swcounter_enable, - .disable = perf_swcounter_disable, - .read = perf_swcounter_read, - .unthrottle = perf_swcounter_unthrottle, + .enable = perf_swevent_enable, + .disable = perf_swevent_disable, + .read = perf_swevent_read, + .unthrottle = perf_swevent_unthrottle, }; /* - * hrtimer based swcounter callback + * hrtimer based swevent callback */ -static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) +static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) { enum hrtimer_restart ret = HRTIMER_RESTART; struct perf_sample_data data; struct pt_regs *regs; - struct perf_counter *counter; + struct perf_event *event; u64 period; - counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); - counter->pmu->read(counter); + event = container_of(hrtimer, struct perf_event, hw.hrtimer); + event->pmu->read(event); data.addr = 0; regs = get_irq_regs(); @@ -3845,45 +3844,45 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) * In case we exclude kernel IPs or are somehow not in interrupt * context, provide the next best thing, the user IP. */ - if ((counter->attr.exclude_kernel || !regs) && - !counter->attr.exclude_user) + if ((event->attr.exclude_kernel || !regs) && + !event->attr.exclude_user) regs = task_pt_regs(current); if (regs) { - if (perf_counter_overflow(counter, 0, &data, regs)) + if (perf_event_overflow(event, 0, &data, regs)) ret = HRTIMER_NORESTART; } - period = max_t(u64, 10000, counter->hw.sample_period); + period = max_t(u64, 10000, event->hw.sample_period); hrtimer_forward_now(hrtimer, ns_to_ktime(period)); return ret; } /* - * Software counter: cpu wall time clock + * Software event: cpu wall time clock */ -static void cpu_clock_perf_counter_update(struct perf_counter *counter) +static void cpu_clock_perf_event_update(struct perf_event *event) { int cpu = raw_smp_processor_id(); s64 prev; u64 now; now = cpu_clock(cpu); - prev = atomic64_read(&counter->hw.prev_count); - atomic64_set(&counter->hw.prev_count, now); - atomic64_add(now - prev, &counter->count); + prev = atomic64_read(&event->hw.prev_count); + atomic64_set(&event->hw.prev_count, now); + atomic64_add(now - prev, &event->count); } -static int cpu_clock_perf_counter_enable(struct perf_counter *counter) +static int cpu_clock_perf_event_enable(struct perf_event *event) { - struct hw_perf_counter *hwc = &counter->hw; + struct hw_perf_event *hwc = &event->hw; int cpu = raw_smp_processor_id(); atomic64_set(&hwc->prev_count, cpu_clock(cpu)); hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swcounter_hrtimer; + hwc->hrtimer.function = perf_swevent_hrtimer; if (hwc->sample_period) { u64 period = max_t(u64, 10000, hwc->sample_period); __hrtimer_start_range_ns(&hwc->hrtimer, @@ -3894,48 +3893,48 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter) return 0; } -static void cpu_clock_perf_counter_disable(struct perf_counter *counter) +static void cpu_clock_perf_event_disable(struct perf_event *event) { - if (counter->hw.sample_period) - hrtimer_cancel(&counter->hw.hrtimer); - cpu_clock_perf_counter_update(counter); + if (event->hw.sample_period) + hrtimer_cancel(&event->hw.hrtimer); + cpu_clock_perf_event_update(event); } -static void cpu_clock_perf_counter_read(struct perf_counter *counter) +static void cpu_clock_perf_event_read(struct perf_event *event) { - cpu_clock_perf_counter_update(counter); + cpu_clock_perf_event_update(event); } static const struct pmu perf_ops_cpu_clock = { - .enable = cpu_clock_perf_counter_enable, - .disable = cpu_clock_perf_counter_disable, - .read = cpu_clock_perf_counter_read, + .enable = cpu_clock_perf_event_enable, + .disable = cpu_clock_perf_event_disable, + .read = cpu_clock_perf_event_read, }; /* - * Software counter: task time clock + * Software event: task time clock */ -static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now) +static void task_clock_perf_event_update(struct perf_event *event, u64 now) { u64 prev; s64 delta; - prev = atomic64_xchg(&counter->hw.prev_count, now); + prev = atomic64_xchg(&event->hw.prev_count, now); delta = now - prev; - atomic64_add(delta, &counter->count); + atomic64_add(delta, &event->count); } -static int task_clock_perf_counter_enable(struct perf_counter *counter) +static int task_clock_perf_event_enable(struct perf_event *event) { - struct hw_perf_counter *hwc = &counter->hw; + struct hw_perf_event *hwc = &event->hw; u64 now; - now = counter->ctx->time; + now = event->ctx->time; atomic64_set(&hwc->prev_count, now); hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swcounter_hrtimer; + hwc->hrtimer.function = perf_swevent_hrtimer; if (hwc->sample_period) { u64 period = max_t(u64, 10000, hwc->sample_period); __hrtimer_start_range_ns(&hwc->hrtimer, @@ -3946,38 +3945,38 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter) return 0; } -static void task_clock_perf_counter_disable(struct perf_counter *counter) +static void task_clock_perf_event_disable(struct perf_event *event) { - if (counter->hw.sample_period) - hrtimer_cancel(&counter->hw.hrtimer); - task_clock_perf_counter_update(counter, counter->ctx->time); + if (event->hw.sample_period) + hrtimer_cancel(&event->hw.hrtimer); + task_clock_perf_event_update(event, event->ctx->time); } -static void task_clock_perf_counter_read(struct perf_counter *counter) +static void task_clock_perf_event_read(struct perf_event *event) { u64 time; if (!in_nmi()) { - update_context_time(counter->ctx); - time = counter->ctx->time; + update_context_time(event->ctx); + time = event->ctx->time; } else { u64 now = perf_clock(); - u64 delta = now - counter->ctx->timestamp; - time = counter->ctx->time + delta; + u64 delta = now - event->ctx->timestamp; + time = event->ctx->time + delta; } - task_clock_perf_counter_update(counter, time); + task_clock_perf_event_update(event, time); } static const struct pmu perf_ops_task_clock = { - .enable = task_clock_perf_counter_enable, - .disable = task_clock_perf_counter_disable, - .read = task_clock_perf_counter_read, + .enable = task_clock_perf_event_enable, + .disable = task_clock_perf_event_disable, + .read = task_clock_perf_event_read, }; #ifdef CONFIG_EVENT_PROFILE -void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record, +void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size) { struct perf_raw_record raw = { @@ -3995,78 +3994,78 @@ void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record, if (!regs) regs = task_pt_regs(current); - do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, + do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data, regs); } -EXPORT_SYMBOL_GPL(perf_tpcounter_event); +EXPORT_SYMBOL_GPL(perf_tp_event); extern int ftrace_profile_enable(int); extern void ftrace_profile_disable(int); -static void tp_perf_counter_destroy(struct perf_counter *counter) +static void tp_perf_event_destroy(struct perf_event *event) { - ftrace_profile_disable(counter->attr.config); + ftrace_profile_disable(event->attr.config); } -static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) +static const struct pmu *tp_perf_event_init(struct perf_event *event) { /* * Raw tracepoint data is a severe data leak, only allow root to * have these. */ - if ((counter->attr.sample_type & PERF_SAMPLE_RAW) && + if ((event->attr.sample_type & PERF_SAMPLE_RAW) && perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); - if (ftrace_profile_enable(counter->attr.config)) + if (ftrace_profile_enable(event->attr.config)) return NULL; - counter->destroy = tp_perf_counter_destroy; + event->destroy = tp_perf_event_destroy; return &perf_ops_generic; } #else -static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) +static const struct pmu *tp_perf_event_init(struct perf_event *event) { return NULL; } #endif -atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; +atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; -static void sw_perf_counter_destroy(struct perf_counter *counter) +static void sw_perf_event_destroy(struct perf_event *event) { - u64 event = counter->attr.config; + u64 event_id = event->attr.config; - WARN_ON(counter->parent); + WARN_ON(event->parent); - atomic_dec(&perf_swcounter_enabled[event]); + atomic_dec(&perf_swevent_enabled[event_id]); } -static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) +static const struct pmu *sw_perf_event_init(struct perf_event *event) { const struct pmu *pmu = NULL; - u64 event = counter->attr.config; + u64 event_id = event->attr.config; /* - * Software counters (currently) can't in general distinguish + * Software events (currently) can't in general distinguish * between user, kernel and hypervisor events. * However, context switches and cpu migrations are considered * to be kernel events, and page faults are never hypervisor * events. */ - switch (event) { + switch (event_id) { case PERF_COUNT_SW_CPU_CLOCK: pmu = &perf_ops_cpu_clock; break; case PERF_COUNT_SW_TASK_CLOCK: /* - * If the user instantiates this as a per-cpu counter, - * use the cpu_clock counter instead. + * If the user instantiates this as a per-cpu event, + * use the cpu_clock event instead. */ - if (counter->ctx->task) + if (event->ctx->task) pmu = &perf_ops_task_clock; else pmu = &perf_ops_cpu_clock; @@ -4077,9 +4076,9 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_SW_PAGE_FAULTS_MAJ: case PERF_COUNT_SW_CONTEXT_SWITCHES: case PERF_COUNT_SW_CPU_MIGRATIONS: - if (!counter->parent) { - atomic_inc(&perf_swcounter_enabled[event]); - counter->destroy = sw_perf_counter_destroy; + if (!event->parent) { + atomic_inc(&perf_swevent_enabled[event_id]); + event->destroy = sw_perf_event_destroy; } pmu = &perf_ops_generic; break; @@ -4089,62 +4088,62 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) } /* - * Allocate and initialize a counter structure + * Allocate and initialize a event structure */ -static struct perf_counter * -perf_counter_alloc(struct perf_counter_attr *attr, +static struct perf_event * +perf_event_alloc(struct perf_event_attr *attr, int cpu, - struct perf_counter_context *ctx, - struct perf_counter *group_leader, - struct perf_counter *parent_counter, + struct perf_event_context *ctx, + struct perf_event *group_leader, + struct perf_event *parent_event, gfp_t gfpflags) { const struct pmu *pmu; - struct perf_counter *counter; - struct hw_perf_counter *hwc; + struct perf_event *event; + struct hw_perf_event *hwc; long err; - counter = kzalloc(sizeof(*counter), gfpflags); - if (!counter) + event = kzalloc(sizeof(*event), gfpflags); + if (!event) return ERR_PTR(-ENOMEM); /* - * Single counters are their own group leaders, with an + * Single events are their own group leaders, with an * empty sibling list: */ if (!group_leader) - group_leader = counter; + group_leader = event; - mutex_init(&counter->child_mutex); - INIT_LIST_HEAD(&counter->child_list); + mutex_init(&event->child_mutex); + INIT_LIST_HEAD(&event->child_list); - INIT_LIST_HEAD(&counter->list_entry); - INIT_LIST_HEAD(&counter->event_entry); - INIT_LIST_HEAD(&counter->sibling_list); - init_waitqueue_head(&counter->waitq); + INIT_LIST_HEAD(&event->group_entry); + INIT_LIST_HEAD(&event->event_entry); + INIT_LIST_HEAD(&event->sibling_list); + init_waitqueue_head(&event->waitq); - mutex_init(&counter->mmap_mutex); + mutex_init(&event->mmap_mutex); - counter->cpu = cpu; - counter->attr = *attr; - counter->group_leader = group_leader; - counter->pmu = NULL; - counter->ctx = ctx; - counter->oncpu = -1; + event->cpu = cpu; + event->attr = *attr; + event->group_leader = group_leader; + event->pmu = NULL; + event->ctx = ctx; + event->oncpu = -1; - counter->parent = parent_counter; + event->parent = parent_event; - counter->ns = get_pid_ns(current->nsproxy->pid_ns); - counter->id = atomic64_inc_return(&perf_counter_id); + event->ns = get_pid_ns(current->nsproxy->pid_ns); + event->id = atomic64_inc_return(&perf_event_id); - counter->state = PERF_COUNTER_STATE_INACTIVE; + event->state = PERF_EVENT_STATE_INACTIVE; if (attr->disabled) - counter->state = PERF_COUNTER_STATE_OFF; + event->state = PERF_EVENT_STATE_OFF; pmu = NULL; - hwc = &counter->hw; + hwc = &event->hw; hwc->sample_period = attr->sample_period; if (attr->freq && attr->sample_freq) hwc->sample_period = 1; @@ -4153,7 +4152,7 @@ perf_counter_alloc(struct perf_counter_attr *attr, atomic64_set(&hwc->period_left, hwc->sample_period); /* - * we currently do not support PERF_FORMAT_GROUP on inherited counters + * we currently do not support PERF_FORMAT_GROUP on inherited events */ if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) goto done; @@ -4162,15 +4161,15 @@ perf_counter_alloc(struct perf_counter_attr *attr, case PERF_TYPE_RAW: case PERF_TYPE_HARDWARE: case PERF_TYPE_HW_CACHE: - pmu = hw_perf_counter_init(counter); + pmu = hw_perf_event_init(event); break; case PERF_TYPE_SOFTWARE: - pmu = sw_perf_counter_init(counter); + pmu = sw_perf_event_init(event); break; case PERF_TYPE_TRACEPOINT: - pmu = tp_perf_counter_init(counter); + pmu = tp_perf_event_init(event); break; default: @@ -4184,29 +4183,29 @@ done: err = PTR_ERR(pmu); if (err) { - if (counter->ns) - put_pid_ns(counter->ns); - kfree(counter); + if (event->ns) + put_pid_ns(event->ns); + kfree(event); return ERR_PTR(err); } - counter->pmu = pmu; + event->pmu = pmu; - if (!counter->parent) { - atomic_inc(&nr_counters); - if (counter->attr.mmap) - atomic_inc(&nr_mmap_counters); - if (counter->attr.comm) - atomic_inc(&nr_comm_counters); - if (counter->attr.task) - atomic_inc(&nr_task_counters); + if (!event->parent) { + atomic_inc(&nr_events); + if (event->attr.mmap) + atomic_inc(&nr_mmap_events); + if (event->attr.comm) + atomic_inc(&nr_comm_events); + if (event->attr.task) + atomic_inc(&nr_task_events); } - return counter; + return event; } -static int perf_copy_attr(struct perf_counter_attr __user *uattr, - struct perf_counter_attr *attr) +static int perf_copy_attr(struct perf_event_attr __user *uattr, + struct perf_event_attr *attr) { u32 size; int ret; @@ -4285,11 +4284,11 @@ err_size: goto out; } -int perf_counter_set_output(struct perf_counter *counter, int output_fd) +int perf_event_set_output(struct perf_event *event, int output_fd) { - struct perf_counter *output_counter = NULL; + struct perf_event *output_event = NULL; struct file *output_file = NULL; - struct perf_counter *old_output; + struct perf_event *old_output; int fput_needed = 0; int ret = -EINVAL; @@ -4303,28 +4302,28 @@ int perf_counter_set_output(struct perf_counter *counter, int output_fd) if (output_file->f_op != &perf_fops) goto out; - output_counter = output_file->private_data; + output_event = output_file->private_data; /* Don't chain output fds */ - if (output_counter->output) + if (output_event->output) goto out; /* Don't set an output fd when we already have an output channel */ - if (counter->data) + if (event->data) goto out; atomic_long_inc(&output_file->f_count); set: - mutex_lock(&counter->mmap_mutex); - old_output = counter->output; - rcu_assign_pointer(counter->output, output_counter); - mutex_unlock(&counter->mmap_mutex); + mutex_lock(&event->mmap_mutex); + old_output = event->output; + rcu_assign_pointer(event->output, output_event); + mutex_unlock(&event->mmap_mutex); if (old_output) { /* * we need to make sure no existing perf_output_*() - * is still referencing this counter. + * is still referencing this event. */ synchronize_rcu(); fput(old_output->filp); @@ -4337,21 +4336,21 @@ out: } /** - * sys_perf_counter_open - open a performance counter, associate it to a task/cpu + * sys_perf_event_open - open a performance event, associate it to a task/cpu * - * @attr_uptr: event type attributes for monitoring/sampling + * @attr_uptr: event_id type attributes for monitoring/sampling * @pid: target pid * @cpu: target cpu - * @group_fd: group leader counter fd + * @group_fd: group leader event fd */ -SYSCALL_DEFINE5(perf_counter_open, - struct perf_counter_attr __user *, attr_uptr, +SYSCALL_DEFINE5(perf_event_open, + struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) { - struct perf_counter *counter, *group_leader; - struct perf_counter_attr attr; - struct perf_counter_context *ctx; - struct file *counter_file = NULL; + struct perf_event *event, *group_leader; + struct perf_event_attr attr; + struct perf_event_context *ctx; + struct file *event_file = NULL; struct file *group_file = NULL; int fput_needed = 0; int fput_needed2 = 0; @@ -4371,7 +4370,7 @@ SYSCALL_DEFINE5(perf_counter_open, } if (attr.freq) { - if (attr.sample_freq > sysctl_perf_counter_sample_rate) + if (attr.sample_freq > sysctl_perf_event_sample_rate) return -EINVAL; } @@ -4383,7 +4382,7 @@ SYSCALL_DEFINE5(perf_counter_open, return PTR_ERR(ctx); /* - * Look up the group leader (we will attach this counter to it): + * Look up the group leader (we will attach this event to it): */ group_leader = NULL; if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) { @@ -4414,45 +4413,45 @@ SYSCALL_DEFINE5(perf_counter_open, goto err_put_context; } - counter = perf_counter_alloc(&attr, cpu, ctx, group_leader, + event = perf_event_alloc(&attr, cpu, ctx, group_leader, NULL, GFP_KERNEL); - err = PTR_ERR(counter); - if (IS_ERR(counter)) + err = PTR_ERR(event); + if (IS_ERR(event)) goto err_put_context; - err = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); + err = anon_inode_getfd("[perf_event]", &perf_fops, event, 0); if (err < 0) goto err_free_put_context; - counter_file = fget_light(err, &fput_needed2); - if (!counter_file) + event_file = fget_light(err, &fput_needed2); + if (!event_file) goto err_free_put_context; if (flags & PERF_FLAG_FD_OUTPUT) { - err = perf_counter_set_output(counter, group_fd); + err = perf_event_set_output(event, group_fd); if (err) goto err_fput_free_put_context; } - counter->filp = counter_file; + event->filp = event_file; WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); - perf_install_in_context(ctx, counter, cpu); + perf_install_in_context(ctx, event, cpu); ++ctx->generation; mutex_unlock(&ctx->mutex); - counter->owner = current; + event->owner = current; get_task_struct(current); - mutex_lock(¤t->perf_counter_mutex); - list_add_tail(&counter->owner_entry, ¤t->perf_counter_list); - mutex_unlock(¤t->perf_counter_mutex); + mutex_lock(¤t->perf_event_mutex); + list_add_tail(&event->owner_entry, ¤t->perf_event_list); + mutex_unlock(¤t->perf_event_mutex); err_fput_free_put_context: - fput_light(counter_file, fput_needed2); + fput_light(event_file, fput_needed2); err_free_put_context: if (err < 0) - kfree(counter); + kfree(event); err_put_context: if (err < 0) @@ -4464,88 +4463,88 @@ err_put_context: } /* - * inherit a counter from parent task to child task: + * inherit a event from parent task to child task: */ -static struct perf_counter * -inherit_counter(struct perf_counter *parent_counter, +static struct perf_event * +inherit_event(struct perf_event *parent_event, struct task_struct *parent, - struct perf_counter_context *parent_ctx, + struct perf_event_context *parent_ctx, struct task_struct *child, - struct perf_counter *group_leader, - struct perf_counter_context *child_ctx) + struct perf_event *group_leader, + struct perf_event_context *child_ctx) { - struct perf_counter *child_counter; + struct perf_event *child_event; /* - * Instead of creating recursive hierarchies of counters, - * we link inherited counters back to the original parent, + * Instead of creating recursive hierarchies of events, + * we link inherited events back to the original parent, * which has a filp for sure, which we use as the reference * count: */ - if (parent_counter->parent) - parent_counter = parent_counter->parent; + if (parent_event->parent) + parent_event = parent_event->parent; - child_counter = perf_counter_alloc(&parent_counter->attr, - parent_counter->cpu, child_ctx, - group_leader, parent_counter, + child_event = perf_event_alloc(&parent_event->attr, + parent_event->cpu, child_ctx, + group_leader, parent_event, GFP_KERNEL); - if (IS_ERR(child_counter)) - return child_counter; + if (IS_ERR(child_event)) + return child_event; get_ctx(child_ctx); /* - * Make the child state follow the state of the parent counter, + * Make the child state follow the state of the parent event, * not its attr.disabled bit. We hold the parent's mutex, - * so we won't race with perf_counter_{en, dis}able_family. + * so we won't race with perf_event_{en, dis}able_family. */ - if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE) - child_counter->state = PERF_COUNTER_STATE_INACTIVE; + if (parent_event->state >= PERF_EVENT_STATE_INACTIVE) + child_event->state = PERF_EVENT_STATE_INACTIVE; else - child_counter->state = PERF_COUNTER_STATE_OFF; + child_event->state = PERF_EVENT_STATE_OFF; - if (parent_counter->attr.freq) - child_counter->hw.sample_period = parent_counter->hw.sample_period; + if (parent_event->attr.freq) + child_event->hw.sample_period = parent_event->hw.sample_period; /* * Link it up in the child's context: */ - add_counter_to_ctx(child_counter, child_ctx); + add_event_to_ctx(child_event, child_ctx); /* * Get a reference to the parent filp - we will fput it - * when the child counter exits. This is safe to do because + * when the child event exits. This is safe to do because * we are in the parent and we know that the filp still * exists and has a nonzero count: */ - atomic_long_inc(&parent_counter->filp->f_count); + atomic_long_inc(&parent_event->filp->f_count); /* - * Link this into the parent counter's child list + * Link this into the parent event's child list */ - WARN_ON_ONCE(parent_counter->ctx->parent_ctx); - mutex_lock(&parent_counter->child_mutex); - list_add_tail(&child_counter->child_list, &parent_counter->child_list); - mutex_unlock(&parent_counter->child_mutex); + WARN_ON_ONCE(parent_event->ctx->parent_ctx); + mutex_lock(&parent_event->child_mutex); + list_add_tail(&child_event->child_list, &parent_event->child_list); + mutex_unlock(&parent_event->child_mutex); - return child_counter; + return child_event; } -static int inherit_group(struct perf_counter *parent_counter, +static int inherit_group(struct perf_event *parent_event, struct task_struct *parent, - struct perf_counter_context *parent_ctx, + struct perf_event_context *parent_ctx, struct task_struct *child, - struct perf_counter_context *child_ctx) + struct perf_event_context *child_ctx) { - struct perf_counter *leader; - struct perf_counter *sub; - struct perf_counter *child_ctr; + struct perf_event *leader; + struct perf_event *sub; + struct perf_event *child_ctr; - leader = inherit_counter(parent_counter, parent, parent_ctx, + leader = inherit_event(parent_event, parent, parent_ctx, child, NULL, child_ctx); if (IS_ERR(leader)) return PTR_ERR(leader); - list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) { - child_ctr = inherit_counter(sub, parent, parent_ctx, + list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { + child_ctr = inherit_event(sub, parent, parent_ctx, child, leader, child_ctx); if (IS_ERR(child_ctr)) return PTR_ERR(child_ctr); @@ -4553,74 +4552,74 @@ static int inherit_group(struct perf_counter *parent_counter, return 0; } -static void sync_child_counter(struct perf_counter *child_counter, +static void sync_child_event(struct perf_event *child_event, struct task_struct *child) { - struct perf_counter *parent_counter = child_counter->parent; + struct perf_event *parent_event = child_event->parent; u64 child_val; - if (child_counter->attr.inherit_stat) - perf_counter_read_event(child_counter, child); + if (child_event->attr.inherit_stat) + perf_event_read_event(child_event, child); - child_val = atomic64_read(&child_counter->count); + child_val = atomic64_read(&child_event->count); /* * Add back the child's count to the parent's count: */ - atomic64_add(child_val, &parent_counter->count); - atomic64_add(child_counter->total_time_enabled, - &parent_counter->child_total_time_enabled); - atomic64_add(child_counter->total_time_running, - &parent_counter->child_total_time_running); + atomic64_add(child_val, &parent_event->count); + atomic64_add(child_event->total_time_enabled, + &parent_event->child_total_time_enabled); + atomic64_add(child_event->total_time_running, + &parent_event->child_total_time_running); /* - * Remove this counter from the parent's list + * Remove this event from the parent's list */ - WARN_ON_ONCE(parent_counter->ctx->parent_ctx); - mutex_lock(&parent_counter->child_mutex); - list_del_init(&child_counter->child_list); - mutex_unlock(&parent_counter->child_mutex); + WARN_ON_ONCE(parent_event->ctx->parent_ctx); + mutex_lock(&parent_event->child_mutex); + list_del_init(&child_event->child_list); + mutex_unlock(&parent_event->child_mutex); /* - * Release the parent counter, if this was the last + * Release the parent event, if this was the last * reference to it. */ - fput(parent_counter->filp); + fput(parent_event->filp); } static void -__perf_counter_exit_task(struct perf_counter *child_counter, - struct perf_counter_context *child_ctx, +__perf_event_exit_task(struct perf_event *child_event, + struct perf_event_context *child_ctx, struct task_struct *child) { - struct perf_counter *parent_counter; + struct perf_event *parent_event; - update_counter_times(child_counter); - perf_counter_remove_from_context(child_counter); + update_event_times(child_event); + perf_event_remove_from_context(child_event); - parent_counter = child_counter->parent; + parent_event = child_event->parent; /* - * It can happen that parent exits first, and has counters + * It can happen that parent exits first, and has events * that are still around due to the child reference. These - * counters need to be zapped - but otherwise linger. + * events need to be zapped - but otherwise linger. */ - if (parent_counter) { - sync_child_counter(child_counter, child); - free_counter(child_counter); + if (parent_event) { + sync_child_event(child_event, child); + free_event(child_event); } } /* - * When a child task exits, feed back counter values to parent counters. + * When a child task exits, feed back event values to parent events. */ -void perf_counter_exit_task(struct task_struct *child) +void perf_event_exit_task(struct task_struct *child) { - struct perf_counter *child_counter, *tmp; - struct perf_counter_context *child_ctx; + struct perf_event *child_event, *tmp; + struct perf_event_context *child_ctx; unsigned long flags; - if (likely(!child->perf_counter_ctxp)) { - perf_counter_task(child, NULL, 0); + if (likely(!child->perf_event_ctxp)) { + perf_event_task(child, NULL, 0); return; } @@ -4631,37 +4630,37 @@ void perf_counter_exit_task(struct task_struct *child) * scheduled, so we are now safe from rescheduling changing * our context. */ - child_ctx = child->perf_counter_ctxp; - __perf_counter_task_sched_out(child_ctx); + child_ctx = child->perf_event_ctxp; + __perf_event_task_sched_out(child_ctx); /* * Take the context lock here so that if find_get_context is - * reading child->perf_counter_ctxp, we wait until it has + * reading child->perf_event_ctxp, we wait until it has * incremented the context's refcount before we do put_ctx below. */ spin_lock(&child_ctx->lock); - child->perf_counter_ctxp = NULL; + child->perf_event_ctxp = NULL; /* * If this context is a clone; unclone it so it can't get * swapped to another process while we're removing all - * the counters from it. + * the events from it. */ unclone_ctx(child_ctx); spin_unlock_irqrestore(&child_ctx->lock, flags); /* - * Report the task dead after unscheduling the counters so that we - * won't get any samples after PERF_EVENT_EXIT. We can however still - * get a few PERF_EVENT_READ events. + * Report the task dead after unscheduling the events so that we + * won't get any samples after PERF_RECORD_EXIT. We can however still + * get a few PERF_RECORD_READ events. */ - perf_counter_task(child, child_ctx, 0); + perf_event_task(child, child_ctx, 0); /* * We can recurse on the same lock type through: * - * __perf_counter_exit_task() - * sync_child_counter() - * fput(parent_counter->filp) + * __perf_event_exit_task() + * sync_child_event() + * fput(parent_event->filp) * perf_release() * mutex_lock(&ctx->mutex) * @@ -4670,16 +4669,16 @@ void perf_counter_exit_task(struct task_struct *child) mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); again: - list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, - list_entry) - __perf_counter_exit_task(child_counter, child_ctx, child); + list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list, + group_entry) + __perf_event_exit_task(child_event, child_ctx, child); /* - * If the last counter was a group counter, it will have appended all + * If the last event was a group event, it will have appended all * its siblings to the list, but we obtained 'tmp' before that which * will still point to the list head terminating the iteration. */ - if (!list_empty(&child_ctx->counter_list)) + if (!list_empty(&child_ctx->group_list)) goto again; mutex_unlock(&child_ctx->mutex); @@ -4691,33 +4690,33 @@ again: * free an unexposed, unused context as created by inheritance by * init_task below, used by fork() in case of fail. */ -void perf_counter_free_task(struct task_struct *task) +void perf_event_free_task(struct task_struct *task) { - struct perf_counter_context *ctx = task->perf_counter_ctxp; - struct perf_counter *counter, *tmp; + struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_event *event, *tmp; if (!ctx) return; mutex_lock(&ctx->mutex); again: - list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) { - struct perf_counter *parent = counter->parent; + list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) { + struct perf_event *parent = event->parent; if (WARN_ON_ONCE(!parent)) continue; mutex_lock(&parent->child_mutex); - list_del_init(&counter->child_list); + list_del_init(&event->child_list); mutex_unlock(&parent->child_mutex); fput(parent->filp); - list_del_counter(counter, ctx); - free_counter(counter); + list_del_event(event, ctx); + free_event(event); } - if (!list_empty(&ctx->counter_list)) + if (!list_empty(&ctx->group_list)) goto again; mutex_unlock(&ctx->mutex); @@ -4726,37 +4725,37 @@ again: } /* - * Initialize the perf_counter context in task_struct + * Initialize the perf_event context in task_struct */ -int perf_counter_init_task(struct task_struct *child) +int perf_event_init_task(struct task_struct *child) { - struct perf_counter_context *child_ctx, *parent_ctx; - struct perf_counter_context *cloned_ctx; - struct perf_counter *counter; + struct perf_event_context *child_ctx, *parent_ctx; + struct perf_event_context *cloned_ctx; + struct perf_event *event; struct task_struct *parent = current; int inherited_all = 1; int ret = 0; - child->perf_counter_ctxp = NULL; + child->perf_event_ctxp = NULL; - mutex_init(&child->perf_counter_mutex); - INIT_LIST_HEAD(&child->perf_counter_list); + mutex_init(&child->perf_event_mutex); + INIT_LIST_HEAD(&child->perf_event_list); - if (likely(!parent->perf_counter_ctxp)) + if (likely(!parent->perf_event_ctxp)) return 0; /* * This is executed from the parent task context, so inherit - * counters that have been marked for cloning. + * events that have been marked for cloning. * First allocate and initialize a context for the child. */ - child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); + child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); if (!child_ctx) return -ENOMEM; - __perf_counter_init_context(child_ctx, child); - child->perf_counter_ctxp = child_ctx; + __perf_event_init_context(child_ctx, child); + child->perf_event_ctxp = child_ctx; get_task_struct(child); /* @@ -4782,16 +4781,16 @@ int perf_counter_init_task(struct task_struct *child) * We dont have to disable NMIs - we are only looking at * the list, not manipulating it: */ - list_for_each_entry_rcu(counter, &parent_ctx->event_list, event_entry) { - if (counter != counter->group_leader) + list_for_each_entry_rcu(event, &parent_ctx->event_list, event_entry) { + if (event != event->group_leader) continue; - if (!counter->attr.inherit) { + if (!event->attr.inherit) { inherited_all = 0; continue; } - ret = inherit_group(counter, parent, parent_ctx, + ret = inherit_group(event, parent, parent_ctx, child, child_ctx); if (ret) { inherited_all = 0; @@ -4805,7 +4804,7 @@ int perf_counter_init_task(struct task_struct *child) * context, or of whatever the parent is a clone of. * Note that if the parent is a clone, it could get * uncloned at any point, but that doesn't matter - * because the list of counters and the generation + * because the list of events and the generation * count can't have changed since we took the mutex. */ cloned_ctx = rcu_dereference(parent_ctx->parent_ctx); @@ -4826,41 +4825,41 @@ int perf_counter_init_task(struct task_struct *child) return ret; } -static void __cpuinit perf_counter_init_cpu(int cpu) +static void __cpuinit perf_event_init_cpu(int cpu) { struct perf_cpu_context *cpuctx; cpuctx = &per_cpu(perf_cpu_context, cpu); - __perf_counter_init_context(&cpuctx->ctx, NULL); + __perf_event_init_context(&cpuctx->ctx, NULL); spin_lock(&perf_resource_lock); - cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu; + cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; spin_unlock(&perf_resource_lock); - hw_perf_counter_setup(cpu); + hw_perf_event_setup(cpu); } #ifdef CONFIG_HOTPLUG_CPU -static void __perf_counter_exit_cpu(void *info) +static void __perf_event_exit_cpu(void *info) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter_context *ctx = &cpuctx->ctx; - struct perf_counter *counter, *tmp; + struct perf_event_context *ctx = &cpuctx->ctx; + struct perf_event *event, *tmp; - list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) - __perf_counter_remove_from_context(counter); + list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) + __perf_event_remove_from_context(event); } -static void perf_counter_exit_cpu(int cpu) +static void perf_event_exit_cpu(int cpu) { struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = &cpuctx->ctx; + struct perf_event_context *ctx = &cpuctx->ctx; mutex_lock(&ctx->mutex); - smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1); + smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1); mutex_unlock(&ctx->mutex); } #else -static inline void perf_counter_exit_cpu(int cpu) { } +static inline void perf_event_exit_cpu(int cpu) { } #endif static int __cpuinit @@ -4872,17 +4871,17 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - perf_counter_init_cpu(cpu); + perf_event_init_cpu(cpu); break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: - hw_perf_counter_setup_online(cpu); + hw_perf_event_setup_online(cpu); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: - perf_counter_exit_cpu(cpu); + perf_event_exit_cpu(cpu); break; default: @@ -4900,7 +4899,7 @@ static struct notifier_block __cpuinitdata perf_cpu_nb = { .priority = 20, }; -void __init perf_counter_init(void) +void __init perf_event_init(void) { perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); @@ -4926,7 +4925,7 @@ perf_set_reserve_percpu(struct sysdev_class *class, err = strict_strtoul(buf, 10, &val); if (err) return err; - if (val > perf_max_counters) + if (val > perf_max_events) return -EINVAL; spin_lock(&perf_resource_lock); @@ -4934,8 +4933,8 @@ perf_set_reserve_percpu(struct sysdev_class *class, for_each_online_cpu(cpu) { cpuctx = &per_cpu(perf_cpu_context, cpu); spin_lock_irq(&cpuctx->ctx.lock); - mpt = min(perf_max_counters - cpuctx->ctx.nr_counters, - perf_max_counters - perf_reserved_percpu); + mpt = min(perf_max_events - cpuctx->ctx.nr_events, + perf_max_events - perf_reserved_percpu); cpuctx->max_pertask = mpt; spin_unlock_irq(&cpuctx->ctx.lock); } @@ -4990,12 +4989,12 @@ static struct attribute *perfclass_attrs[] = { static struct attribute_group perfclass_attr_group = { .attrs = perfclass_attrs, - .name = "perf_counters", + .name = "perf_events", }; -static int __init perf_counter_sysfs_init(void) +static int __init perf_event_sysfs_init(void) { return sysfs_create_group(&cpu_sysdev_class.kset.kobj, &perfclass_attr_group); } -device_initcall(perf_counter_sysfs_init); +device_initcall(perf_event_sysfs_init); diff --git a/kernel/pid.c b/kernel/pid.c index 31310b5..d3f722d 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -40,7 +40,7 @@ #define pid_hashfn(nr, ns) \ hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) static struct hlist_head *pid_hash; -static int pidhash_shift; +static unsigned int pidhash_shift = 4; struct pid init_struct_pid = INIT_STRUCT_PID; int pid_max = PID_MAX_DEFAULT; @@ -499,19 +499,12 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) void __init pidhash_init(void) { int i, pidhash_size; - unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT); - pidhash_shift = max(4, fls(megabytes * 4)); - pidhash_shift = min(12, pidhash_shift); + pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18, + HASH_EARLY | HASH_SMALL, + &pidhash_shift, NULL, 4096); pidhash_size = 1 << pidhash_shift; - printk("PID hash table entries: %d (order: %d, %Zd bytes)\n", - pidhash_size, pidhash_shift, - pidhash_size * sizeof(struct hlist_head)); - - pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash))); - if (!pid_hash) - panic("Could not alloc pidhash!\n"); for (i = 0; i < pidhash_size; i++) INIT_HLIST_HEAD(&pid_hash[i]); } diff --git a/kernel/power/process.c b/kernel/power/process.c index da2072d..cc2e553 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -9,6 +9,7 @@ #undef DEBUG #include <linux/interrupt.h> +#include <linux/oom.h> #include <linux/suspend.h> #include <linux/module.h> #include <linux/syscalls.h> diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 97955b0..36cb168 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -619,7 +619,7 @@ __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn, BUG_ON(!region); } else /* This allocation cannot fail */ - region = alloc_bootmem_low(sizeof(struct nosave_region)); + region = alloc_bootmem(sizeof(struct nosave_region)); region->start_pfn = start_pfn; region->end_pfn = end_pfn; list_add_tail(®ion->list, &nosave_regions); diff --git a/kernel/profile.c b/kernel/profile.c index 419250e..a55d3a3 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -442,48 +442,51 @@ void profile_tick(int type) #ifdef CONFIG_PROC_FS #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <asm/uaccess.h> -static int prof_cpu_mask_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int prof_cpu_mask_proc_show(struct seq_file *m, void *v) { - int len = cpumask_scnprintf(page, count, data); - if (count - len < 2) - return -EINVAL; - len += sprintf(page + len, "\n"); - return len; + seq_cpumask(m, prof_cpu_mask); + seq_putc(m, '\n'); + return 0; } -static int prof_cpu_mask_write_proc(struct file *file, - const char __user *buffer, unsigned long count, void *data) +static int prof_cpu_mask_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, prof_cpu_mask_proc_show, NULL); +} + +static ssize_t prof_cpu_mask_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *pos) { - struct cpumask *mask = data; - unsigned long full_count = count, err; cpumask_var_t new_value; + int err; if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) return -ENOMEM; err = cpumask_parse_user(buffer, count, new_value); if (!err) { - cpumask_copy(mask, new_value); - err = full_count; + cpumask_copy(prof_cpu_mask, new_value); + err = count; } free_cpumask_var(new_value); return err; } +static const struct file_operations prof_cpu_mask_proc_fops = { + .open = prof_cpu_mask_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = prof_cpu_mask_proc_write, +}; + void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir) { - struct proc_dir_entry *entry; - /* create /proc/irq/prof_cpu_mask */ - entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); - if (!entry) - return; - entry->data = prof_cpu_mask; - entry->read_proc = prof_cpu_mask_read_proc; - entry->write_proc = prof_cpu_mask_write_proc; + proc_create("prof_cpu_mask", 0600, root_irq_dir, &prof_cpu_mask_proc_fops); } /* diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index bd5d5c8..37ac454 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -19,7 +19,7 @@ * * Authors: Dipankar Sarma <dipankar@in.ibm.com> * Manfred Spraul <manfred@colorfullife.com> - * + * * Based on the original work by Paul McKenney <paulmck@us.ibm.com> * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. * Papers: @@ -27,7 +27,7 @@ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) * * For detailed explanation of Read-Copy Update mechanism see - - * http://lse.sourceforge.net/locking/rcupdate.html + * http://lse.sourceforge.net/locking/rcupdate.html * */ #include <linux/types.h> @@ -74,6 +74,8 @@ void wakeme_after_rcu(struct rcu_head *head) complete(&rcu->completion); } +#ifdef CONFIG_TREE_PREEMPT_RCU + /** * synchronize_rcu - wait until a grace period has elapsed. * @@ -87,7 +89,7 @@ void synchronize_rcu(void) { struct rcu_synchronize rcu; - if (rcu_blocking_is_gp()) + if (!rcu_scheduler_active) return; init_completion(&rcu.completion); @@ -98,6 +100,46 @@ void synchronize_rcu(void) } EXPORT_SYMBOL_GPL(synchronize_rcu); +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + +/** + * synchronize_sched - wait until an rcu-sched grace period has elapsed. + * + * Control will return to the caller some time after a full rcu-sched + * grace period has elapsed, in other words after all currently executing + * rcu-sched read-side critical sections have completed. These read-side + * critical sections are delimited by rcu_read_lock_sched() and + * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), + * local_irq_disable(), and so on may be used in place of + * rcu_read_lock_sched(). + * + * This means that all preempt_disable code sequences, including NMI and + * hardware-interrupt handlers, in progress on entry will have completed + * before this primitive returns. However, this does not guarantee that + * softirq handlers will have completed, since in some kernels, these + * handlers can run in process context, and can block. + * + * This primitive provides the guarantees made by the (now removed) + * synchronize_kernel() API. In contrast, synchronize_rcu() only + * guarantees that rcu_read_lock() sections will have completed. + * In "classic RCU", these two guarantees happen to be one and + * the same, but can differ in realtime RCU implementations. + */ +void synchronize_sched(void) +{ + struct rcu_synchronize rcu; + + if (rcu_blocking_is_gp()) + return; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_sched(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(synchronize_sched); + /** * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. * diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index b33db53..233768f 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -18,7 +18,7 @@ * Copyright (C) IBM Corporation, 2005, 2006 * * Authors: Paul E. McKenney <paulmck@us.ibm.com> - * Josh Triplett <josh@freedesktop.org> + * Josh Triplett <josh@freedesktop.org> * * See also: Documentation/RCU/torture.txt */ @@ -50,7 +50,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " - "Josh Triplett <josh@freedesktop.org>"); + "Josh Triplett <josh@freedesktop.org>"); static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */ static int nfakewriters = 4; /* # fake writer threads */ @@ -110,8 +110,8 @@ struct rcu_torture { }; static LIST_HEAD(rcu_torture_freelist); -static struct rcu_torture *rcu_torture_current = NULL; -static long rcu_torture_current_version = 0; +static struct rcu_torture *rcu_torture_current; +static long rcu_torture_current_version; static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; static DEFINE_SPINLOCK(rcu_torture_lock); static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = @@ -124,11 +124,11 @@ static atomic_t n_rcu_torture_alloc_fail; static atomic_t n_rcu_torture_free; static atomic_t n_rcu_torture_mberror; static atomic_t n_rcu_torture_error; -static long n_rcu_torture_timers = 0; +static long n_rcu_torture_timers; static struct list_head rcu_torture_removed; static cpumask_var_t shuffle_tmp_mask; -static int stutter_pause_test = 0; +static int stutter_pause_test; #if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE) #define RCUTORTURE_RUNNABLE_INIT 1 @@ -267,7 +267,8 @@ struct rcu_torture_ops { int irq_capable; char *name; }; -static struct rcu_torture_ops *cur_ops = NULL; + +static struct rcu_torture_ops *cur_ops; /* * Definitions for rcu torture testing. @@ -281,14 +282,17 @@ static int rcu_torture_read_lock(void) __acquires(RCU) static void rcu_read_delay(struct rcu_random_state *rrsp) { - long delay; - const long longdelay = 200; + const unsigned long shortdelay_us = 200; + const unsigned long longdelay_ms = 50; - /* We want there to be long-running readers, but not all the time. */ + /* We want a short delay sometimes to make a reader delay the grace + * period, and we want a long delay occasionally to trigger + * force_quiescent_state. */ - delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay); - if (!delay) - udelay(longdelay); + if (!(rcu_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) + mdelay(longdelay_ms); + if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) + udelay(shortdelay_us); } static void rcu_torture_read_unlock(int idx) __releases(RCU) @@ -339,8 +343,8 @@ static struct rcu_torture_ops rcu_ops = { .sync = synchronize_rcu, .cb_barrier = rcu_barrier, .stats = NULL, - .irq_capable = 1, - .name = "rcu" + .irq_capable = 1, + .name = "rcu" }; static void rcu_sync_torture_deferred_free(struct rcu_torture *p) @@ -638,7 +642,8 @@ rcu_torture_writer(void *arg) do { schedule_timeout_uninterruptible(1); - if ((rp = rcu_torture_alloc()) == NULL) + rp = rcu_torture_alloc(); + if (rp == NULL) continue; rp->rtort_pipe_count = 0; udelay(rcu_random(&rand) & 0x3ff); @@ -1110,7 +1115,7 @@ rcu_torture_init(void) printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n", torture_type); mutex_unlock(&fullstop_mutex); - return (-EINVAL); + return -EINVAL; } if (cur_ops->init) cur_ops->init(); /* no "goto unwind" prior to this point!!! */ @@ -1161,7 +1166,7 @@ rcu_torture_init(void) goto unwind; } fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), - GFP_KERNEL); + GFP_KERNEL); if (fakewriter_tasks == NULL) { VERBOSE_PRINTK_ERRSTRING("out of memory"); firsterr = -ENOMEM; @@ -1170,7 +1175,7 @@ rcu_torture_init(void) for (i = 0; i < nfakewriters; i++) { VERBOSE_PRINTK_STRING("Creating rcu_torture_fakewriter task"); fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL, - "rcu_torture_fakewriter"); + "rcu_torture_fakewriter"); if (IS_ERR(fakewriter_tasks[i])) { firsterr = PTR_ERR(fakewriter_tasks[i]); VERBOSE_PRINTK_ERRSTRING("Failed to create fakewriter"); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 6b11b07..52b06f6 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -25,7 +25,7 @@ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. * * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU + * Documentation/RCU */ #include <linux/types.h> #include <linux/kernel.h> @@ -107,27 +107,23 @@ static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp, */ void rcu_sched_qs(int cpu) { - unsigned long flags; struct rcu_data *rdp; - local_irq_save(flags); rdp = &per_cpu(rcu_sched_data, cpu); - rdp->passed_quiesc = 1; rdp->passed_quiesc_completed = rdp->completed; - rcu_preempt_qs(cpu); - local_irq_restore(flags); + barrier(); + rdp->passed_quiesc = 1; + rcu_preempt_note_context_switch(cpu); } void rcu_bh_qs(int cpu) { - unsigned long flags; struct rcu_data *rdp; - local_irq_save(flags); rdp = &per_cpu(rcu_bh_data, cpu); - rdp->passed_quiesc = 1; rdp->passed_quiesc_completed = rdp->completed; - local_irq_restore(flags); + barrier(); + rdp->passed_quiesc = 1; } #ifdef CONFIG_NO_HZ @@ -605,8 +601,6 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) { struct rcu_data *rdp = rsp->rda[smp_processor_id()]; struct rcu_node *rnp = rcu_get_root(rsp); - struct rcu_node *rnp_cur; - struct rcu_node *rnp_end; if (!cpu_needs_another_gp(rsp, rdp)) { spin_unlock_irqrestore(&rnp->lock, flags); @@ -615,6 +609,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) /* Advance to a new grace period and initialize state. */ rsp->gpnum++; + WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; record_gp_stall_check_time(rsp); @@ -631,7 +626,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) /* Special-case the common single-level case. */ if (NUM_RCU_NODES == 1) { + rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; + rnp->gpnum = rsp->gpnum; rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ spin_unlock_irqrestore(&rnp->lock, flags); return; @@ -644,42 +641,28 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) spin_lock(&rsp->onofflock); /* irqs already disabled. */ /* - * Set the quiescent-state-needed bits in all the non-leaf RCU - * nodes for all currently online CPUs. This operation relies - * on the layout of the hierarchy within the rsp->node[] array. - * Note that other CPUs will access only the leaves of the - * hierarchy, which still indicate that no grace period is in - * progress. In addition, we have excluded CPU-hotplug operations. - * - * We therefore do not need to hold any locks. Any required - * memory barriers will be supplied by the locks guarding the - * leaf rcu_nodes in the hierarchy. - */ - - rnp_end = rsp->level[NUM_RCU_LVLS - 1]; - for (rnp_cur = &rsp->node[0]; rnp_cur < rnp_end; rnp_cur++) - rnp_cur->qsmask = rnp_cur->qsmaskinit; - - /* - * Now set up the leaf nodes. Here we must be careful. First, - * we need to hold the lock in order to exclude other CPUs, which - * might be contending for the leaf nodes' locks. Second, as - * soon as we initialize a given leaf node, its CPUs might run - * up the rest of the hierarchy. We must therefore acquire locks - * for each node that we touch during this stage. (But we still - * are excluding CPU-hotplug operations.) + * Set the quiescent-state-needed bits in all the rcu_node + * structures for all currently online CPUs in breadth-first + * order, starting from the root rcu_node structure. This + * operation relies on the layout of the hierarchy within the + * rsp->node[] array. Note that other CPUs will access only + * the leaves of the hierarchy, which still indicate that no + * grace period is in progress, at least until the corresponding + * leaf node has been initialized. In addition, we have excluded + * CPU-hotplug operations. * * Note that the grace period cannot complete until we finish * the initialization process, as there will be at least one * qsmask bit set in the root node until that time, namely the - * one corresponding to this CPU. + * one corresponding to this CPU, due to the fact that we have + * irqs disabled. */ - rnp_end = &rsp->node[NUM_RCU_NODES]; - rnp_cur = rsp->level[NUM_RCU_LVLS - 1]; - for (; rnp_cur < rnp_end; rnp_cur++) { - spin_lock(&rnp_cur->lock); /* irqs already disabled. */ - rnp_cur->qsmask = rnp_cur->qsmaskinit; - spin_unlock(&rnp_cur->lock); /* irqs already disabled. */ + for (rnp = &rsp->node[0]; rnp < &rsp->node[NUM_RCU_NODES]; rnp++) { + spin_lock(&rnp->lock); /* irqs already disabled. */ + rcu_preempt_check_blocked_tasks(rnp); + rnp->qsmask = rnp->qsmaskinit; + rnp->gpnum = rsp->gpnum; + spin_unlock(&rnp->lock); /* irqs already disabled. */ } rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ @@ -722,6 +705,7 @@ rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags) __releases(rnp->lock) { + WARN_ON_ONCE(rsp->completed == rsp->gpnum); rsp->completed = rsp->gpnum; rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ @@ -739,6 +723,8 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, unsigned long flags) __releases(rnp->lock) { + struct rcu_node *rnp_c; + /* Walk up the rcu_node hierarchy. */ for (;;) { if (!(rnp->qsmask & mask)) { @@ -762,8 +748,10 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, break; } spin_unlock_irqrestore(&rnp->lock, flags); + rnp_c = rnp; rnp = rnp->parent; spin_lock_irqsave(&rnp->lock, flags); + WARN_ON_ONCE(rnp_c->qsmask); } /* @@ -776,10 +764,10 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, /* * Record a quiescent state for the specified CPU, which must either be - * the current CPU or an offline CPU. The lastcomp argument is used to - * make sure we are still in the grace period of interest. We don't want - * to end the current grace period based on quiescent states detected in - * an earlier grace period! + * the current CPU. The lastcomp argument is used to make sure we are + * still in the grace period of interest. We don't want to end the current + * grace period based on quiescent states detected in an earlier grace + * period! */ static void cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) @@ -814,7 +802,6 @@ cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) * This GP can't end until cpu checks in, so all of our * callbacks can be processed during the next GP. */ - rdp = rsp->rda[smp_processor_id()]; rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; cpu_quiet_msk(mask, rsp, rnp, flags); /* releases rnp->lock */ @@ -872,7 +859,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) spin_lock_irqsave(&rsp->onofflock, flags); /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ - rnp = rdp->mynode; + rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */ mask = rdp->grpmask; /* rnp->grplo is constant. */ do { spin_lock(&rnp->lock); /* irqs already disabled. */ @@ -881,7 +868,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) spin_unlock(&rnp->lock); /* irqs remain disabled. */ break; } - rcu_preempt_offline_tasks(rsp, rnp); + rcu_preempt_offline_tasks(rsp, rnp, rdp); mask = rnp->grpmask; spin_unlock(&rnp->lock); /* irqs remain disabled. */ rnp = rnp->parent; @@ -890,9 +877,6 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ - /* Being offline is a quiescent state, so go record it. */ - cpu_quiet(cpu, rsp, rdp, lastcomp); - /* * Move callbacks from the outgoing CPU to the running CPU. * Note that the outgoing CPU is now quiscent, so it is now @@ -1457,20 +1441,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) rnp = rnp->parent; } while (rnp != NULL && !(rnp->qsmaskinit & mask)); - spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ - - /* - * A new grace period might start here. If so, we will be part of - * it, and its gpnum will be greater than ours, so we will - * participate. It is also possible for the gpnum to have been - * incremented before this function was called, and the bitmasks - * to not be filled out until now, in which case we will also - * participate due to our gpnum being behind. - */ - - /* Since it is coming online, the CPU is in a quiescent state. */ - cpu_quiet(cpu, rsp, rdp, lastcomp); - local_irq_restore(flags); + spin_unlock_irqrestore(&rsp->onofflock, flags); } static void __cpuinit rcu_online_cpu(int cpu) diff --git a/kernel/rcutree.h b/kernel/rcutree.h index bf8a6f9..8e8287a 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -142,7 +142,7 @@ struct rcu_data { */ struct rcu_head *nxtlist; struct rcu_head **nxttail[RCU_NEXT_SIZE]; - long qlen; /* # of queued callbacks */ + long qlen; /* # of queued callbacks */ long blimit; /* Upper limit on a processed batch */ #ifdef CONFIG_NO_HZ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 4778936..1cee04f 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -64,22 +64,31 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed); * not in a quiescent state. There might be any number of tasks blocked * while in an RCU read-side critical section. */ -static void rcu_preempt_qs_record(int cpu) +static void rcu_preempt_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); - rdp->passed_quiesc = 1; rdp->passed_quiesc_completed = rdp->completed; + barrier(); + rdp->passed_quiesc = 1; } /* - * We have entered the scheduler or are between softirqs in ksoftirqd. - * If we are in an RCU read-side critical section, we need to reflect - * that in the state of the rcu_node structure corresponding to this CPU. - * Caller must disable hardirqs. + * We have entered the scheduler, and the current task might soon be + * context-switched away from. If this task is in an RCU read-side + * critical section, we will no longer be able to rely on the CPU to + * record that fact, so we enqueue the task on the appropriate entry + * of the blocked_tasks[] array. The task will dequeue itself when + * it exits the outermost enclosing RCU read-side critical section. + * Therefore, the current grace period cannot be permitted to complete + * until the blocked_tasks[] entry indexed by the low-order bit of + * rnp->gpnum empties. + * + * Caller must disable preemption. */ -static void rcu_preempt_qs(int cpu) +static void rcu_preempt_note_context_switch(int cpu) { struct task_struct *t = current; + unsigned long flags; int phase; struct rcu_data *rdp; struct rcu_node *rnp; @@ -90,7 +99,7 @@ static void rcu_preempt_qs(int cpu) /* Possibly blocking in an RCU read-side critical section. */ rdp = rcu_preempt_state.rda[cpu]; rnp = rdp->mynode; - spin_lock(&rnp->lock); + spin_lock_irqsave(&rnp->lock, flags); t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; t->rcu_blocked_node = rnp; @@ -103,11 +112,15 @@ static void rcu_preempt_qs(int cpu) * state for the current grace period), then as long * as that task remains queued, the current grace period * cannot end. + * + * But first, note that the current CPU must still be + * on line! */ - phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1); + WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); + WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); + phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1; list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]); - smp_mb(); /* Ensure later ctxt swtch seen after above. */ - spin_unlock(&rnp->lock); + spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -119,9 +132,10 @@ static void rcu_preempt_qs(int cpu) * grace period, then the fact that the task has been enqueued * means that we continue to block the current grace period. */ - rcu_preempt_qs_record(cpu); - t->rcu_read_unlock_special &= ~(RCU_READ_UNLOCK_NEED_QS | - RCU_READ_UNLOCK_GOT_QS); + rcu_preempt_qs(cpu); + local_irq_save(flags); + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; + local_irq_restore(flags); } /* @@ -157,7 +171,7 @@ static void rcu_read_unlock_special(struct task_struct *t) special = t->rcu_read_unlock_special; if (special & RCU_READ_UNLOCK_NEED_QS) { t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_GOT_QS; + rcu_preempt_qs(smp_processor_id()); } /* Hardware IRQ handlers cannot block. */ @@ -177,10 +191,10 @@ static void rcu_read_unlock_special(struct task_struct *t) */ for (;;) { rnp = t->rcu_blocked_node; - spin_lock(&rnp->lock); + spin_lock(&rnp->lock); /* irqs already disabled. */ if (rnp == t->rcu_blocked_node) break; - spin_unlock(&rnp->lock); + spin_unlock(&rnp->lock); /* irqs remain disabled. */ } empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); list_del_init(&t->rcu_node_entry); @@ -194,9 +208,8 @@ static void rcu_read_unlock_special(struct task_struct *t) */ if (!empty && rnp->qsmask == 0 && list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) { - t->rcu_read_unlock_special &= - ~(RCU_READ_UNLOCK_NEED_QS | - RCU_READ_UNLOCK_GOT_QS); + struct rcu_node *rnp_p; + if (rnp->parent == NULL) { /* Only one rcu_node in the tree. */ cpu_quiet_msk_finish(&rcu_preempt_state, flags); @@ -205,9 +218,10 @@ static void rcu_read_unlock_special(struct task_struct *t) /* Report up the rest of the hierarchy. */ mask = rnp->grpmask; spin_unlock_irqrestore(&rnp->lock, flags); - rnp = rnp->parent; - spin_lock_irqsave(&rnp->lock, flags); - cpu_quiet_msk(mask, &rcu_preempt_state, rnp, flags); + rnp_p = rnp->parent; + spin_lock_irqsave(&rnp_p->lock, flags); + WARN_ON_ONCE(rnp->qsmask); + cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags); return; } spin_unlock(&rnp->lock); @@ -259,6 +273,19 @@ static void rcu_print_task_stall(struct rcu_node *rnp) #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* + * Check that the list of blocked tasks for the newly completed grace + * period is in fact empty. It is a serious bug to complete a grace + * period that still has RCU readers blocked! This function must be + * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock + * must be held by the caller. + */ +static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) +{ + WARN_ON_ONCE(!list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])); + WARN_ON_ONCE(rnp->qsmask); +} + +/* * Check for preempted RCU readers for the specified rcu_node structure. * If the caller needs a reliable answer, it must hold the rcu_node's * >lock. @@ -280,7 +307,8 @@ static int rcu_preempted_readers(struct rcu_node *rnp) * The caller must hold rnp->lock with irqs disabled. */ static void rcu_preempt_offline_tasks(struct rcu_state *rsp, - struct rcu_node *rnp) + struct rcu_node *rnp, + struct rcu_data *rdp) { int i; struct list_head *lp; @@ -292,6 +320,9 @@ static void rcu_preempt_offline_tasks(struct rcu_state *rsp, WARN_ONCE(1, "Last CPU thought to be offlined?"); return; /* Shouldn't happen: at least one CPU online. */ } + WARN_ON_ONCE(rnp != rdp->mynode && + (!list_empty(&rnp->blocked_tasks[0]) || + !list_empty(&rnp->blocked_tasks[1]))); /* * Move tasks up to root rcu_node. Rely on the fact that the @@ -335,20 +366,12 @@ static void rcu_preempt_check_callbacks(int cpu) struct task_struct *t = current; if (t->rcu_read_lock_nesting == 0) { - t->rcu_read_unlock_special &= - ~(RCU_READ_UNLOCK_NEED_QS | RCU_READ_UNLOCK_GOT_QS); - rcu_preempt_qs_record(cpu); + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; + rcu_preempt_qs(cpu); return; } - if (per_cpu(rcu_preempt_data, cpu).qs_pending) { - if (t->rcu_read_unlock_special & RCU_READ_UNLOCK_GOT_QS) { - rcu_preempt_qs_record(cpu); - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_GOT_QS; - } else if (!(t->rcu_read_unlock_special & - RCU_READ_UNLOCK_NEED_QS)) { - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; - } - } + if (per_cpu(rcu_preempt_data, cpu).qs_pending) + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; } /* @@ -434,7 +457,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed); * Because preemptable RCU does not exist, we never have to check for * CPUs being in quiescent states. */ -static void rcu_preempt_qs(int cpu) +static void rcu_preempt_note_context_switch(int cpu) { } @@ -451,6 +474,16 @@ static void rcu_print_task_stall(struct rcu_node *rnp) #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* + * Because there is no preemptable RCU, there can be no readers blocked, + * so there is no need to check for blocked tasks. So check only for + * bogus qsmask values. + */ +static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) +{ + WARN_ON_ONCE(rnp->qsmask); +} + +/* * Because preemptable RCU does not exist, there are never any preempted * RCU readers. */ @@ -466,7 +499,8 @@ static int rcu_preempted_readers(struct rcu_node *rnp) * tasks that were blocked within RCU read-side critical sections. */ static void rcu_preempt_offline_tasks(struct rcu_state *rsp, - struct rcu_node *rnp) + struct rcu_node *rnp, + struct rcu_data *rdp) { } diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 0ea1bff..c89f5e9 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -20,7 +20,7 @@ * Papers: http://www.rdrop.com/users/paulmck/RCU * * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU + * Documentation/RCU * */ #include <linux/types.h> diff --git a/kernel/sched.c b/kernel/sched.c index faf4d46..0ac9053 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -39,7 +39,7 @@ #include <linux/completion.h> #include <linux/kernel_stat.h> #include <linux/debug_locks.h> -#include <linux/perf_counter.h> +#include <linux/perf_event.h> #include <linux/security.h> #include <linux/notifier.h> #include <linux/profile.h> @@ -681,15 +681,9 @@ inline void update_rq_clock(struct rq *rq) * This interface allows printk to be called with the runqueue lock * held and know whether or not it is OK to wake up the klogd. */ -int runqueue_is_locked(void) +int runqueue_is_locked(int cpu) { - int cpu = get_cpu(); - struct rq *rq = cpu_rq(cpu); - int ret; - - ret = spin_is_locked(&rq->lock); - put_cpu(); - return ret; + return spin_is_locked(&cpu_rq(cpu)->lock); } /* @@ -2059,7 +2053,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) if (task_hot(p, old_rq->clock, NULL)) schedstat_inc(p, se.nr_forced2_migrations); #endif - perf_swcounter_event(PERF_COUNT_SW_CPU_MIGRATIONS, + perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); } p->se.vruntime -= old_cfsrq->min_vruntime - @@ -2724,7 +2718,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) */ prev_state = prev->state; finish_arch_switch(prev); - perf_counter_task_sched_in(current, cpu_of(rq)); + perf_event_task_sched_in(current, cpu_of(rq)); finish_lock_switch(rq, prev); fire_sched_in_preempt_notifiers(current); @@ -2910,6 +2904,19 @@ unsigned long nr_iowait(void) return sum; } +unsigned long nr_iowait_cpu(void) +{ + struct rq *this = this_rq(); + return atomic_read(&this->nr_iowait); +} + +unsigned long this_cpu_load(void) +{ + struct rq *this = this_rq(); + return this->cpu_load[0]; +} + + /* Variables and functions for calc_load */ static atomic_long_t calc_load_tasks; static unsigned long calc_load_update; @@ -5199,7 +5206,7 @@ void scheduler_tick(void) curr->sched_class->task_tick(rq, curr, 0); spin_unlock(&rq->lock); - perf_counter_task_tick(curr, cpu); + perf_event_task_tick(curr, cpu); #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); @@ -5415,7 +5422,7 @@ need_resched_nonpreemptible: if (likely(prev != next)) { sched_info_switch(prev, next); - perf_counter_task_sched_out(prev, next, cpu); + perf_event_task_sched_out(prev, next, cpu); rq->nr_switches++; rq->curr = next; @@ -6825,23 +6832,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, if (retval) goto out_unlock; - /* - * Time slice is 0 for SCHED_FIFO tasks and for SCHED_OTHER - * tasks that are on an otherwise idle runqueue: - */ - time_slice = 0; - if (p->policy == SCHED_RR) { - time_slice = DEF_TIMESLICE; - } else if (p->policy != SCHED_FIFO) { - struct sched_entity *se = &p->se; - unsigned long flags; - struct rq *rq; + time_slice = p->sched_class->get_rr_interval(p); - rq = task_rq_lock(p, &flags); - if (rq->cfs.load.weight) - time_slice = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); - task_rq_unlock(rq, &flags); - } read_unlock(&tasklist_lock); jiffies_to_timespec(time_slice, &t); retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; @@ -7692,7 +7684,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) /* * Register at high priority so that task migration (migrate_all_tasks) * happens before everything else. This has to be lower priority than - * the notifier in the perf_counter subsystem, though. + * the notifier in the perf_event subsystem, though. */ static struct notifier_block __cpuinitdata migration_notifier = { .notifier_call = migration_call, @@ -9171,6 +9163,7 @@ void __init sched_init_smp(void) cpumask_var_t non_isolated_cpus; alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); + alloc_cpumask_var(&fallback_doms, GFP_KERNEL); #if defined(CONFIG_NUMA) sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **), @@ -9202,7 +9195,6 @@ void __init sched_init_smp(void) sched_init_granularity(); free_cpumask_var(non_isolated_cpus); - alloc_cpumask_var(&fallback_doms, GFP_KERNEL); init_sched_rt_class(); } #else @@ -9549,7 +9541,7 @@ void __init sched_init(void) alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); #endif /* SMP */ - perf_counter_init(); + perf_event_init(); scheduler_running = 1; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 990b188..ecc637a 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -710,31 +710,28 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) if (initial && sched_feat(START_DEBIT)) vruntime += sched_vslice(cfs_rq, se); - if (!initial) { - /* sleeps upto a single latency don't count. */ - if (sched_feat(FAIR_SLEEPERS)) { - unsigned long thresh = sysctl_sched_latency; + /* sleeps up to a single latency don't count. */ + if (!initial && sched_feat(FAIR_SLEEPERS)) { + unsigned long thresh = sysctl_sched_latency; - /* - * Convert the sleeper threshold into virtual time. - * SCHED_IDLE is a special sub-class. We care about - * fairness only relative to other SCHED_IDLE tasks, - * all of which have the same weight. - */ - if (sched_feat(NORMALIZED_SLEEPER) && - (!entity_is_task(se) || - task_of(se)->policy != SCHED_IDLE)) - thresh = calc_delta_fair(thresh, se); + /* + * Convert the sleeper threshold into virtual time. + * SCHED_IDLE is a special sub-class. We care about + * fairness only relative to other SCHED_IDLE tasks, + * all of which have the same weight. + */ + if (sched_feat(NORMALIZED_SLEEPER) && (!entity_is_task(se) || + task_of(se)->policy != SCHED_IDLE)) + thresh = calc_delta_fair(thresh, se); - /* - * Halve their sleep time's effect, to allow - * for a gentler effect of sleepers: - */ - if (sched_feat(GENTLE_FAIR_SLEEPERS)) - thresh >>= 1; + /* + * Halve their sleep time's effect, to allow + * for a gentler effect of sleepers: + */ + if (sched_feat(GENTLE_FAIR_SLEEPERS)) + thresh >>= 1; - vruntime -= thresh; - } + vruntime -= thresh; } /* ensure we never gain time by being placed backwards. */ @@ -1343,7 +1340,8 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag int sync = wake_flags & WF_SYNC; if (sd_flag & SD_BALANCE_WAKE) { - if (sched_feat(AFFINE_WAKEUPS)) + if (sched_feat(AFFINE_WAKEUPS) && + cpumask_test_cpu(cpu, &p->cpus_allowed)) want_affine = 1; new_cpu = prev_cpu; } @@ -1941,6 +1939,25 @@ static void moved_group_fair(struct task_struct *p) } #endif +unsigned int get_rr_interval_fair(struct task_struct *task) +{ + struct sched_entity *se = &task->se; + unsigned long flags; + struct rq *rq; + unsigned int rr_interval = 0; + + /* + * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise + * idle runqueue: + */ + rq = task_rq_lock(task, &flags); + if (rq->cfs.load.weight) + rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); + task_rq_unlock(rq, &flags); + + return rr_interval; +} + /* * All the scheduling class methods: */ @@ -1969,6 +1986,8 @@ static const struct sched_class fair_sched_class = { .prio_changed = prio_changed_fair, .switched_to = switched_to_fair, + .get_rr_interval = get_rr_interval_fair, + #ifdef CONFIG_FAIR_GROUP_SCHED .moved_group = moved_group_fair, #endif diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index a8b448a..b133a28 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -97,6 +97,11 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p, check_preempt_curr(rq, p, 0); } +unsigned int get_rr_interval_idle(struct task_struct *task) +{ + return 0; +} + /* * Simple, special scheduling class for the per-CPU idle tasks: */ @@ -122,6 +127,8 @@ static const struct sched_class idle_sched_class = { .set_curr_task = set_curr_task_idle, .task_tick = task_tick_idle, + .get_rr_interval = get_rr_interval_idle, + .prio_changed = prio_changed_idle, .switched_to = switched_to_idle, diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 13de712..a4d790c 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1734,6 +1734,17 @@ static void set_curr_task_rt(struct rq *rq) dequeue_pushable_task(rq, p); } +unsigned int get_rr_interval_rt(struct task_struct *task) +{ + /* + * Time slice is 0 for SCHED_FIFO tasks + */ + if (task->policy == SCHED_RR) + return DEF_TIMESLICE; + else + return 0; +} + static const struct sched_class rt_sched_class = { .next = &fair_sched_class, .enqueue_task = enqueue_task_rt, @@ -1762,6 +1773,8 @@ static const struct sched_class rt_sched_class = { .set_curr_task = set_curr_task_rt, .task_tick = task_tick_rt, + .get_rr_interval = get_rr_interval_rt, + .prio_changed = prio_changed_rt, .switched_to = switched_to_rt, }; diff --git a/kernel/sys.c b/kernel/sys.c index b3f1097..ea5c3bc 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -14,7 +14,7 @@ #include <linux/prctl.h> #include <linux/highuid.h> #include <linux/fs.h> -#include <linux/perf_counter.h> +#include <linux/perf_event.h> #include <linux/resource.h> #include <linux/kernel.h> #include <linux/kexec.h> @@ -1511,11 +1511,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_SET_TSC: error = SET_TSC_CTL(arg2); break; - case PR_TASK_PERF_COUNTERS_DISABLE: - error = perf_counter_task_disable(); + case PR_TASK_PERF_EVENTS_DISABLE: + error = perf_event_task_disable(); break; - case PR_TASK_PERF_COUNTERS_ENABLE: - error = perf_counter_task_enable(); + case PR_TASK_PERF_EVENTS_ENABLE: + error = perf_event_task_enable(); break; case PR_GET_TIMERSLACK: error = current->timer_slack_ns; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 68320f6..515bc23 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -177,4 +177,4 @@ cond_syscall(sys_eventfd); cond_syscall(sys_eventfd2); /* performance counters: */ -cond_syscall(sys_perf_counter_open); +cond_syscall(sys_perf_event_open); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1a631ba..6ba49c7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -50,7 +50,7 @@ #include <linux/reboot.h> #include <linux/ftrace.h> #include <linux/slow-work.h> -#include <linux/perf_counter.h> +#include <linux/perf_event.h> #include <asm/uaccess.h> #include <asm/processor.h> @@ -964,28 +964,28 @@ static struct ctl_table kern_table[] = { .child = slow_work_sysctls, }, #endif -#ifdef CONFIG_PERF_COUNTERS +#ifdef CONFIG_PERF_EVENTS { .ctl_name = CTL_UNNUMBERED, - .procname = "perf_counter_paranoid", - .data = &sysctl_perf_counter_paranoid, - .maxlen = sizeof(sysctl_perf_counter_paranoid), + .procname = "perf_event_paranoid", + .data = &sysctl_perf_event_paranoid, + .maxlen = sizeof(sysctl_perf_event_paranoid), .mode = 0644, .proc_handler = &proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, - .procname = "perf_counter_mlock_kb", - .data = &sysctl_perf_counter_mlock, - .maxlen = sizeof(sysctl_perf_counter_mlock), + .procname = "perf_event_mlock_kb", + .data = &sysctl_perf_event_mlock, + .maxlen = sizeof(sysctl_perf_event_mlock), .mode = 0644, .proc_handler = &proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, - .procname = "perf_counter_max_sample_rate", - .data = &sysctl_perf_counter_sample_rate, - .maxlen = sizeof(sysctl_perf_counter_sample_rate), + .procname = "perf_event_max_sample_rate", + .data = &sysctl_perf_event_sample_rate, + .maxlen = sizeof(sysctl_perf_event_sample_rate), .mode = 0644, .proc_handler = &proc_dointvec, }, diff --git a/kernel/timer.c b/kernel/timer.c index bbb5107..811e5c3 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -37,7 +37,7 @@ #include <linux/delay.h> #include <linux/tick.h> #include <linux/kallsyms.h> -#include <linux/perf_counter.h> +#include <linux/perf_event.h> #include <linux/sched.h> #include <asm/uaccess.h> @@ -1187,7 +1187,7 @@ static void run_timer_softirq(struct softirq_action *h) { struct tvec_base *base = __get_cpu_var(tvec_bases); - perf_counter_do_pending(); + perf_event_do_pending(); hrtimer_run_pending(); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index cc615f8..c71e91b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2414,11 +2414,9 @@ unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; static void * __g_next(struct seq_file *m, loff_t *pos) { - unsigned long *array = m->private; - if (*pos >= ftrace_graph_count) return NULL; - return &array[*pos]; + return &ftrace_graph_funcs[*pos]; } static void * @@ -2482,16 +2480,10 @@ ftrace_graph_open(struct inode *inode, struct file *file) ftrace_graph_count = 0; memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); } + mutex_unlock(&graph_lock); - if (file->f_mode & FMODE_READ) { + if (file->f_mode & FMODE_READ) ret = seq_open(file, &ftrace_graph_seq_ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = ftrace_graph_funcs; - } - } else - file->private_data = ftrace_graph_funcs; - mutex_unlock(&graph_lock); return ret; } @@ -2560,7 +2552,6 @@ ftrace_graph_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_parser parser; - unsigned long *array; size_t read = 0; ssize_t ret; @@ -2574,12 +2565,6 @@ ftrace_graph_write(struct file *file, const char __user *ubuf, goto out; } - if (file->f_mode & FMODE_READ) { - struct seq_file *m = file->private_data; - array = m->private; - } else - array = file->private_data; - if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { ret = -ENOMEM; goto out; @@ -2591,7 +2576,7 @@ ftrace_graph_write(struct file *file, const char __user *ubuf, parser.buffer[parser.idx] = 0; /* we allow only one expression at a time */ - ret = ftrace_set_func(array, &ftrace_graph_count, + ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count, parser.buffer); if (ret) goto out; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fd52a19..a35925d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -125,13 +125,13 @@ int ftrace_dump_on_oops; static int tracing_set_tracer(const char *buf); -#define BOOTUP_TRACER_SIZE 100 -static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata; +#define MAX_TRACER_SIZE 100 +static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; static char *default_bootup_tracer; static int __init set_ftrace(char *str) { - strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE); + strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); default_bootup_tracer = bootup_tracer_buf; /* We are using ftrace early, expand it */ ring_buffer_expanded = 1; @@ -242,13 +242,6 @@ static struct tracer *trace_types __read_mostly; static struct tracer *current_trace __read_mostly; /* - * max_tracer_type_len is used to simplify the allocating of - * buffers to read userspace tracer names. We keep track of - * the longest tracer name registered. - */ -static int max_tracer_type_len; - -/* * trace_types_lock is used to protect the trace_types list. * This lock is also used to keep user access serialized. * Accesses from userspace will grab this lock while userspace @@ -275,12 +268,18 @@ static DEFINE_SPINLOCK(tracing_start_lock); */ void trace_wake_up(void) { + int cpu; + + if (trace_flags & TRACE_ITER_BLOCK) + return; /* * The runqueue_is_locked() can fail, but this is the best we * have for now: */ - if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked()) + cpu = get_cpu(); + if (!runqueue_is_locked(cpu)) wake_up(&trace_wait); + put_cpu(); } static int __init set_buf_size(char *str) @@ -619,7 +618,6 @@ __releases(kernel_lock) __acquires(kernel_lock) { struct tracer *t; - int len; int ret = 0; if (!type->name) { @@ -627,6 +625,11 @@ __acquires(kernel_lock) return -1; } + if (strlen(type->name) > MAX_TRACER_SIZE) { + pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); + return -1; + } + /* * When this gets called we hold the BKL which means that * preemption is disabled. Various trace selftests however @@ -641,7 +644,7 @@ __acquires(kernel_lock) for (t = trace_types; t; t = t->next) { if (strcmp(type->name, t->name) == 0) { /* already found */ - pr_info("Trace %s already registered\n", + pr_info("Tracer %s already registered\n", type->name); ret = -1; goto out; @@ -692,9 +695,6 @@ __acquires(kernel_lock) type->next = trace_types; trace_types = type; - len = strlen(type->name); - if (len > max_tracer_type_len) - max_tracer_type_len = len; out: tracing_selftest_running = false; @@ -703,7 +703,7 @@ __acquires(kernel_lock) if (ret || !default_bootup_tracer) goto out_unlock; - if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE)) + if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) goto out_unlock; printk(KERN_INFO "Starting tracer '%s'\n", type->name); @@ -725,14 +725,13 @@ __acquires(kernel_lock) void unregister_tracer(struct tracer *type) { struct tracer **t; - int len; mutex_lock(&trace_types_lock); for (t = &trace_types; *t; t = &(*t)->next) { if (*t == type) goto found; } - pr_info("Trace %s not registered\n", type->name); + pr_info("Tracer %s not registered\n", type->name); goto out; found: @@ -745,17 +744,7 @@ void unregister_tracer(struct tracer *type) current_trace->stop(&global_trace); current_trace = &nop_trace; } - - if (strlen(type->name) != max_tracer_type_len) - goto out; - - max_tracer_type_len = 0; - for (t = &trace_types; *t; t = &(*t)->next) { - len = strlen((*t)->name); - if (len > max_tracer_type_len) - max_tracer_type_len = len; - } - out: +out: mutex_unlock(&trace_types_lock); } @@ -2604,7 +2593,7 @@ static ssize_t tracing_set_trace_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - char buf[max_tracer_type_len+2]; + char buf[MAX_TRACER_SIZE+2]; int r; mutex_lock(&trace_types_lock); @@ -2754,15 +2743,15 @@ static ssize_t tracing_set_trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - char buf[max_tracer_type_len+1]; + char buf[MAX_TRACER_SIZE+1]; int i; size_t ret; int err; ret = cnt; - if (cnt > max_tracer_type_len) - cnt = max_tracer_type_len; + if (cnt > MAX_TRACER_SIZE) + cnt = MAX_TRACER_SIZE; if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 55a25c9..dd44b87 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -8,6 +8,57 @@ #include <linux/module.h> #include "trace.h" +/* + * We can't use a size but a type in alloc_percpu() + * So let's create a dummy type that matches the desired size + */ +typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t; + +char *trace_profile_buf; +EXPORT_SYMBOL_GPL(trace_profile_buf); + +char *trace_profile_buf_nmi; +EXPORT_SYMBOL_GPL(trace_profile_buf_nmi); + +/* Count the events in use (per event id, not per instance) */ +static int total_profile_count; + +static int ftrace_profile_enable_event(struct ftrace_event_call *event) +{ + char *buf; + int ret = -ENOMEM; + + if (atomic_inc_return(&event->profile_count)) + return 0; + + if (!total_profile_count++) { + buf = (char *)alloc_percpu(profile_buf_t); + if (!buf) + goto fail_buf; + + rcu_assign_pointer(trace_profile_buf, buf); + + buf = (char *)alloc_percpu(profile_buf_t); + if (!buf) + goto fail_buf_nmi; + + rcu_assign_pointer(trace_profile_buf_nmi, buf); + } + + ret = event->profile_enable(); + if (!ret) + return 0; + + kfree(trace_profile_buf_nmi); +fail_buf_nmi: + kfree(trace_profile_buf); +fail_buf: + total_profile_count--; + atomic_dec(&event->profile_count); + + return ret; +} + int ftrace_profile_enable(int event_id) { struct ftrace_event_call *event; @@ -17,7 +68,7 @@ int ftrace_profile_enable(int event_id) list_for_each_entry(event, &ftrace_events, list) { if (event->id == event_id && event->profile_enable && try_module_get(event->mod)) { - ret = event->profile_enable(event); + ret = ftrace_profile_enable_event(event); break; } } @@ -26,6 +77,33 @@ int ftrace_profile_enable(int event_id) return ret; } +static void ftrace_profile_disable_event(struct ftrace_event_call *event) +{ + char *buf, *nmi_buf; + + if (!atomic_add_negative(-1, &event->profile_count)) + return; + + event->profile_disable(); + + if (!--total_profile_count) { + buf = trace_profile_buf; + rcu_assign_pointer(trace_profile_buf, NULL); + + nmi_buf = trace_profile_buf_nmi; + rcu_assign_pointer(trace_profile_buf_nmi, NULL); + + /* + * Ensure every events in profiling have finished before + * releasing the buffers + */ + synchronize_sched(); + + free_percpu(buf); + free_percpu(nmi_buf); + } +} + void ftrace_profile_disable(int event_id) { struct ftrace_event_call *event; @@ -33,7 +111,7 @@ void ftrace_profile_disable(int event_id) mutex_lock(&event_mutex); list_for_each_entry(event, &ftrace_events, list) { if (event->id == event_id) { - event->profile_disable(event); + ftrace_profile_disable_event(event); module_put(event->mod); break; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 56c260b..6f03c8a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -271,42 +271,32 @@ ftrace_event_write(struct file *file, const char __user *ubuf, static void * t_next(struct seq_file *m, void *v, loff_t *pos) { - struct list_head *list = m->private; - struct ftrace_event_call *call; + struct ftrace_event_call *call = v; (*pos)++; - for (;;) { - if (list == &ftrace_events) - return NULL; - - call = list_entry(list, struct ftrace_event_call, list); - + list_for_each_entry_continue(call, &ftrace_events, list) { /* * The ftrace subsystem is for showing formats only. * They can not be enabled or disabled via the event files. */ if (call->regfunc) - break; - - list = list->next; + return call; } - m->private = list->next; - - return call; + return NULL; } static void *t_start(struct seq_file *m, loff_t *pos) { - struct ftrace_event_call *call = NULL; + struct ftrace_event_call *call; loff_t l; mutex_lock(&event_mutex); - m->private = ftrace_events.next; + call = list_entry(&ftrace_events, struct ftrace_event_call, list); for (l = 0; l <= *pos; ) { - call = t_next(m, NULL, &l); + call = t_next(m, call, &l); if (!call) break; } @@ -316,37 +306,28 @@ static void *t_start(struct seq_file *m, loff_t *pos) static void * s_next(struct seq_file *m, void *v, loff_t *pos) { - struct list_head *list = m->private; - struct ftrace_event_call *call; + struct ftrace_event_call *call = v; (*pos)++; - retry: - if (list == &ftrace_events) - return NULL; - - call = list_entry(list, struct ftrace_event_call, list); - - if (!call->enabled) { - list = list->next; - goto retry; + list_for_each_entry_continue(call, &ftrace_events, list) { + if (call->enabled) + return call; } - m->private = list->next; - - return call; + return NULL; } static void *s_start(struct seq_file *m, loff_t *pos) { - struct ftrace_event_call *call = NULL; + struct ftrace_event_call *call; loff_t l; mutex_lock(&event_mutex); - m->private = ftrace_events.next; + call = list_entry(&ftrace_events, struct ftrace_event_call, list); for (l = 0; l <= *pos; ) { - call = s_next(m, NULL, &l); + call = s_next(m, call, &l); if (!call) break; } diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 687699d..2547d88 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -11,7 +11,6 @@ #include <linux/ftrace.h> #include <linux/string.h> #include <linux/module.h> -#include <linux/marker.h> #include <linux/mutex.h> #include <linux/ctype.h> #include <linux/list.h> diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8712ce3..9fbce6c 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -2,7 +2,7 @@ #include <trace/events/syscalls.h> #include <linux/kernel.h> #include <linux/ftrace.h> -#include <linux/perf_counter.h> +#include <linux/perf_event.h> #include <asm/syscall.h> #include "trace_output.h" @@ -384,10 +384,13 @@ static int sys_prof_refcount_exit; static void prof_syscall_enter(struct pt_regs *regs, long id) { - struct syscall_trace_enter *rec; struct syscall_metadata *sys_data; + struct syscall_trace_enter *rec; + unsigned long flags; + char *raw_data; int syscall_nr; int size; + int cpu; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) @@ -402,20 +405,38 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) size = ALIGN(size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - do { - char raw_data[size]; + if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + "profile buffer not large enough")) + return; + + /* Protect the per cpu buffer, begin the rcu read side */ + local_irq_save(flags); - /* zero the dead bytes from align to not leak stack to user */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + cpu = smp_processor_id(); + + if (in_nmi()) + raw_data = rcu_dereference(trace_profile_buf_nmi); + else + raw_data = rcu_dereference(trace_profile_buf); + + if (!raw_data) + goto end; - rec = (struct syscall_trace_enter *) raw_data; - tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->enter_id; - rec->nr = syscall_nr; - syscall_get_arguments(current, regs, 0, sys_data->nb_args, - (unsigned long *)&rec->args); - perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size); - } while(0); + raw_data = per_cpu_ptr(raw_data, cpu); + + /* zero the dead bytes from align to not leak stack to user */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + + rec = (struct syscall_trace_enter *) raw_data; + tracing_generic_entry_update(&rec->ent, 0, 0); + rec->ent.type = sys_data->enter_id; + rec->nr = syscall_nr; + syscall_get_arguments(current, regs, 0, sys_data->nb_args, + (unsigned long *)&rec->args); + perf_tp_event(sys_data->enter_id, 0, 1, rec, size); + +end: + local_irq_restore(flags); } int reg_prof_syscall_enter(char *name) @@ -460,8 +481,12 @@ void unreg_prof_syscall_enter(char *name) static void prof_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; - struct syscall_trace_exit rec; + struct syscall_trace_exit *rec; + unsigned long flags; int syscall_nr; + char *raw_data; + int size; + int cpu; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) @@ -471,12 +496,46 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) if (!sys_data) return; - tracing_generic_entry_update(&rec.ent, 0, 0); - rec.ent.type = sys_data->exit_id; - rec.nr = syscall_nr; - rec.ret = syscall_get_return_value(current, regs); + /* We can probably do that at build time */ + size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); + size -= sizeof(u32); - perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec)); + /* + * Impossible, but be paranoid with the future + * How to put this check outside runtime? + */ + if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + "exit event has grown above profile buffer size")) + return; + + /* Protect the per cpu buffer, begin the rcu read side */ + local_irq_save(flags); + cpu = smp_processor_id(); + + if (in_nmi()) + raw_data = rcu_dereference(trace_profile_buf_nmi); + else + raw_data = rcu_dereference(trace_profile_buf); + + if (!raw_data) + goto end; + + raw_data = per_cpu_ptr(raw_data, cpu); + + /* zero the dead bytes from align to not leak stack to user */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + + rec = (struct syscall_trace_exit *)raw_data; + + tracing_generic_entry_update(&rec->ent, 0, 0); + rec->ent.type = sys_data->exit_id; + rec->nr = syscall_nr; + rec->ret = syscall_get_return_value(current, regs); + + perf_tp_event(sys_data->exit_id, 0, 1, rec, size); + +end: + local_irq_restore(flags); } int reg_prof_syscall_exit(char *name) |