summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2013-07-23 11:48:17 +0200
committerIngo Molnar <mingo@kernel.org>2013-07-23 11:48:17 +0200
commitb59f2b4d30c187160df597bae41cabe497b6acf4 (patch)
treee29ed9e3cbaf777cc1a10c99c4542985002d2c88 /kernel
parent1e40c2edef2537f87f94d0baf80aeaeb7d51cc23 (diff)
parent3b2f64d00c46e1e4e9bd0bb9bb12619adac27a4b (diff)
downloadop-kernel-dev-b59f2b4d30c187160df597bae41cabe497b6acf4.zip
op-kernel-dev-b59f2b4d30c187160df597bae41cabe497b6acf4.tar.gz
Merge tag 'v3.11-rc2' into core/locking
Merge in Linux 3.11-rc2 before moving on with new work. Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c11
-rw-r--r--kernel/cpu.c6
-rw-r--r--kernel/events/core.c38
-rw-r--r--kernel/fork.c6
-rw-r--r--kernel/hrtimer.c6
-rw-r--r--kernel/irq/generic-chip.c2
-rw-r--r--kernel/irq/irqdomain.c12
-rw-r--r--kernel/module.c77
-rw-r--r--kernel/panic.c3
-rw-r--r--kernel/params.c2
-rw-r--r--kernel/power/autosleep.c3
-rw-r--r--kernel/printk.c4
-rw-r--r--kernel/profile.c2
-rw-r--r--kernel/rcutorture.c6
-rw-r--r--kernel/rcutree.c6
-rw-r--r--kernel/rcutree.h4
-rw-r--r--kernel/rcutree_plugin.h6
-rw-r--r--kernel/relay.c2
-rw-r--r--kernel/sched/core.c32
-rw-r--r--kernel/sched/fair.c2
-rw-r--r--kernel/smp.c2
-rw-r--r--kernel/smpboot.c2
-rw-r--r--kernel/softirq.c8
-rw-r--r--kernel/sysctl.c11
-rw-r--r--kernel/sysctl_binary.c1
-rw-r--r--kernel/time/tick-broadcast.c5
-rw-r--r--kernel/time/tick-sched.c17
-rw-r--r--kernel/timer.c10
-rw-r--r--kernel/trace/ftrace.c38
-rw-r--r--kernel/trace/trace.c338
-rw-r--r--kernel/trace/trace.h17
-rw-r--r--kernel/trace/trace_events.c166
-rw-r--r--kernel/trace/trace_events_filter.c6
-rw-r--r--kernel/trace/trace_functions.c103
-rw-r--r--kernel/trace/trace_irqsoff.c4
-rw-r--r--kernel/trace/trace_kprobe.c190
-rw-r--r--kernel/trace/trace_selftest.c18
-rw-r--r--kernel/trace/trace_syscalls.c21
-rw-r--r--kernel/trace/trace_uprobe.c4
-rw-r--r--kernel/watchdog.c113
-rw-r--r--kernel/workqueue.c4
41 files changed, 856 insertions, 452 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e5583d1..0e0b20b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -802,7 +802,6 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
*/
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
-static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
unsigned long subsys_mask);
@@ -2642,7 +2641,7 @@ static const struct inode_operations cgroup_file_inode_operations = {
};
static const struct inode_operations cgroup_dir_inode_operations = {
- .lookup = cgroup_lookup,
+ .lookup = simple_lookup,
.mkdir = cgroup_mkdir,
.rmdir = cgroup_rmdir,
.rename = cgroup_rename,
@@ -2652,14 +2651,6 @@ static const struct inode_operations cgroup_dir_inode_operations = {
.removexattr = cgroup_removexattr,
};
-static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
-{
- if (dentry->d_name.len > NAME_MAX)
- return ERR_PTR(-ENAMETOOLONG);
- d_add(dentry, NULL);
- return NULL;
-}
-
/*
* Check if a file is a control file
*/
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 198a388..b2b227b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -366,7 +366,7 @@ EXPORT_SYMBOL(cpu_down);
#endif /*CONFIG_HOTPLUG_CPU*/
/* Requires cpu_add_remove_lock to be held */
-static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
+static int _cpu_up(unsigned int cpu, int tasks_frozen)
{
int ret, nr_calls = 0;
void *hcpu = (void *)(long)cpu;
@@ -419,7 +419,7 @@ out:
return ret;
}
-int __cpuinit cpu_up(unsigned int cpu)
+int cpu_up(unsigned int cpu)
{
int err = 0;
@@ -618,7 +618,7 @@ core_initcall(cpu_hotplug_pm_sync_init);
* It must be called by the arch code on the new cpu, before the new cpu
* enables interrupts and before the "boot" cpu returns from __cpu_up().
*/
-void __cpuinit notify_cpu_starting(unsigned int cpu)
+void notify_cpu_starting(unsigned int cpu)
{
unsigned long val = CPU_STARTING;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1db3af9..f86599e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -182,7 +182,7 @@ void update_perf_cpu_limits(void)
u64 tmp = perf_sample_period_ns;
tmp *= sysctl_perf_cpu_time_max_percent;
- tmp = do_div(tmp, 100);
+ do_div(tmp, 100);
atomic_set(&perf_sample_allowed_ns, tmp);
}
@@ -232,7 +232,7 @@ DEFINE_PER_CPU(u64, running_sample_length);
void perf_sample_event_took(u64 sample_len_ns)
{
u64 avg_local_sample_len;
- u64 local_samples_len = __get_cpu_var(running_sample_length);
+ u64 local_samples_len;
if (atomic_read(&perf_sample_allowed_ns) == 0)
return;
@@ -947,8 +947,18 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
{
struct perf_event_context *ctx;
- rcu_read_lock();
retry:
+ /*
+ * One of the few rules of preemptible RCU is that one cannot do
+ * rcu_read_unlock() while holding a scheduler (or nested) lock when
+ * part of the read side critical section was preemptible -- see
+ * rcu_read_unlock_special().
+ *
+ * Since ctx->lock nests under rq->lock we must ensure the entire read
+ * side critical section is non-preemptible.
+ */
+ preempt_disable();
+ rcu_read_lock();
ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
if (ctx) {
/*
@@ -964,6 +974,8 @@ retry:
raw_spin_lock_irqsave(&ctx->lock, *flags);
if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
raw_spin_unlock_irqrestore(&ctx->lock, *flags);
+ rcu_read_unlock();
+ preempt_enable();
goto retry;
}
@@ -973,6 +985,7 @@ retry:
}
}
rcu_read_unlock();
+ preempt_enable();
return ctx;
}
@@ -1950,7 +1963,16 @@ static int __perf_event_enable(void *info)
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
int err;
- if (WARN_ON_ONCE(!ctx->is_active))
+ /*
+ * There's a time window between 'ctx->is_active' check
+ * in perf_event_enable function and this place having:
+ * - IRQs on
+ * - ctx->lock unlocked
+ *
+ * where the task could be killed and 'ctx' deactivated
+ * by perf_event_exit_task.
+ */
+ if (!ctx->is_active)
return -EINVAL;
raw_spin_lock(&ctx->lock);
@@ -6212,8 +6234,6 @@ perf_event_mux_interval_ms_store(struct device *dev,
return count;
}
-#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
-
static struct device_attribute pmu_dev_attrs[] = {
__ATTR_RO(type),
__ATTR_RW(perf_event_mux_interval_ms),
@@ -7465,7 +7485,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
* child.
*/
- child_ctx = alloc_perf_context(event->pmu, child);
+ child_ctx = alloc_perf_context(parent_ctx->pmu, child);
if (!child_ctx)
return -ENOMEM;
@@ -7608,7 +7628,7 @@ static void __init perf_event_init_all_cpus(void)
}
}
-static void __cpuinit perf_event_init_cpu(int cpu)
+static void perf_event_init_cpu(int cpu)
{
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
@@ -7697,7 +7717,7 @@ static struct notifier_block perf_reboot_notifier = {
.priority = INT_MIN,
};
-static int __cpuinit
+static int
perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;
diff --git a/kernel/fork.c b/kernel/fork.c
index 6e6a1c1..403d2bb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -365,8 +365,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
mm->locked_vm = 0;
mm->mmap = NULL;
mm->mmap_cache = NULL;
- mm->free_area_cache = oldmm->mmap_base;
- mm->cached_hole_size = ~0UL;
mm->map_count = 0;
cpumask_clear(mm_cpumask(mm));
mm->mm_rb = RB_ROOT;
@@ -540,8 +538,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
mm->nr_ptes = 0;
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
- mm->free_area_cache = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = ~0UL;
mm_init_aio(mm);
mm_init_owner(mm, p);
@@ -1550,7 +1546,7 @@ static inline void init_idle_pids(struct pid_link *links)
}
}
-struct task_struct * __cpuinit fork_idle(int cpu)
+struct task_struct *fork_idle(int cpu)
{
struct task_struct *task;
task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f0f4fe2..383319b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1659,7 +1659,7 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
/*
* Functions related to boot-time initialization:
*/
-static void __cpuinit init_hrtimers_cpu(int cpu)
+static void init_hrtimers_cpu(int cpu)
{
struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
int i;
@@ -1740,7 +1740,7 @@ static void migrate_hrtimers(int scpu)
#endif /* CONFIG_HOTPLUG_CPU */
-static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
+static int hrtimer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
int scpu = (long)hcpu;
@@ -1773,7 +1773,7 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata hrtimers_nb = {
+static struct notifier_block hrtimers_nb = {
.notifier_call = hrtimer_cpu_notify,
};
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index 10e663a..452d6f2 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -275,7 +275,7 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
if (d->gc)
return -EBUSY;
- numchips = d->revmap_size / irqs_per_chip;
+ numchips = DIV_ROUND_UP(d->revmap_size, irqs_per_chip);
if (!numchips)
return -EINVAL;
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 2d7cd34..706724e 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -475,18 +475,6 @@ unsigned int irq_create_of_mapping(struct device_node *controller,
domain = controller ? irq_find_host(controller) : irq_default_domain;
if (!domain) {
-#ifdef CONFIG_MIPS
- /*
- * Workaround to avoid breaking interrupt controller drivers
- * that don't yet register an irq_domain. This is temporary
- * code. ~~~gcl, Feb 24, 2012
- *
- * Scheduled for removal in Linux v3.6. That should be enough
- * time.
- */
- if (intsize > 0)
- return intspec[0];
-#endif
pr_warn("no irq domain found for %s !\n",
of_node_full_name(controller));
return 0;
diff --git a/kernel/module.c b/kernel/module.c
index cab4bce..2069158 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -455,7 +455,7 @@ const struct kernel_symbol *find_symbol(const char *name,
EXPORT_SYMBOL_GPL(find_symbol);
/* Search for module by name: must hold module_mutex. */
-static struct module *find_module_all(const char *name,
+static struct module *find_module_all(const char *name, size_t len,
bool even_unformed)
{
struct module *mod;
@@ -463,7 +463,7 @@ static struct module *find_module_all(const char *name,
list_for_each_entry(mod, &modules, list) {
if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
continue;
- if (strcmp(mod->name, name) == 0)
+ if (strlen(mod->name) == len && !memcmp(mod->name, name, len))
return mod;
}
return NULL;
@@ -471,7 +471,7 @@ static struct module *find_module_all(const char *name,
struct module *find_module(const char *name)
{
- return find_module_all(name, false);
+ return find_module_all(name, strlen(name), false);
}
EXPORT_SYMBOL_GPL(find_module);
@@ -482,23 +482,28 @@ static inline void __percpu *mod_percpu(struct module *mod)
return mod->percpu;
}
-static int percpu_modalloc(struct module *mod,
- unsigned long size, unsigned long align)
+static int percpu_modalloc(struct module *mod, struct load_info *info)
{
+ Elf_Shdr *pcpusec = &info->sechdrs[info->index.pcpu];
+ unsigned long align = pcpusec->sh_addralign;
+
+ if (!pcpusec->sh_size)
+ return 0;
+
if (align > PAGE_SIZE) {
printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
mod->name, align, PAGE_SIZE);
align = PAGE_SIZE;
}
- mod->percpu = __alloc_reserved_percpu(size, align);
+ mod->percpu = __alloc_reserved_percpu(pcpusec->sh_size, align);
if (!mod->percpu) {
printk(KERN_WARNING
"%s: Could not allocate %lu bytes percpu data\n",
- mod->name, size);
+ mod->name, (unsigned long)pcpusec->sh_size);
return -ENOMEM;
}
- mod->percpu_size = size;
+ mod->percpu_size = pcpusec->sh_size;
return 0;
}
@@ -563,10 +568,12 @@ static inline void __percpu *mod_percpu(struct module *mod)
{
return NULL;
}
-static inline int percpu_modalloc(struct module *mod,
- unsigned long size, unsigned long align)
+static int percpu_modalloc(struct module *mod, struct load_info *info)
{
- return -ENOMEM;
+ /* UP modules shouldn't have this section: ENOMEM isn't quite right */
+ if (info->sechdrs[info->index.pcpu].sh_size != 0)
+ return -ENOMEM;
+ return 0;
}
static inline void percpu_modfree(struct module *mod)
{
@@ -2927,7 +2934,6 @@ static struct module *layout_and_allocate(struct load_info *info, int flags)
{
/* Module within temporary copy. */
struct module *mod;
- Elf_Shdr *pcpusec;
int err;
mod = setup_load_info(info, flags);
@@ -2942,17 +2948,10 @@ static struct module *layout_and_allocate(struct load_info *info, int flags)
err = module_frob_arch_sections(info->hdr, info->sechdrs,
info->secstrings, mod);
if (err < 0)
- goto out;
+ return ERR_PTR(err);
- pcpusec = &info->sechdrs[info->index.pcpu];
- if (pcpusec->sh_size) {
- /* We have a special allocation for this section. */
- err = percpu_modalloc(mod,
- pcpusec->sh_size, pcpusec->sh_addralign);
- if (err)
- goto out;
- pcpusec->sh_flags &= ~(unsigned long)SHF_ALLOC;
- }
+ /* We will do a special allocation for per-cpu sections later. */
+ info->sechdrs[info->index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC;
/* Determine total sizes, and put offsets in sh_entsize. For now
this is done generically; there doesn't appear to be any
@@ -2963,17 +2962,12 @@ static struct module *layout_and_allocate(struct load_info *info, int flags)
/* Allocate and move to the final place */
err = move_module(mod, info);
if (err)
- goto free_percpu;
+ return ERR_PTR(err);
/* Module has been copied to its final place now: return it. */
mod = (void *)info->sechdrs[info->index.mod].sh_addr;
kmemleak_load_module(mod, info);
return mod;
-
-free_percpu:
- percpu_modfree(mod);
-out:
- return ERR_PTR(err);
}
/* mod is no longer valid after this! */
@@ -3014,7 +3008,7 @@ static bool finished_loading(const char *name)
bool ret;
mutex_lock(&module_mutex);
- mod = find_module_all(name, true);
+ mod = find_module_all(name, strlen(name), true);
ret = !mod || mod->state == MODULE_STATE_LIVE
|| mod->state == MODULE_STATE_GOING;
mutex_unlock(&module_mutex);
@@ -3152,7 +3146,8 @@ static int add_unformed_module(struct module *mod)
again:
mutex_lock(&module_mutex);
- if ((old = find_module_all(mod->name, true)) != NULL) {
+ old = find_module_all(mod->name, strlen(mod->name), true);
+ if (old != NULL) {
if (old->state == MODULE_STATE_COMING
|| old->state == MODULE_STATE_UNFORMED) {
/* Wait in case it fails to load. */
@@ -3198,6 +3193,17 @@ out:
return err;
}
+static int unknown_module_param_cb(char *param, char *val, const char *modname)
+{
+ /* Check for magic 'dyndbg' arg */
+ int ret = ddebug_dyndbg_module_param_cb(param, val, modname);
+ if (ret != 0) {
+ printk(KERN_WARNING "%s: unknown parameter '%s' ignored\n",
+ modname, param);
+ }
+ return 0;
+}
+
/* Allocate and load the module: note that size of section 0 is always
zero, and we rely on this for optional sections. */
static int load_module(struct load_info *info, const char __user *uargs,
@@ -3237,6 +3243,11 @@ static int load_module(struct load_info *info, const char __user *uargs,
}
#endif
+ /* To avoid stressing percpu allocator, do this once we're unique. */
+ err = percpu_modalloc(mod, info);
+ if (err)
+ goto unlink_mod;
+
/* Now module is in final location, initialize linked lists, etc. */
err = module_unload_init(mod);
if (err)
@@ -3284,7 +3295,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
/* Module is ready to execute: parsing args may do that. */
err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
- -32768, 32767, &ddebug_dyndbg_module_param_cb);
+ -32768, 32767, unknown_module_param_cb);
if (err < 0)
goto bug_cleanup;
@@ -3563,10 +3574,8 @@ unsigned long module_kallsyms_lookup_name(const char *name)
/* Don't lock: we're in enough trouble already. */
preempt_disable();
if ((colon = strchr(name, ':')) != NULL) {
- *colon = '\0';
- if ((mod = find_module(name)) != NULL)
+ if ((mod = find_module_all(name, colon - name, false)) != NULL)
ret = mod_find_symname(mod, colon+1);
- *colon = ':';
} else {
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
diff --git a/kernel/panic.c b/kernel/panic.c
index 9771231..8018646 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -15,6 +15,7 @@
#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/random.h>
+#include <linux/ftrace.h>
#include <linux/reboot.h>
#include <linux/delay.h>
#include <linux/kexec.h>
@@ -399,6 +400,8 @@ struct slowpath_args {
static void warn_slowpath_common(const char *file, int line, void *caller,
unsigned taint, struct slowpath_args *args)
{
+ disable_trace_on_warning();
+
pr_warn("------------[ cut here ]------------\n");
pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS()\n",
raw_smp_processor_id(), current->pid, file, line, caller);
diff --git a/kernel/params.c b/kernel/params.c
index 53b958f..440e65d 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -787,7 +787,7 @@ static void __init kernel_add_sysfs_param(const char *name,
}
/*
- * param_sysfs_builtin - add contents in /sys/parameters for built-in modules
+ * param_sysfs_builtin - add sysfs parameters for built-in modules
*
* Add module_parameters to sysfs for "modules" built into the kernel.
*
diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c
index c6422ff..9012ecf 100644
--- a/kernel/power/autosleep.c
+++ b/kernel/power/autosleep.c
@@ -32,7 +32,8 @@ static void try_to_suspend(struct work_struct *work)
mutex_lock(&autosleep_lock);
- if (!pm_save_wakeup_count(initial_count)) {
+ if (!pm_save_wakeup_count(initial_count) ||
+ system_state != SYSTEM_RUNNING) {
mutex_unlock(&autosleep_lock);
goto out;
}
diff --git a/kernel/printk.c b/kernel/printk.c
index 8212c1a..69b0890 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1369,9 +1369,9 @@ static int console_trylock_for_printk(unsigned int cpu)
}
}
logbuf_cpu = UINT_MAX;
+ raw_spin_unlock(&logbuf_lock);
if (wake)
up(&console_sem);
- raw_spin_unlock(&logbuf_lock);
return retval;
}
@@ -1921,7 +1921,7 @@ void resume_console(void)
* called when a new CPU comes online (or fails to come up), and ensures
* that any such output gets printed.
*/
-static int __cpuinit console_cpu_notify(struct notifier_block *self,
+static int console_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
switch (action) {
diff --git a/kernel/profile.c b/kernel/profile.c
index 0bf4007..6631e1e 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -331,7 +331,7 @@ out:
put_cpu();
}
-static int __cpuinit profile_cpu_callback(struct notifier_block *info,
+static int profile_cpu_callback(struct notifier_block *info,
unsigned long action, void *__cpu)
{
int node, cpu = (unsigned long)__cpu;
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index b1fa551..f4871e5 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -1476,7 +1476,7 @@ rcu_torture_shutdown(void *arg)
* Execute random CPU-hotplug operations at the interval specified
* by the onoff_interval.
*/
-static int __cpuinit
+static int
rcu_torture_onoff(void *arg)
{
int cpu;
@@ -1558,7 +1558,7 @@ rcu_torture_onoff(void *arg)
return 0;
}
-static int __cpuinit
+static int
rcu_torture_onoff_init(void)
{
int ret;
@@ -1601,7 +1601,7 @@ static void rcu_torture_onoff_cleanup(void)
* CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then
* induces a CPU stall for the time specified by stall_cpu.
*/
-static int __cpuinit rcu_torture_stall(void *args)
+static int rcu_torture_stall(void *args)
{
unsigned long stop_at;
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e08abb9..068de3a 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -2910,7 +2910,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
* can accept some slop in the rsp->completed access due to the fact
* that this CPU cannot possibly have any RCU callbacks in flight yet.
*/
-static void __cpuinit
+static void
rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
{
unsigned long flags;
@@ -2962,7 +2962,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
mutex_unlock(&rsp->onoff_mutex);
}
-static void __cpuinit rcu_prepare_cpu(int cpu)
+static void rcu_prepare_cpu(int cpu)
{
struct rcu_state *rsp;
@@ -2974,7 +2974,7 @@ static void __cpuinit rcu_prepare_cpu(int cpu)
/*
* Handle CPU online/offline notification events.
*/
-static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
+static int rcu_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 4a39d36..b383258 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -521,10 +521,10 @@ static void invoke_rcu_callbacks_kthread(void);
static bool rcu_is_callbacks_kthread(void);
#ifdef CONFIG_RCU_BOOST
static void rcu_preempt_do_callbacks(void);
-static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
+static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
struct rcu_node *rnp);
#endif /* #ifdef CONFIG_RCU_BOOST */
-static void __cpuinit rcu_prepare_kthreads(int cpu);
+static void rcu_prepare_kthreads(int cpu);
static void rcu_cleanup_after_idle(int cpu);
static void rcu_prepare_for_idle(int cpu);
static void rcu_idle_count_callbacks_posted(void);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 63098a5..769e12e 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1352,7 +1352,7 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
* already exist. We only create this kthread for preemptible RCU.
* Returns zero if all is well, a negated errno otherwise.
*/
-static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
+static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
struct rcu_node *rnp)
{
int rnp_index = rnp - &rsp->node[0];
@@ -1507,7 +1507,7 @@ static int __init rcu_spawn_kthreads(void)
}
early_initcall(rcu_spawn_kthreads);
-static void __cpuinit rcu_prepare_kthreads(int cpu)
+static void rcu_prepare_kthreads(int cpu)
{
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
struct rcu_node *rnp = rdp->mynode;
@@ -1549,7 +1549,7 @@ static int __init rcu_scheduler_really_started(void)
}
early_initcall(rcu_scheduler_really_started);
-static void __cpuinit rcu_prepare_kthreads(int cpu)
+static void rcu_prepare_kthreads(int cpu)
{
}
diff --git a/kernel/relay.c b/kernel/relay.c
index b91488b..5001c98 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -516,7 +516,7 @@ static void setup_callbacks(struct rchan *chan,
*
* Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD)
*/
-static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb,
+static int relay_hotcpu_callback(struct notifier_block *nb,
unsigned long action,
void *hcpu)
{
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9b1f2e5..b7c32cb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -370,13 +370,6 @@ static struct rq *this_rq_lock(void)
#ifdef CONFIG_SCHED_HRTICK
/*
* Use HR-timers to deliver accurate preemption points.
- *
- * Its all a bit involved since we cannot program an hrt while holding the
- * rq->lock. So what we do is store a state in in rq->hrtick_* and ask for a
- * reschedule event.
- *
- * When we get rescheduled we reprogram the hrtick_timer outside of the
- * rq->lock.
*/
static void hrtick_clear(struct rq *rq)
@@ -404,6 +397,15 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
}
#ifdef CONFIG_SMP
+
+static int __hrtick_restart(struct rq *rq)
+{
+ struct hrtimer *timer = &rq->hrtick_timer;
+ ktime_t time = hrtimer_get_softexpires(timer);
+
+ return __hrtimer_start_range_ns(timer, time, 0, HRTIMER_MODE_ABS_PINNED, 0);
+}
+
/*
* called from hardirq (IPI) context
*/
@@ -412,7 +414,7 @@ static void __hrtick_start(void *arg)
struct rq *rq = arg;
raw_spin_lock(&rq->lock);
- hrtimer_restart(&rq->hrtick_timer);
+ __hrtick_restart(rq);
rq->hrtick_csd_pending = 0;
raw_spin_unlock(&rq->lock);
}
@@ -430,7 +432,7 @@ void hrtick_start(struct rq *rq, u64 delay)
hrtimer_set_expires(timer, time);
if (rq == this_rq()) {
- hrtimer_restart(timer);
+ __hrtick_restart(rq);
} else if (!rq->hrtick_csd_pending) {
__smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
rq->hrtick_csd_pending = 1;
@@ -4131,7 +4133,7 @@ void show_state_filter(unsigned long state_filter)
debug_show_all_locks();
}
-void __cpuinit init_idle_bootup_task(struct task_struct *idle)
+void init_idle_bootup_task(struct task_struct *idle)
{
idle->sched_class = &idle_sched_class;
}
@@ -4144,7 +4146,7 @@ void __cpuinit init_idle_bootup_task(struct task_struct *idle)
* NOTE: this function does not set the idle thread's NEED_RESCHED
* flag, to make booting more robust.
*/
-void __cpuinit init_idle(struct task_struct *idle, int cpu)
+void init_idle(struct task_struct *idle, int cpu)
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
@@ -4628,7 +4630,7 @@ static void set_rq_offline(struct rq *rq)
* migration_call - callback that gets triggered when a CPU is added.
* Here we can start up the necessary migration thread for the new CPU.
*/
-static int __cpuinit
+static int
migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
int cpu = (long)hcpu;
@@ -4682,12 +4684,12 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
* happens before everything else. This has to be lower priority than
* the notifier in the perf_event subsystem, though.
*/
-static struct notifier_block __cpuinitdata migration_notifier = {
+static struct notifier_block migration_notifier = {
.notifier_call = migration_call,
.priority = CPU_PRI_MIGRATION,
};
-static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
+static int sched_cpu_active(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
switch (action & ~CPU_TASKS_FROZEN) {
@@ -4700,7 +4702,7 @@ static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
}
}
-static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
+static int sched_cpu_inactive(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
switch (action & ~CPU_TASKS_FROZEN) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f77f9c5..bb456f4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5506,7 +5506,7 @@ void nohz_balance_enter_idle(int cpu)
set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
}
-static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
+static int sched_ilb_notifier(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
switch (action & ~CPU_TASKS_FROZEN) {
diff --git a/kernel/smp.c b/kernel/smp.c
index 4dba0f7..fe9f773 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -73,7 +73,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
return NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata hotplug_cfd_notifier = {
+static struct notifier_block hotplug_cfd_notifier = {
.notifier_call = hotplug_cfd,
};
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 02fc5c9..eb89e18 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -24,7 +24,7 @@
*/
static DEFINE_PER_CPU(struct task_struct *, idle_threads);
-struct task_struct * __cpuinit idle_thread_get(unsigned int cpu)
+struct task_struct *idle_thread_get(unsigned int cpu)
{
struct task_struct *tsk = per_cpu(idle_threads, cpu);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index ca25e6e..be3d351 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -699,7 +699,7 @@ void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
}
EXPORT_SYMBOL(send_remote_softirq);
-static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
+static int remote_softirq_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
/*
@@ -728,7 +728,7 @@ static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
+static struct notifier_block remote_softirq_cpu_notifier = {
.notifier_call = remote_softirq_cpu_notify,
};
@@ -830,7 +830,7 @@ static void takeover_tasklets(unsigned int cpu)
}
#endif /* CONFIG_HOTPLUG_CPU */
-static int __cpuinit cpu_callback(struct notifier_block *nfb,
+static int cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
@@ -845,7 +845,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata cpu_nfb = {
+static struct notifier_block cpu_nfb = {
.notifier_call = cpu_callback
};
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4ce13c3..ac09d98 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -599,6 +599,13 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "traceoff_on_warning",
+ .data = &__disable_trace_on_warning,
+ .maxlen = sizeof(__disable_trace_on_warning),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#endif
#ifdef CONFIG_MODULES
{
@@ -800,7 +807,7 @@ static struct ctl_table kern_table[] = {
#if defined(CONFIG_LOCKUP_DETECTOR)
{
.procname = "watchdog",
- .data = &watchdog_enabled,
+ .data = &watchdog_user_enabled,
.maxlen = sizeof (int),
.mode = 0644,
.proc_handler = proc_dowatchdog,
@@ -827,7 +834,7 @@ static struct ctl_table kern_table[] = {
},
{
.procname = "nmi_watchdog",
- .data = &watchdog_enabled,
+ .data = &watchdog_user_enabled,
.maxlen = sizeof (int),
.mode = 0644,
.proc_handler = proc_dowatchdog,
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index aea4a9e..b609213 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -3,7 +3,6 @@
#include "../fs/xfs/xfs_sysctl.h"
#include <linux/sunrpc/debug.h>
#include <linux/string.h>
-#include <net/ip_vs.h>
#include <linux/syscalls.h>
#include <linux/namei.h>
#include <linux/mount.h>
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 6d3f916..218bcb5 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -157,7 +157,10 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
dev->event_handler = tick_handle_periodic;
tick_device_setup_broadcast_func(dev);
cpumask_set_cpu(cpu, tick_broadcast_mask);
- tick_broadcast_start_periodic(bc);
+ if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
+ tick_broadcast_start_periodic(bc);
+ else
+ tick_broadcast_setup_oneshot(bc);
ret = 1;
} else {
/*
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 0cf1c14..e80183f 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -178,6 +178,11 @@ static bool can_stop_full_tick(void)
*/
if (!sched_clock_stable) {
trace_tick_stop(0, "unstable sched clock\n");
+ /*
+ * Don't allow the user to think they can get
+ * full NO_HZ with this machine.
+ */
+ WARN_ONCE(1, "NO_HZ FULL will not work with unstable sched clock");
return false;
}
#endif
@@ -293,7 +298,7 @@ static int __init tick_nohz_full_setup(char *str)
}
__setup("nohz_full=", tick_nohz_full_setup);
-static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb,
+static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
@@ -346,16 +351,6 @@ void __init tick_nohz_init(void)
}
cpu_notifier(tick_nohz_cpu_down_callback, 0);
-
- /* Make sure full dynticks CPU are also RCU nocbs */
- for_each_cpu(cpu, nohz_full_mask) {
- if (!rcu_is_nocb_cpu(cpu)) {
- pr_warning("NO_HZ: CPU %d is not RCU nocb: "
- "cleared from nohz_full range", cpu);
- cpumask_clear_cpu(cpu, nohz_full_mask);
- }
- }
-
cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask);
pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
}
diff --git a/kernel/timer.c b/kernel/timer.c
index 15bc1b4..4296d13 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1505,11 +1505,11 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);
-static int __cpuinit init_timers_cpu(int cpu)
+static int init_timers_cpu(int cpu)
{
int j;
struct tvec_base *base;
- static char __cpuinitdata tvec_base_done[NR_CPUS];
+ static char tvec_base_done[NR_CPUS];
if (!tvec_base_done[cpu]) {
static char boot_done;
@@ -1577,7 +1577,7 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea
}
}
-static void __cpuinit migrate_timers(int cpu)
+static void migrate_timers(int cpu)
{
struct tvec_base *old_base;
struct tvec_base *new_base;
@@ -1610,7 +1610,7 @@ static void __cpuinit migrate_timers(int cpu)
}
#endif /* CONFIG_HOTPLUG_CPU */
-static int __cpuinit timer_cpu_notify(struct notifier_block *self,
+static int timer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
@@ -1635,7 +1635,7 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata timers_nb = {
+static struct notifier_block timers_nb = {
.notifier_call = timer_cpu_notify,
};
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6c508ff..67708f4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -413,6 +413,17 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
return 0;
}
+static void ftrace_sync(struct work_struct *work)
+{
+ /*
+ * This function is just a stub to implement a hard force
+ * of synchronize_sched(). This requires synchronizing
+ * tasks even in userspace and idle.
+ *
+ * Yes, function tracing is rude.
+ */
+}
+
static int __unregister_ftrace_function(struct ftrace_ops *ops)
{
int ret;
@@ -440,8 +451,12 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
* so there'll be no new users. We must ensure
* all current users are done before we free
* the control data.
+ * Note synchronize_sched() is not enough, as we
+ * use preempt_disable() to do RCU, but the function
+ * tracer can be called where RCU is not active
+ * (before user_exit()).
*/
- synchronize_sched();
+ schedule_on_each_cpu(ftrace_sync);
control_ops_free(ops);
}
} else
@@ -456,9 +471,13 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
/*
* Dynamic ops may be freed, we must make sure that all
* callers are done before leaving this function.
+ *
+ * Again, normal synchronize_sched() is not good enough.
+ * We need to do a hard force of sched synchronization.
*/
if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
- synchronize_sched();
+ schedule_on_each_cpu(ftrace_sync);
+
return 0;
}
@@ -622,12 +641,18 @@ static int function_stat_show(struct seq_file *m, void *v)
if (rec->counter <= 1)
stddev = 0;
else {
- stddev = rec->time_squared - rec->counter * avg * avg;
+ /*
+ * Apply Welford's method:
+ * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2)
+ */
+ stddev = rec->counter * rec->time_squared -
+ rec->time * rec->time;
+
/*
* Divide only 1000 for ns^2 -> us^2 conversion.
* trace_print_graph_duration will divide 1000 again.
*/
- do_div(stddev, (rec->counter - 1) * 1000);
+ do_div(stddev, rec->counter * (rec->counter - 1) * 1000);
}
trace_seq_init(&s);
@@ -3512,8 +3537,12 @@ EXPORT_SYMBOL_GPL(ftrace_set_global_notrace);
static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata;
static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
+/* Used by function selftest to not test if filter is set */
+bool ftrace_filter_param __initdata;
+
static int __init set_ftrace_notrace(char *str)
{
+ ftrace_filter_param = true;
strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
return 1;
}
@@ -3521,6 +3550,7 @@ __setup("ftrace_notrace=", set_ftrace_notrace);
static int __init set_ftrace_filter(char *str)
{
+ ftrace_filter_param = true;
strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
return 1;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e71a8be..0cd500b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -115,6 +115,9 @@ cpumask_var_t __read_mostly tracing_buffer_mask;
enum ftrace_dump_mode ftrace_dump_on_oops;
+/* When set, tracing will stop when a WARN*() is hit */
+int __disable_trace_on_warning;
+
static int tracing_set_tracer(const char *buf);
#define MAX_TRACER_SIZE 100
@@ -149,6 +152,13 @@ static int __init set_ftrace_dump_on_oops(char *str)
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
+static int __init stop_trace_on_warning(char *str)
+{
+ __disable_trace_on_warning = 1;
+ return 1;
+}
+__setup("traceoff_on_warning=", stop_trace_on_warning);
+
static int __init boot_alloc_snapshot(char *str)
{
allocate_snapshot = true;
@@ -170,6 +180,7 @@ static int __init set_trace_boot_options(char *str)
}
__setup("trace_options=", set_trace_boot_options);
+
unsigned long long ns2usecs(cycle_t nsec)
{
nsec += 500;
@@ -193,6 +204,37 @@ static struct trace_array global_trace;
LIST_HEAD(ftrace_trace_arrays);
+int trace_array_get(struct trace_array *this_tr)
+{
+ struct trace_array *tr;
+ int ret = -ENODEV;
+
+ mutex_lock(&trace_types_lock);
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ if (tr == this_tr) {
+ tr->ref++;
+ ret = 0;
+ break;
+ }
+ }
+ mutex_unlock(&trace_types_lock);
+
+ return ret;
+}
+
+static void __trace_array_put(struct trace_array *this_tr)
+{
+ WARN_ON(!this_tr->ref);
+ this_tr->ref--;
+}
+
+void trace_array_put(struct trace_array *this_tr)
+{
+ mutex_lock(&trace_types_lock);
+ __trace_array_put(this_tr);
+ mutex_unlock(&trace_types_lock);
+}
+
int filter_current_check_discard(struct ring_buffer *buffer,
struct ftrace_event_call *call, void *rec,
struct ring_buffer_event *event)
@@ -215,9 +257,24 @@ cycle_t ftrace_now(int cpu)
return ts;
}
+/**
+ * tracing_is_enabled - Show if global_trace has been disabled
+ *
+ * Shows if the global trace has been enabled or not. It uses the
+ * mirror flag "buffer_disabled" to be used in fast paths such as for
+ * the irqsoff tracer. But it may be inaccurate due to races. If you
+ * need to know the accurate state, use tracing_is_on() which is a little
+ * slower, but accurate.
+ */
int tracing_is_enabled(void)
{
- return tracing_is_on();
+ /*
+ * For quick access (irqsoff uses this in fast path), just
+ * return the mirror variable of the state of the ring buffer.
+ * It's a little racy, but we don't really care.
+ */
+ smp_rmb();
+ return !global_trace.buffer_disabled;
}
/*
@@ -240,7 +297,7 @@ static struct tracer *trace_types __read_mostly;
/*
* trace_types_lock is used to protect the trace_types list.
*/
-static DEFINE_MUTEX(trace_types_lock);
+DEFINE_MUTEX(trace_types_lock);
/*
* serialize the access of the ring buffer
@@ -330,6 +387,23 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
+static void tracer_tracing_on(struct trace_array *tr)
+{
+ if (tr->trace_buffer.buffer)
+ ring_buffer_record_on(tr->trace_buffer.buffer);
+ /*
+ * This flag is looked at when buffers haven't been allocated
+ * yet, or by some tracers (like irqsoff), that just want to
+ * know if the ring buffer has been disabled, but it can handle
+ * races of where it gets disabled but we still do a record.
+ * As the check is in the fast path of the tracers, it is more
+ * important to be fast than accurate.
+ */
+ tr->buffer_disabled = 0;
+ /* Make the flag seen by readers */
+ smp_wmb();
+}
+
/**
* tracing_on - enable tracing buffers
*
@@ -338,15 +412,7 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
*/
void tracing_on(void)
{
- if (global_trace.trace_buffer.buffer)
- ring_buffer_record_on(global_trace.trace_buffer.buffer);
- /*
- * This flag is only looked at when buffers haven't been
- * allocated yet. We don't really care about the race
- * between setting this flag and actually turning
- * on the buffer.
- */
- global_trace.buffer_disabled = 0;
+ tracer_tracing_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_on);
@@ -540,6 +606,23 @@ void tracing_snapshot_alloc(void)
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
#endif /* CONFIG_TRACER_SNAPSHOT */
+static void tracer_tracing_off(struct trace_array *tr)
+{
+ if (tr->trace_buffer.buffer)
+ ring_buffer_record_off(tr->trace_buffer.buffer);
+ /*
+ * This flag is looked at when buffers haven't been allocated
+ * yet, or by some tracers (like irqsoff), that just want to
+ * know if the ring buffer has been disabled, but it can handle
+ * races of where it gets disabled but we still do a record.
+ * As the check is in the fast path of the tracers, it is more
+ * important to be fast than accurate.
+ */
+ tr->buffer_disabled = 1;
+ /* Make the flag seen by readers */
+ smp_wmb();
+}
+
/**
* tracing_off - turn off tracing buffers
*
@@ -550,26 +633,35 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
*/
void tracing_off(void)
{
- if (global_trace.trace_buffer.buffer)
- ring_buffer_record_off(global_trace.trace_buffer.buffer);
- /*
- * This flag is only looked at when buffers haven't been
- * allocated yet. We don't really care about the race
- * between setting this flag and actually turning
- * on the buffer.
- */
- global_trace.buffer_disabled = 1;
+ tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);
+void disable_trace_on_warning(void)
+{
+ if (__disable_trace_on_warning)
+ tracing_off();
+}
+
+/**
+ * tracer_tracing_is_on - show real state of ring buffer enabled
+ * @tr : the trace array to know if ring buffer is enabled
+ *
+ * Shows real state of the ring buffer if it is enabled or not.
+ */
+static int tracer_tracing_is_on(struct trace_array *tr)
+{
+ if (tr->trace_buffer.buffer)
+ return ring_buffer_record_is_on(tr->trace_buffer.buffer);
+ return !tr->buffer_disabled;
+}
+
/**
* tracing_is_on - show state of ring buffers enabled
*/
int tracing_is_on(void)
{
- if (global_trace.trace_buffer.buffer)
- return ring_buffer_record_is_on(global_trace.trace_buffer.buffer);
- return !global_trace.buffer_disabled;
+ return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);
@@ -1543,15 +1635,6 @@ trace_function(struct trace_array *tr,
__buffer_unlock_commit(buffer, event);
}
-void
-ftrace(struct trace_array *tr, struct trace_array_cpu *data,
- unsigned long ip, unsigned long parent_ip, unsigned long flags,
- int pc)
-{
- if (likely(!atomic_read(&data->disabled)))
- trace_function(tr, ip, parent_ip, flags, pc);
-}
-
#ifdef CONFIG_STACKTRACE
#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
@@ -2768,10 +2851,9 @@ static const struct seq_operations tracer_seq_ops = {
};
static struct trace_iterator *
-__tracing_open(struct inode *inode, struct file *file, bool snapshot)
+__tracing_open(struct trace_array *tr, struct trace_cpu *tc,
+ struct inode *inode, struct file *file, bool snapshot)
{
- struct trace_cpu *tc = inode->i_private;
- struct trace_array *tr = tc->tr;
struct trace_iterator *iter;
int cpu;
@@ -2850,8 +2932,6 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
tracing_iter_reset(iter, cpu);
}
- tr->ref++;
-
mutex_unlock(&trace_types_lock);
return iter;
@@ -2874,6 +2954,43 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
return 0;
}
+/*
+ * Open and update trace_array ref count.
+ * Must have the current trace_array passed to it.
+ */
+static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
+{
+ struct trace_array *tr = inode->i_private;
+
+ if (tracing_disabled)
+ return -ENODEV;
+
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
+ filp->private_data = inode->i_private;
+
+ return 0;
+
+}
+
+static int tracing_open_generic_tc(struct inode *inode, struct file *filp)
+{
+ struct trace_cpu *tc = inode->i_private;
+ struct trace_array *tr = tc->tr;
+
+ if (tracing_disabled)
+ return -ENODEV;
+
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
+ filp->private_data = inode->i_private;
+
+ return 0;
+
+}
+
static int tracing_release(struct inode *inode, struct file *file)
{
struct seq_file *m = file->private_data;
@@ -2881,17 +2998,20 @@ static int tracing_release(struct inode *inode, struct file *file)
struct trace_array *tr;
int cpu;
- if (!(file->f_mode & FMODE_READ))
+ /* Writes do not use seq_file, need to grab tr from inode */
+ if (!(file->f_mode & FMODE_READ)) {
+ struct trace_cpu *tc = inode->i_private;
+
+ trace_array_put(tc->tr);
return 0;
+ }
iter = m->private;
tr = iter->tr;
+ trace_array_put(tr);
mutex_lock(&trace_types_lock);
- WARN_ON(!tr->ref);
- tr->ref--;
-
for_each_tracing_cpu(cpu) {
if (iter->buffer_iter[cpu])
ring_buffer_read_finish(iter->buffer_iter[cpu]);
@@ -2910,20 +3030,49 @@ static int tracing_release(struct inode *inode, struct file *file)
kfree(iter->trace);
kfree(iter->buffer_iter);
seq_release_private(inode, file);
+
+ return 0;
+}
+
+static int tracing_release_generic_tr(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+
+ trace_array_put(tr);
return 0;
}
+static int tracing_release_generic_tc(struct inode *inode, struct file *file)
+{
+ struct trace_cpu *tc = inode->i_private;
+ struct trace_array *tr = tc->tr;
+
+ trace_array_put(tr);
+ return 0;
+}
+
+static int tracing_single_release_tr(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+
+ trace_array_put(tr);
+
+ return single_release(inode, file);
+}
+
static int tracing_open(struct inode *inode, struct file *file)
{
+ struct trace_cpu *tc = inode->i_private;
+ struct trace_array *tr = tc->tr;
struct trace_iterator *iter;
int ret = 0;
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
/* If this file was open for write, then erase contents */
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC)) {
- struct trace_cpu *tc = inode->i_private;
- struct trace_array *tr = tc->tr;
-
if (tc->cpu == RING_BUFFER_ALL_CPUS)
tracing_reset_online_cpus(&tr->trace_buffer);
else
@@ -2931,12 +3080,16 @@ static int tracing_open(struct inode *inode, struct file *file)
}
if (file->f_mode & FMODE_READ) {
- iter = __tracing_open(inode, file, false);
+ iter = __tracing_open(tr, tc, inode, file, false);
if (IS_ERR(iter))
ret = PTR_ERR(iter);
else if (trace_flags & TRACE_ITER_LATENCY_FMT)
iter->iter_flags |= TRACE_FILE_LAT_FMT;
}
+
+ if (ret < 0)
+ trace_array_put(tr);
+
return ret;
}
@@ -3293,9 +3446,14 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
static int tracing_trace_options_open(struct inode *inode, struct file *file)
{
+ struct trace_array *tr = inode->i_private;
+
if (tracing_disabled)
return -ENODEV;
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
return single_open(file, tracing_trace_options_show, inode->i_private);
}
@@ -3303,7 +3461,7 @@ static const struct file_operations tracing_iter_fops = {
.open = tracing_trace_options_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = tracing_single_release_tr,
.write = tracing_trace_options_write,
};
@@ -3791,6 +3949,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
if (tracing_disabled)
return -ENODEV;
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
mutex_lock(&trace_types_lock);
/* create a buffer to store the information to pass to userspace */
@@ -3843,6 +4004,7 @@ out:
fail:
kfree(iter->trace);
kfree(iter);
+ __trace_array_put(tr);
mutex_unlock(&trace_types_lock);
return ret;
}
@@ -3850,6 +4012,8 @@ fail:
static int tracing_release_pipe(struct inode *inode, struct file *file)
{
struct trace_iterator *iter = file->private_data;
+ struct trace_cpu *tc = inode->i_private;
+ struct trace_array *tr = tc->tr;
mutex_lock(&trace_types_lock);
@@ -3863,6 +4027,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
kfree(iter->trace);
kfree(iter);
+ trace_array_put(tr);
+
return 0;
}
@@ -3939,7 +4105,7 @@ static int tracing_wait_pipe(struct file *filp)
*
* iter->pos will be 0 if we haven't read anything.
*/
- if (!tracing_is_enabled() && iter->pos)
+ if (!tracing_is_on() && iter->pos)
break;
}
@@ -4320,6 +4486,8 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
/* resize the ring buffer to 0 */
tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
+ trace_array_put(tr);
+
return 0;
}
@@ -4328,6 +4496,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
{
unsigned long addr = (unsigned long)ubuf;
+ struct trace_array *tr = filp->private_data;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
struct print_entry *entry;
@@ -4387,7 +4556,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
local_save_flags(irq_flags);
size = sizeof(*entry) + cnt + 2; /* possible \n added */
- buffer = global_trace.trace_buffer.buffer;
+ buffer = tr->trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
irq_flags, preempt_count());
if (!event) {
@@ -4495,10 +4664,20 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
static int tracing_clock_open(struct inode *inode, struct file *file)
{
+ struct trace_array *tr = inode->i_private;
+ int ret;
+
if (tracing_disabled)
return -ENODEV;
- return single_open(file, tracing_clock_show, inode->i_private);
+ if (trace_array_get(tr))
+ return -ENODEV;
+
+ ret = single_open(file, tracing_clock_show, inode->i_private);
+ if (ret < 0)
+ trace_array_put(tr);
+
+ return ret;
}
struct ftrace_buffer_info {
@@ -4511,12 +4690,16 @@ struct ftrace_buffer_info {
static int tracing_snapshot_open(struct inode *inode, struct file *file)
{
struct trace_cpu *tc = inode->i_private;
+ struct trace_array *tr = tc->tr;
struct trace_iterator *iter;
struct seq_file *m;
int ret = 0;
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
if (file->f_mode & FMODE_READ) {
- iter = __tracing_open(inode, file, true);
+ iter = __tracing_open(tr, tc, inode, file, true);
if (IS_ERR(iter))
ret = PTR_ERR(iter);
} else {
@@ -4529,13 +4712,16 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file)
kfree(m);
return -ENOMEM;
}
- iter->tr = tc->tr;
+ iter->tr = tr;
iter->trace_buffer = &tc->tr->max_buffer;
iter->cpu_file = tc->cpu;
m->private = iter;
file->private_data = m;
}
+ if (ret < 0)
+ trace_array_put(tr);
+
return ret;
}
@@ -4616,9 +4802,12 @@ out:
static int tracing_snapshot_release(struct inode *inode, struct file *file)
{
struct seq_file *m = file->private_data;
+ int ret;
+
+ ret = tracing_release(inode, file);
if (file->f_mode & FMODE_READ)
- return tracing_release(inode, file);
+ return ret;
/* If write only, the seq_file is just a stub */
if (m)
@@ -4684,34 +4873,38 @@ static const struct file_operations tracing_pipe_fops = {
};
static const struct file_operations tracing_entries_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_generic_tc,
.read = tracing_entries_read,
.write = tracing_entries_write,
.llseek = generic_file_llseek,
+ .release = tracing_release_generic_tc,
};
static const struct file_operations tracing_total_entries_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_generic_tr,
.read = tracing_total_entries_read,
.llseek = generic_file_llseek,
+ .release = tracing_release_generic_tr,
};
static const struct file_operations tracing_free_buffer_fops = {
+ .open = tracing_open_generic_tr,
.write = tracing_free_buffer_write,
.release = tracing_free_buffer_release,
};
static const struct file_operations tracing_mark_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_generic_tr,
.write = tracing_mark_write,
.llseek = generic_file_llseek,
+ .release = tracing_release_generic_tr,
};
static const struct file_operations trace_clock_fops = {
.open = tracing_clock_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = tracing_single_release_tr,
.write = tracing_clock_write,
};
@@ -4739,13 +4932,19 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
struct trace_cpu *tc = inode->i_private;
struct trace_array *tr = tc->tr;
struct ftrace_buffer_info *info;
+ int ret;
if (tracing_disabled)
return -ENODEV;
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
+ if (!info) {
+ trace_array_put(tr);
return -ENOMEM;
+ }
mutex_lock(&trace_types_lock);
@@ -4763,7 +4962,11 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
mutex_unlock(&trace_types_lock);
- return nonseekable_open(inode, filp);
+ ret = nonseekable_open(inode, filp);
+ if (ret < 0)
+ trace_array_put(tr);
+
+ return ret;
}
static unsigned int
@@ -4863,8 +5066,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
mutex_lock(&trace_types_lock);
- WARN_ON(!iter->tr->ref);
- iter->tr->ref--;
+ __trace_array_put(iter->tr);
if (info->spare)
ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
@@ -5612,15 +5814,10 @@ rb_simple_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct trace_array *tr = filp->private_data;
- struct ring_buffer *buffer = tr->trace_buffer.buffer;
char buf[64];
int r;
- if (buffer)
- r = ring_buffer_record_is_on(buffer);
- else
- r = 0;
-
+ r = tracer_tracing_is_on(tr);
r = sprintf(buf, "%d\n", r);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
@@ -5642,11 +5839,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
if (buffer) {
mutex_lock(&trace_types_lock);
if (val) {
- ring_buffer_record_on(buffer);
+ tracer_tracing_on(tr);
if (tr->current_trace->start)
tr->current_trace->start(tr);
} else {
- ring_buffer_record_off(buffer);
+ tracer_tracing_off(tr);
if (tr->current_trace->stop)
tr->current_trace->stop(tr);
}
@@ -5659,9 +5856,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
}
static const struct file_operations rb_simple_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_generic_tr,
.read = rb_simple_read,
.write = rb_simple_write,
+ .release = tracing_release_generic_tr,
.llseek = default_llseek,
};
@@ -5933,7 +6131,7 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
trace_create_file("buffer_total_size_kb", 0444, d_tracer,
tr, &tracing_total_entries_fops);
- trace_create_file("free_buffer", 0644, d_tracer,
+ trace_create_file("free_buffer", 0200, d_tracer,
tr, &tracing_free_buffer_fops);
trace_create_file("trace_marker", 0220, d_tracer,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 20572ed..4a4f6e1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -224,6 +224,11 @@ enum {
extern struct list_head ftrace_trace_arrays;
+extern struct mutex trace_types_lock;
+
+extern int trace_array_get(struct trace_array *tr);
+extern void trace_array_put(struct trace_array *tr);
+
/*
* The global tracer (top) should be the first trace array added,
* but we check the flag anyway.
@@ -554,11 +559,6 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu);
void poll_wait_pipe(struct trace_iterator *iter);
-void ftrace(struct trace_array *tr,
- struct trace_array_cpu *data,
- unsigned long ip,
- unsigned long parent_ip,
- unsigned long flags, int pc);
void tracing_sched_switch_trace(struct trace_array *tr,
struct task_struct *prev,
struct task_struct *next,
@@ -774,6 +774,7 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
extern struct list_head ftrace_pids;
#ifdef CONFIG_FUNCTION_TRACER
+extern bool ftrace_filter_param __initdata;
static inline int ftrace_trace_task(struct task_struct *task)
{
if (list_empty(&ftrace_pids))
@@ -899,12 +900,6 @@ static inline void trace_branch_disable(void)
/* set ring buffers to default size if not already done so */
int tracing_update_buffers(void);
-/* trace event type bit fields, not numeric */
-enum {
- TRACE_EVENT_TYPE_PRINTF = 1,
- TRACE_EVENT_TYPE_RAW = 2,
-};
-
struct ftrace_event_field {
struct list_head link;
const char *name;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 27963e2..7d85429 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -41,6 +41,23 @@ static LIST_HEAD(ftrace_common_fields);
static struct kmem_cache *field_cachep;
static struct kmem_cache *file_cachep;
+#define SYSTEM_FL_FREE_NAME (1 << 31)
+
+static inline int system_refcount(struct event_subsystem *system)
+{
+ return system->ref_count & ~SYSTEM_FL_FREE_NAME;
+}
+
+static int system_refcount_inc(struct event_subsystem *system)
+{
+ return (system->ref_count++) & ~SYSTEM_FL_FREE_NAME;
+}
+
+static int system_refcount_dec(struct event_subsystem *system)
+{
+ return (--system->ref_count) & ~SYSTEM_FL_FREE_NAME;
+}
+
/* Double loops, do not use break, only goto's work */
#define do_for_each_event_file(tr, file) \
list_for_each_entry(tr, &ftrace_trace_arrays, list) { \
@@ -97,7 +114,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
field = kmem_cache_alloc(field_cachep, GFP_TRACE);
if (!field)
- goto err;
+ return -ENOMEM;
field->name = name;
field->type = type;
@@ -114,11 +131,6 @@ static int __trace_define_field(struct list_head *head, const char *type,
list_add(&field->link, head);
return 0;
-
-err:
- kmem_cache_free(field_cachep, field);
-
- return -ENOMEM;
}
int trace_define_field(struct ftrace_event_call *call, const char *type,
@@ -279,9 +291,11 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
}
call->class->reg(call, TRACE_REG_UNREGISTER, file);
}
- /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT */
+ /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
if (file->flags & FTRACE_EVENT_FL_SOFT_MODE)
set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
+ else
+ clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
break;
case 1:
/*
@@ -349,8 +363,8 @@ static void __put_system(struct event_subsystem *system)
{
struct event_filter *filter = system->filter;
- WARN_ON_ONCE(system->ref_count == 0);
- if (--system->ref_count)
+ WARN_ON_ONCE(system_refcount(system) == 0);
+ if (system_refcount_dec(system))
return;
list_del(&system->list);
@@ -359,13 +373,15 @@ static void __put_system(struct event_subsystem *system)
kfree(filter->filter_string);
kfree(filter);
}
+ if (system->ref_count & SYSTEM_FL_FREE_NAME)
+ kfree(system->name);
kfree(system);
}
static void __get_system(struct event_subsystem *system)
{
- WARN_ON_ONCE(system->ref_count == 0);
- system->ref_count++;
+ WARN_ON_ONCE(system_refcount(system) == 0);
+ system_refcount_inc(system);
}
static void __get_system_dir(struct ftrace_subsystem_dir *dir)
@@ -379,7 +395,7 @@ static void __put_system_dir(struct ftrace_subsystem_dir *dir)
{
WARN_ON_ONCE(dir->ref_count == 0);
/* If the subsystem is about to be freed, the dir must be too */
- WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1);
+ WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);
__put_system(dir->subsystem);
if (!--dir->ref_count)
@@ -394,16 +410,45 @@ static void put_system(struct ftrace_subsystem_dir *dir)
}
/*
+ * Open and update trace_array ref count.
+ * Must have the current trace_array passed to it.
+ */
+static int tracing_open_generic_file(struct inode *inode, struct file *filp)
+{
+ struct ftrace_event_file *file = inode->i_private;
+ struct trace_array *tr = file->tr;
+ int ret;
+
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
+ ret = tracing_open_generic(inode, filp);
+ if (ret < 0)
+ trace_array_put(tr);
+ return ret;
+}
+
+static int tracing_release_generic_file(struct inode *inode, struct file *filp)
+{
+ struct ftrace_event_file *file = inode->i_private;
+ struct trace_array *tr = file->tr;
+
+ trace_array_put(tr);
+
+ return 0;
+}
+
+/*
* __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
*/
-static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
- const char *sub, const char *event, int set)
+static int
+__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
+ const char *sub, const char *event, int set)
{
struct ftrace_event_file *file;
struct ftrace_event_call *call;
int ret = -EINVAL;
- mutex_lock(&event_mutex);
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
@@ -429,6 +474,17 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
ret = 0;
}
+
+ return ret;
+}
+
+static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
+ const char *sub, const char *event, int set)
+{
+ int ret;
+
+ mutex_lock(&event_mutex);
+ ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
mutex_unlock(&event_mutex);
return ret;
@@ -624,17 +680,17 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
struct ftrace_event_file *file = filp->private_data;
- char *buf;
+ char buf[4] = "0";
- if (file->flags & FTRACE_EVENT_FL_ENABLED) {
- if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)
- buf = "0*\n";
- else if (file->flags & FTRACE_EVENT_FL_SOFT_MODE)
- buf = "1*\n";
- else
- buf = "1\n";
- } else
- buf = "0\n";
+ if (file->flags & FTRACE_EVENT_FL_ENABLED &&
+ !(file->flags & FTRACE_EVENT_FL_SOFT_DISABLED))
+ strcpy(buf, "1");
+
+ if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED ||
+ file->flags & FTRACE_EVENT_FL_SOFT_MODE)
+ strcat(buf, "*");
+
+ strcat(buf, "\n");
return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
}
@@ -992,6 +1048,7 @@ static int subsystem_open(struct inode *inode, struct file *filp)
int ret;
/* Make sure the system still exists */
+ mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
list_for_each_entry(dir, &tr->systems, list) {
@@ -1007,6 +1064,7 @@ static int subsystem_open(struct inode *inode, struct file *filp)
}
exit_loop:
mutex_unlock(&event_mutex);
+ mutex_unlock(&trace_types_lock);
if (!system)
return -ENODEV;
@@ -1014,9 +1072,17 @@ static int subsystem_open(struct inode *inode, struct file *filp)
/* Some versions of gcc think dir can be uninitialized here */
WARN_ON(!dir);
+ /* Still need to increment the ref count of the system */
+ if (trace_array_get(tr) < 0) {
+ put_system(dir);
+ return -ENODEV;
+ }
+
ret = tracing_open_generic(inode, filp);
- if (ret < 0)
+ if (ret < 0) {
+ trace_array_put(tr);
put_system(dir);
+ }
return ret;
}
@@ -1027,16 +1093,23 @@ static int system_tr_open(struct inode *inode, struct file *filp)
struct trace_array *tr = inode->i_private;
int ret;
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
/* Make a temporary dir that has no system but points to tr */
dir = kzalloc(sizeof(*dir), GFP_KERNEL);
- if (!dir)
+ if (!dir) {
+ trace_array_put(tr);
return -ENOMEM;
+ }
dir->tr = tr;
ret = tracing_open_generic(inode, filp);
- if (ret < 0)
+ if (ret < 0) {
+ trace_array_put(tr);
kfree(dir);
+ }
filp->private_data = dir;
@@ -1047,6 +1120,8 @@ static int subsystem_release(struct inode *inode, struct file *file)
{
struct ftrace_subsystem_dir *dir = file->private_data;
+ trace_array_put(dir->tr);
+
/*
* If dir->subsystem is NULL, then this is a temporary
* descriptor that was made for a trace_array to enable
@@ -1174,9 +1249,10 @@ static const struct file_operations ftrace_set_event_fops = {
};
static const struct file_operations ftrace_enable_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_generic_file,
.read = event_enable_read,
.write = event_enable_write,
+ .release = tracing_release_generic_file,
.llseek = default_llseek,
};
@@ -1279,7 +1355,15 @@ create_new_subsystem(const char *name)
return NULL;
system->ref_count = 1;
- system->name = name;
+
+ /* Only allocate if dynamic (kprobes and modules) */
+ if (!core_kernel_data((unsigned long)name)) {
+ system->ref_count |= SYSTEM_FL_FREE_NAME;
+ system->name = kstrdup(name, GFP_KERNEL);
+ if (!system->name)
+ goto out_free;
+ } else
+ system->name = name;
system->filter = NULL;
@@ -1292,6 +1376,8 @@ create_new_subsystem(const char *name)
return system;
out_free:
+ if (system->ref_count & SYSTEM_FL_FREE_NAME)
+ kfree(system->name);
kfree(system);
return NULL;
}
@@ -1591,6 +1677,7 @@ static void __add_event_to_tracers(struct ftrace_event_call *call,
int trace_add_event_call(struct ftrace_event_call *call)
{
int ret;
+ mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);
ret = __register_event(call, NULL);
@@ -1598,11 +1685,13 @@ int trace_add_event_call(struct ftrace_event_call *call)
__add_event_to_tracers(call, NULL);
mutex_unlock(&event_mutex);
+ mutex_unlock(&trace_types_lock);
return ret;
}
/*
- * Must be called under locking both of event_mutex and trace_event_sem.
+ * Must be called under locking of trace_types_lock, event_mutex and
+ * trace_event_sem.
*/
static void __trace_remove_event_call(struct ftrace_event_call *call)
{
@@ -1614,11 +1703,13 @@ static void __trace_remove_event_call(struct ftrace_event_call *call)
/* Remove an event_call */
void trace_remove_event_call(struct ftrace_event_call *call)
{
+ mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);
down_write(&trace_event_sem);
__trace_remove_event_call(call);
up_write(&trace_event_sem);
mutex_unlock(&event_mutex);
+ mutex_unlock(&trace_types_lock);
}
#define for_each_event(event, start, end) \
@@ -1762,6 +1853,7 @@ static int trace_module_notify(struct notifier_block *self,
{
struct module *mod = data;
+ mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);
switch (val) {
case MODULE_STATE_COMING:
@@ -1772,6 +1864,7 @@ static int trace_module_notify(struct notifier_block *self,
break;
}
mutex_unlock(&event_mutex);
+ mutex_unlock(&trace_types_lock);
return 0;
}
@@ -2011,10 +2104,7 @@ event_enable_func(struct ftrace_hash *hash,
int ret;
/* hash funcs only work with set_ftrace_filter */
- if (!enabled)
- return -EINVAL;
-
- if (!param)
+ if (!enabled || !param)
return -EINVAL;
system = strsep(&param, ":");
@@ -2329,11 +2419,11 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
int event_trace_del_tracer(struct trace_array *tr)
{
- /* Disable any running events */
- __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
-
mutex_lock(&event_mutex);
+ /* Disable any running events */
+ __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
+
down_write(&trace_event_sem);
__trace_remove_event_dirs(tr);
debugfs_remove_recursive(tr->event_dir);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e1b653f..0d883dc 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -44,6 +44,7 @@ enum filter_op_ids
OP_LE,
OP_GT,
OP_GE,
+ OP_BAND,
OP_NONE,
OP_OPEN_PAREN,
};
@@ -54,6 +55,7 @@ struct filter_op {
int precedence;
};
+/* Order must be the same as enum filter_op_ids above */
static struct filter_op filter_ops[] = {
{ OP_OR, "||", 1 },
{ OP_AND, "&&", 2 },
@@ -64,6 +66,7 @@ static struct filter_op filter_ops[] = {
{ OP_LE, "<=", 5 },
{ OP_GT, ">", 5 },
{ OP_GE, ">=", 5 },
+ { OP_BAND, "&", 6 },
{ OP_NONE, "OP_NONE", 0 },
{ OP_OPEN_PAREN, "(", 0 },
};
@@ -156,6 +159,9 @@ static int filter_pred_##type(struct filter_pred *pred, void *event) \
case OP_GE: \
match = (*addr >= val); \
break; \
+ case OP_BAND: \
+ match = (*addr & val); \
+ break; \
default: \
break; \
} \
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index c4d6d71..b863f93 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -290,6 +290,21 @@ ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, void **data)
trace_dump_stack(STACK_SKIP);
}
+static void
+ftrace_dump_probe(unsigned long ip, unsigned long parent_ip, void **data)
+{
+ if (update_count(data))
+ ftrace_dump(DUMP_ALL);
+}
+
+/* Only dump the current CPU buffer. */
+static void
+ftrace_cpudump_probe(unsigned long ip, unsigned long parent_ip, void **data)
+{
+ if (update_count(data))
+ ftrace_dump(DUMP_ORIG);
+}
+
static int
ftrace_probe_print(const char *name, struct seq_file *m,
unsigned long ip, void *data)
@@ -327,6 +342,20 @@ ftrace_stacktrace_print(struct seq_file *m, unsigned long ip,
return ftrace_probe_print("stacktrace", m, ip, data);
}
+static int
+ftrace_dump_print(struct seq_file *m, unsigned long ip,
+ struct ftrace_probe_ops *ops, void *data)
+{
+ return ftrace_probe_print("dump", m, ip, data);
+}
+
+static int
+ftrace_cpudump_print(struct seq_file *m, unsigned long ip,
+ struct ftrace_probe_ops *ops, void *data)
+{
+ return ftrace_probe_print("cpudump", m, ip, data);
+}
+
static struct ftrace_probe_ops traceon_count_probe_ops = {
.func = ftrace_traceon_count,
.print = ftrace_traceon_print,
@@ -342,6 +371,16 @@ static struct ftrace_probe_ops stacktrace_count_probe_ops = {
.print = ftrace_stacktrace_print,
};
+static struct ftrace_probe_ops dump_probe_ops = {
+ .func = ftrace_dump_probe,
+ .print = ftrace_dump_print,
+};
+
+static struct ftrace_probe_ops cpudump_probe_ops = {
+ .func = ftrace_cpudump_probe,
+ .print = ftrace_cpudump_print,
+};
+
static struct ftrace_probe_ops traceon_probe_ops = {
.func = ftrace_traceon,
.print = ftrace_traceon_print,
@@ -425,6 +464,32 @@ ftrace_stacktrace_callback(struct ftrace_hash *hash,
param, enable);
}
+static int
+ftrace_dump_callback(struct ftrace_hash *hash,
+ char *glob, char *cmd, char *param, int enable)
+{
+ struct ftrace_probe_ops *ops;
+
+ ops = &dump_probe_ops;
+
+ /* Only dump once. */
+ return ftrace_trace_probe_callback(ops, hash, glob, cmd,
+ "1", enable);
+}
+
+static int
+ftrace_cpudump_callback(struct ftrace_hash *hash,
+ char *glob, char *cmd, char *param, int enable)
+{
+ struct ftrace_probe_ops *ops;
+
+ ops = &cpudump_probe_ops;
+
+ /* Only dump once. */
+ return ftrace_trace_probe_callback(ops, hash, glob, cmd,
+ "1", enable);
+}
+
static struct ftrace_func_command ftrace_traceon_cmd = {
.name = "traceon",
.func = ftrace_trace_onoff_callback,
@@ -440,6 +505,16 @@ static struct ftrace_func_command ftrace_stacktrace_cmd = {
.func = ftrace_stacktrace_callback,
};
+static struct ftrace_func_command ftrace_dump_cmd = {
+ .name = "dump",
+ .func = ftrace_dump_callback,
+};
+
+static struct ftrace_func_command ftrace_cpudump_cmd = {
+ .name = "cpudump",
+ .func = ftrace_cpudump_callback,
+};
+
static int __init init_func_cmd_traceon(void)
{
int ret;
@@ -450,13 +525,31 @@ static int __init init_func_cmd_traceon(void)
ret = register_ftrace_command(&ftrace_traceon_cmd);
if (ret)
- unregister_ftrace_command(&ftrace_traceoff_cmd);
+ goto out_free_traceoff;
ret = register_ftrace_command(&ftrace_stacktrace_cmd);
- if (ret) {
- unregister_ftrace_command(&ftrace_traceoff_cmd);
- unregister_ftrace_command(&ftrace_traceon_cmd);
- }
+ if (ret)
+ goto out_free_traceon;
+
+ ret = register_ftrace_command(&ftrace_dump_cmd);
+ if (ret)
+ goto out_free_stacktrace;
+
+ ret = register_ftrace_command(&ftrace_cpudump_cmd);
+ if (ret)
+ goto out_free_dump;
+
+ return 0;
+
+ out_free_dump:
+ unregister_ftrace_command(&ftrace_dump_cmd);
+ out_free_stacktrace:
+ unregister_ftrace_command(&ftrace_stacktrace_cmd);
+ out_free_traceon:
+ unregister_ftrace_command(&ftrace_traceon_cmd);
+ out_free_traceoff:
+ unregister_ftrace_command(&ftrace_traceoff_cmd);
+
return ret;
}
#else
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index b19d065..2aefbee 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -373,7 +373,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
struct trace_array_cpu *data;
unsigned long flags;
- if (likely(!tracer_enabled))
+ if (!tracer_enabled || !tracing_is_enabled())
return;
cpu = raw_smp_processor_id();
@@ -416,7 +416,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
else
return;
- if (!tracer_enabled)
+ if (!tracer_enabled || !tracing_is_enabled())
return;
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 9f46e98..7ed6976 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -35,12 +35,17 @@ struct trace_probe {
const char *symbol; /* symbol name */
struct ftrace_event_class class;
struct ftrace_event_call call;
- struct ftrace_event_file * __rcu *files;
+ struct list_head files;
ssize_t size; /* trace entry size */
unsigned int nr_args;
struct probe_arg args[];
};
+struct event_file_link {
+ struct ftrace_event_file *file;
+ struct list_head list;
+};
+
#define SIZEOF_TRACE_PROBE(n) \
(offsetof(struct trace_probe, args) + \
(sizeof(struct probe_arg) * (n)))
@@ -150,6 +155,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
goto error;
INIT_LIST_HEAD(&tp->list);
+ INIT_LIST_HEAD(&tp->files);
return tp;
error:
kfree(tp->call.name);
@@ -183,25 +189,6 @@ static struct trace_probe *find_trace_probe(const char *event,
return NULL;
}
-static int trace_probe_nr_files(struct trace_probe *tp)
-{
- struct ftrace_event_file **file;
- int ret = 0;
-
- /*
- * Since all tp->files updater is protected by probe_enable_lock,
- * we don't need to lock an rcu_read_lock.
- */
- file = rcu_dereference_raw(tp->files);
- if (file)
- while (*(file++))
- ret++;
-
- return ret;
-}
-
-static DEFINE_MUTEX(probe_enable_lock);
-
/*
* Enable trace_probe
* if the file is NULL, enable "perf" handler, or enable "trace" handler.
@@ -211,67 +198,42 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
{
int ret = 0;
- mutex_lock(&probe_enable_lock);
-
if (file) {
- struct ftrace_event_file **new, **old;
- int n = trace_probe_nr_files(tp);
-
- old = rcu_dereference_raw(tp->files);
- /* 1 is for new one and 1 is for stopper */
- new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *),
- GFP_KERNEL);
- if (!new) {
+ struct event_file_link *link;
+
+ link = kmalloc(sizeof(*link), GFP_KERNEL);
+ if (!link) {
ret = -ENOMEM;
- goto out_unlock;
+ goto out;
}
- memcpy(new, old, n * sizeof(struct ftrace_event_file *));
- new[n] = file;
- /* The last one keeps a NULL */
- rcu_assign_pointer(tp->files, new);
- tp->flags |= TP_FLAG_TRACE;
+ link->file = file;
+ list_add_tail_rcu(&link->list, &tp->files);
- if (old) {
- /* Make sure the probe is done with old files */
- synchronize_sched();
- kfree(old);
- }
+ tp->flags |= TP_FLAG_TRACE;
} else
tp->flags |= TP_FLAG_PROFILE;
- if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) &&
- !trace_probe_has_gone(tp)) {
+ if (trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) {
if (trace_probe_is_return(tp))
ret = enable_kretprobe(&tp->rp);
else
ret = enable_kprobe(&tp->rp.kp);
}
-
- out_unlock:
- mutex_unlock(&probe_enable_lock);
-
+ out:
return ret;
}
-static int
-trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file)
+static struct event_file_link *
+find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file)
{
- struct ftrace_event_file **files;
- int i;
+ struct event_file_link *link;
- /*
- * Since all tp->files updater is protected by probe_enable_lock,
- * we don't need to lock an rcu_read_lock.
- */
- files = rcu_dereference_raw(tp->files);
- if (files) {
- for (i = 0; files[i]; i++)
- if (files[i] == file)
- return i;
- }
+ list_for_each_entry(link, &tp->files, list)
+ if (link->file == file)
+ return link;
- return -1;
+ return NULL;
}
/*
@@ -283,41 +245,24 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
{
int ret = 0;
- mutex_lock(&probe_enable_lock);
-
if (file) {
- struct ftrace_event_file **new, **old;
- int n = trace_probe_nr_files(tp);
- int i, j;
+ struct event_file_link *link;
- old = rcu_dereference_raw(tp->files);
- if (n == 0 || trace_probe_file_index(tp, file) < 0) {
+ link = find_event_file_link(tp, file);
+ if (!link) {
ret = -EINVAL;
- goto out_unlock;
+ goto out;
}
- if (n == 1) { /* Remove the last file */
- tp->flags &= ~TP_FLAG_TRACE;
- new = NULL;
- } else {
- new = kzalloc(n * sizeof(struct ftrace_event_file *),
- GFP_KERNEL);
- if (!new) {
- ret = -ENOMEM;
- goto out_unlock;
- }
-
- /* This copy & check loop copies the NULL stopper too */
- for (i = 0, j = 0; j < n && i < n + 1; i++)
- if (old[i] != file)
- new[j++] = old[i];
- }
+ list_del_rcu(&link->list);
+ /* synchronize with kprobe_trace_func/kretprobe_trace_func */
+ synchronize_sched();
+ kfree(link);
- rcu_assign_pointer(tp->files, new);
+ if (!list_empty(&tp->files))
+ goto out;
- /* Make sure the probe is done with old files */
- synchronize_sched();
- kfree(old);
+ tp->flags &= ~TP_FLAG_TRACE;
} else
tp->flags &= ~TP_FLAG_PROFILE;
@@ -327,10 +272,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
else
disable_kprobe(&tp->rp.kp);
}
-
- out_unlock:
- mutex_unlock(&probe_enable_lock);
-
+ out:
return ret;
}
@@ -885,20 +827,10 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,
static __kprobes void
kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs)
{
- /*
- * Note: preempt is already disabled around the kprobe handler.
- * However, we still need an smp_read_barrier_depends() corresponding
- * to smp_wmb() in rcu_assign_pointer() to access the pointer.
- */
- struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
-
- if (unlikely(!file))
- return;
+ struct event_file_link *link;
- while (*file) {
- __kprobe_trace_func(tp, regs, *file);
- file++;
- }
+ list_for_each_entry_rcu(link, &tp->files, list)
+ __kprobe_trace_func(tp, regs, link->file);
}
/* Kretprobe handler */
@@ -945,20 +877,10 @@ static __kprobes void
kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
struct pt_regs *regs)
{
- /*
- * Note: preempt is already disabled around the kprobe handler.
- * However, we still need an smp_read_barrier_depends() corresponding
- * to smp_wmb() in rcu_assign_pointer() to access the pointer.
- */
- struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
+ struct event_file_link *link;
- if (unlikely(!file))
- return;
-
- while (*file) {
- __kretprobe_trace_func(tp, ri, regs, *file);
- file++;
- }
+ list_for_each_entry_rcu(link, &tp->files, list)
+ __kretprobe_trace_func(tp, ri, regs, link->file);
}
/* Event entry printers */
@@ -1157,6 +1079,10 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
int size, __size, dsize;
int rctx;
+ head = this_cpu_ptr(call->perf_events);
+ if (hlist_empty(head))
+ return;
+
dsize = __get_data_size(tp, regs);
__size = sizeof(*entry) + tp->size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
@@ -1172,10 +1098,7 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
entry->ip = (unsigned long)tp->rp.kp.addr;
memset(&entry[1], 0, dsize);
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
-
- head = this_cpu_ptr(call->perf_events);
- perf_trace_buf_submit(entry, size, rctx,
- entry->ip, 1, regs, head, NULL);
+ perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
}
/* Kretprobe profile handler */
@@ -1189,6 +1112,10 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
int size, __size, dsize;
int rctx;
+ head = this_cpu_ptr(call->perf_events);
+ if (hlist_empty(head))
+ return;
+
dsize = __get_data_size(tp, regs);
__size = sizeof(*entry) + tp->size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
@@ -1204,13 +1131,16 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
-
- head = this_cpu_ptr(call->perf_events);
- perf_trace_buf_submit(entry, size, rctx,
- entry->ret_ip, 1, regs, head, NULL);
+ perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
}
#endif /* CONFIG_PERF_EVENTS */
+/*
+ * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
+ *
+ * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
+ * lockless, but we can't race with this __init function.
+ */
static __kprobes
int kprobe_register(struct ftrace_event_call *event,
enum trace_reg type, void *data)
@@ -1376,6 +1306,10 @@ find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr)
return NULL;
}
+/*
+ * Nobody but us can call enable_trace_probe/disable_trace_probe at this
+ * stage, we can do this lockless.
+ */
static __init int kprobe_trace_self_tests_init(void)
{
int ret, warn = 0;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 2901e3b..a7329b7 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -640,13 +640,20 @@ out:
* Enable ftrace, sleep 1/10 second, and then read the trace
* buffer to see if all is in order.
*/
-int
+__init int
trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
{
int save_ftrace_enabled = ftrace_enabled;
unsigned long count;
int ret;
+#ifdef CONFIG_DYNAMIC_FTRACE
+ if (ftrace_filter_param) {
+ printk(KERN_CONT " ... kernel command line filter set: force PASS ... ");
+ return 0;
+ }
+#endif
+
/* make sure msleep has been recorded */
msleep(1);
@@ -727,13 +734,20 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
* Pretty much the same than for the function tracer from which the selftest
* has been borrowed.
*/
-int
+__init int
trace_selftest_startup_function_graph(struct tracer *trace,
struct trace_array *tr)
{
int ret;
unsigned long count;
+#ifdef CONFIG_DYNAMIC_FTRACE
+ if (ftrace_filter_param) {
+ printk(KERN_CONT " ... kernel command line filter set: force PASS ... ");
+ return 0;
+ }
+#endif
+
/*
* Simulate the init() callback but we attach a watchdog callback
* to detect and recover from possible hangs
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8f2ac73..322e164 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -306,6 +306,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
+ unsigned long irq_flags;
+ int pc;
int syscall_nr;
int size;
@@ -321,9 +323,12 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
+ local_save_flags(irq_flags);
+ pc = preempt_count();
+
buffer = tr->trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer,
- sys_data->enter_event->event.type, size, 0, 0);
+ sys_data->enter_event->event.type, size, irq_flags, pc);
if (!event)
return;
@@ -333,7 +338,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
if (!filter_current_check_discard(buffer, sys_data->enter_event,
entry, event))
- trace_current_buffer_unlock_commit(buffer, event, 0, 0);
+ trace_current_buffer_unlock_commit(buffer, event,
+ irq_flags, pc);
}
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
@@ -343,6 +349,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
+ unsigned long irq_flags;
+ int pc;
int syscall_nr;
syscall_nr = trace_get_syscall_nr(current, regs);
@@ -355,9 +363,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
if (!sys_data)
return;
+ local_save_flags(irq_flags);
+ pc = preempt_count();
+
buffer = tr->trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer,
- sys_data->exit_event->event.type, sizeof(*entry), 0, 0);
+ sys_data->exit_event->event.type, sizeof(*entry),
+ irq_flags, pc);
if (!event)
return;
@@ -367,7 +379,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
if (!filter_current_check_discard(buffer, sys_data->exit_event,
entry, event))
- trace_current_buffer_unlock_commit(buffer, event, 0, 0);
+ trace_current_buffer_unlock_commit(buffer, event,
+ irq_flags, pc);
}
static int reg_event_syscall_enter(struct ftrace_event_file *file,
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 32494fb0..d5d0cd3 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -283,8 +283,10 @@ static int create_trace_uprobe(int argc, char **argv)
return -EINVAL;
}
arg = strchr(argv[1], ':');
- if (!arg)
+ if (!arg) {
+ ret = -EINVAL;
goto fail_address_parse;
+ }
*arg++ = '\0';
filename = argv[1];
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 05039e3..1241d8c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -29,9 +29,9 @@
#include <linux/kvm_para.h>
#include <linux/perf_event.h>
-int watchdog_enabled = 1;
+int watchdog_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
-static int __read_mostly watchdog_disabled;
+static int __read_mostly watchdog_running;
static u64 __read_mostly sample_period;
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
@@ -63,7 +63,7 @@ static int __init hardlockup_panic_setup(char *str)
else if (!strncmp(str, "nopanic", 7))
hardlockup_panic = 0;
else if (!strncmp(str, "0", 1))
- watchdog_enabled = 0;
+ watchdog_user_enabled = 0;
return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -82,7 +82,7 @@ __setup("softlockup_panic=", softlockup_panic_setup);
static int __init nowatchdog_setup(char *str)
{
- watchdog_enabled = 0;
+ watchdog_user_enabled = 0;
return 1;
}
__setup("nowatchdog", nowatchdog_setup);
@@ -90,7 +90,7 @@ __setup("nowatchdog", nowatchdog_setup);
/* deprecated */
static int __init nosoftlockup_setup(char *str)
{
- watchdog_enabled = 0;
+ watchdog_user_enabled = 0;
return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);
@@ -158,7 +158,7 @@ void touch_all_softlockup_watchdogs(void)
#ifdef CONFIG_HARDLOCKUP_DETECTOR
void touch_nmi_watchdog(void)
{
- if (watchdog_enabled) {
+ if (watchdog_user_enabled) {
unsigned cpu;
for_each_present_cpu(cpu) {
@@ -347,11 +347,6 @@ static void watchdog_enable(unsigned int cpu)
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = watchdog_timer_fn;
- if (!watchdog_enabled) {
- kthread_park(current);
- return;
- }
-
/* Enable the perf event */
watchdog_nmi_enable(cpu);
@@ -374,6 +369,11 @@ static void watchdog_disable(unsigned int cpu)
watchdog_nmi_disable(cpu);
}
+static void watchdog_cleanup(unsigned int cpu, bool online)
+{
+ watchdog_disable(cpu);
+}
+
static int watchdog_should_run(unsigned int cpu)
{
return __this_cpu_read(hrtimer_interrupts) !=
@@ -475,28 +475,40 @@ static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
static void watchdog_nmi_disable(unsigned int cpu) { return; }
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
-/* prepare/enable/disable routines */
-/* sysctl functions */
-#ifdef CONFIG_SYSCTL
-static void watchdog_enable_all_cpus(void)
+static struct smp_hotplug_thread watchdog_threads = {
+ .store = &softlockup_watchdog,
+ .thread_should_run = watchdog_should_run,
+ .thread_fn = watchdog,
+ .thread_comm = "watchdog/%u",
+ .setup = watchdog_enable,
+ .cleanup = watchdog_cleanup,
+ .park = watchdog_disable,
+ .unpark = watchdog_enable,
+};
+
+static int watchdog_enable_all_cpus(void)
{
- unsigned int cpu;
+ int err = 0;
- if (watchdog_disabled) {
- watchdog_disabled = 0;
- for_each_online_cpu(cpu)
- kthread_unpark(per_cpu(softlockup_watchdog, cpu));
+ if (!watchdog_running) {
+ err = smpboot_register_percpu_thread(&watchdog_threads);
+ if (err)
+ pr_err("Failed to create watchdog threads, disabled\n");
+ else
+ watchdog_running = 1;
}
+
+ return err;
}
+/* prepare/enable/disable routines */
+/* sysctl functions */
+#ifdef CONFIG_SYSCTL
static void watchdog_disable_all_cpus(void)
{
- unsigned int cpu;
-
- if (!watchdog_disabled) {
- watchdog_disabled = 1;
- for_each_online_cpu(cpu)
- kthread_park(per_cpu(softlockup_watchdog, cpu));
+ if (watchdog_running) {
+ watchdog_running = 0;
+ smpboot_unregister_percpu_thread(&watchdog_threads);
}
}
@@ -507,45 +519,48 @@ static void watchdog_disable_all_cpus(void)
int proc_dowatchdog(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- int ret;
+ int err, old_thresh, old_enabled;
- if (watchdog_disabled < 0)
- return -ENODEV;
+ old_thresh = ACCESS_ONCE(watchdog_thresh);
+ old_enabled = ACCESS_ONCE(watchdog_user_enabled);
- ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
- if (ret || !write)
- return ret;
+ err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (err || !write)
+ return err;
set_sample_period();
/*
* Watchdog threads shouldn't be enabled if they are
- * disabled. The 'watchdog_disabled' variable check in
+ * disabled. The 'watchdog_running' variable check in
* watchdog_*_all_cpus() function takes care of this.
*/
- if (watchdog_enabled && watchdog_thresh)
- watchdog_enable_all_cpus();
+ if (watchdog_user_enabled && watchdog_thresh)
+ err = watchdog_enable_all_cpus();
else
watchdog_disable_all_cpus();
- return ret;
+ /* Restore old values on failure */
+ if (err) {
+ watchdog_thresh = old_thresh;
+ watchdog_user_enabled = old_enabled;
+ }
+
+ return err;
}
#endif /* CONFIG_SYSCTL */
-static struct smp_hotplug_thread watchdog_threads = {
- .store = &softlockup_watchdog,
- .thread_should_run = watchdog_should_run,
- .thread_fn = watchdog,
- .thread_comm = "watchdog/%u",
- .setup = watchdog_enable,
- .park = watchdog_disable,
- .unpark = watchdog_enable,
-};
-
void __init lockup_detector_init(void)
{
set_sample_period();
- if (smpboot_register_percpu_thread(&watchdog_threads)) {
- pr_err("Failed to create watchdog threads, disabled\n");
- watchdog_disabled = -ENODEV;
+
+#ifdef CONFIG_NO_HZ_FULL
+ if (watchdog_user_enabled) {
+ watchdog_user_enabled = 0;
+ pr_warning("Disabled lockup detectors by default for full dynticks\n");
+ pr_warning("You can reactivate it with 'sysctl -w kernel.watchdog=1'\n");
}
+#endif
+
+ if (watchdog_user_enabled)
+ watchdog_enable_all_cpus();
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f02c4a4..0b72e81 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4644,7 +4644,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
* Workqueues should be brought up before normal priority CPU notifiers.
* This will be registered high priority CPU notifier.
*/
-static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
+static int workqueue_cpu_up_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
@@ -4697,7 +4697,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
* Workqueues should be brought down after normal priority CPU notifiers.
* This will be registered as low priority CPU notifier.
*/
-static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb,
+static int workqueue_cpu_down_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
OpenPOWER on IntegriCloud