From 01e04f466e12e883907937eb04a9010533363f55 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 27 Feb 2015 00:39:21 +0100 Subject: idle / sleep: Avoid excessive disabling and enabling interrupts Disabling interrupts at the end of cpuidle_enter_freeze() is not useful, because its caller, cpuidle_idle_call(), re-enables them right away after invoking it. To avoid that unnecessary back and forth dance with interrupts, make cpuidle_enter_freeze() enable interrupts after calling enter_freeze_proper() and drop the local_irq_disable() at its end, so that all of the code paths in it end up with interrupts enabled. Then, cpuidle_idle_call() will not need to re-enable interrupts after calling cpuidle_enter_freeze() any more, because the latter will return with interrupts enabled, in analogy with cpuidle_enter(). Reported-by: Lorenzo Pieralisi Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) --- kernel/sched/idle.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 94b2d7b..f59198b 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -116,7 +116,6 @@ static void cpuidle_idle_call(void) */ if (idle_should_freeze()) { cpuidle_enter_freeze(); - local_irq_enable(); goto exit_idle; } -- cgit v1.1 From 790317e1b266c776765a4bdcedefea706ff0fada Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Fri, 13 Feb 2015 11:19:49 +0800 Subject: cpuset: initialize effective masks when clone_children is enabled If clone_children is enabled, effective masks won't be initialized due to the bug: # mount -t cgroup -o cpuset xxx /mnt # echo 1 > cgroup.clone_children # mkdir /mnt/tmp # cat /mnt/tmp/ # cat cpuset.effective_cpus # cat cpuset.cpus 0-15 And then this cpuset won't constrain the tasks in it. Either the bug or the fix has no effect on unified hierarchy, as there's no clone_chidren flag there any more. Reported-by: Christian Brauner Reported-by: Serge Hallyn Cc: # 3.17+ Signed-off-by: Zefan Li Signed-off-by: Tejun Heo Tested-by: Serge Hallyn --- kernel/cpuset.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1d1fe93..89d4ed0 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1979,7 +1979,9 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) spin_lock_irq(&callback_lock); cs->mems_allowed = parent->mems_allowed; + cs->effective_mems = parent->mems_allowed; cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); + cpumask_copy(cs->effective_cpus, parent->cpus_allowed); spin_unlock_irq(&callback_lock); out_unlock: mutex_unlock(&cpuset_mutex); -- cgit v1.1 From 79063bffc81f82689bd90e16da1b49408f3bf095 Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Fri, 13 Feb 2015 11:20:30 +0800 Subject: cpuset: fix a warning when clearing configured masks in old hierarchy When we clear cpuset.cpus, cpuset.effective_cpus won't be cleared: # mount -t cgroup -o cpuset xxx /mnt # mkdir /mnt/tmp # echo 0 > /mnt/tmp/cpuset.cpus # echo > /mnt/tmp/cpuset.cpus # cat cpuset.cpus # cat cpuset.effective_cpus 0-15 And a kernel warning in update_cpumasks_hier() is triggered: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 4028 at kernel/cpuset.c:894 update_cpumasks_hier+0x471/0x650() Cc: # 3.17+ Signed-off-by: Zefan Li Signed-off-by: Tejun Heo Tested-by: Serge Hallyn --- kernel/cpuset.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 89d4ed0..407611b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -873,7 +873,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) * If it becomes empty, inherit the effective mask of the * parent, which is guaranteed to have some CPUs. */ - if (cpumask_empty(new_cpus)) + if (cgroup_on_dfl(cp->css.cgroup) && cpumask_empty(new_cpus)) cpumask_copy(new_cpus, parent->effective_cpus); /* Skip the whole subtree if the cpumask remains the same. */ @@ -1129,7 +1129,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) * If it becomes empty, inherit the effective mask of the * parent, which is guaranteed to have some MEMs. */ - if (nodes_empty(*new_mems)) + if (cgroup_on_dfl(cp->css.cgroup) && nodes_empty(*new_mems)) *new_mems = parent->effective_mems; /* Skip the whole subtree if the nodemask remains the same. */ -- cgit v1.1 From 283cb41f426b723a0255702b761b0fc5d1b53a81 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Fri, 13 Feb 2015 11:58:07 +0800 Subject: cpuset: Fix cpuset sched_relax_domain_level The cpuset.sched_relax_domain_level can control how far we do immediate load balancing on a system. However, it was found on recent kernels that echo'ing a value into cpuset.sched_relax_domain_level did not reduce any immediate load balancing. The reason this occurred was because the update_domain_attr_tree() traversal did not update for the "top_cpuset". This resulted in nothing being changed when modifying the sched_relax_domain_level parameter. This patch is able to address that problem by having update_domain_attr_tree() allow updates for the root in the cpuset traversal. Fixes: fc560a26acce ("cpuset: replace cpuset->stack_list with cpuset_for_each_descendant_pre()") Cc: # 3.9+ Signed-off-by: Jason Low Signed-off-by: Zefan Li Signed-off-by: Tejun Heo Tested-by: Serge Hallyn --- kernel/cpuset.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 407611b..fc7f474 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -548,9 +548,6 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr, rcu_read_lock(); cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { - if (cp == root_cs) - continue; - /* skip the whole subtree if @cp doesn't have any CPU */ if (cpumask_empty(cp->cpus_allowed)) { pos_css = css_rightmost_descendant(pos_css); -- cgit v1.1 From dfcacc154fb38fdb2c243c3dbbdc1f26a64cedc8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 2 Mar 2015 22:25:37 +0100 Subject: cpuidle: Clean up fallback handling in cpuidle_idle_call() Move the fallback code path in cpuidle_idle_call() to the end of the function to avoid jumping to a label in an if () branch. Signed-off-by: Rafael J. Wysocki --- kernel/sched/idle.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index f59198b..84b93b6 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -124,20 +124,8 @@ static void cpuidle_idle_call(void) * Fall back to the default arch idle method on errors. */ next_state = cpuidle_select(drv, dev); - if (next_state < 0) { -use_default: - /* - * We can't use the cpuidle framework, let's use the default - * idle routine. - */ - if (current_clr_polling_and_test()) - local_irq_enable(); - else - arch_cpu_idle(); - - goto exit_idle; - } - + if (next_state < 0) + goto use_default; /* * The idle task must be scheduled, it is pointless to @@ -195,6 +183,19 @@ exit_idle: rcu_idle_exit(); start_critical_timings(); + return; + +use_default: + /* + * We can't use the cpuidle framework, let's use the default + * idle routine. + */ + if (current_clr_polling_and_test()) + local_irq_enable(); + else + arch_cpu_idle(); + + goto exit_idle; } /* -- cgit v1.1 From c064a0de1bfb07c34a3798822c7e1636eea866e8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 28 Feb 2015 22:24:48 +0100 Subject: livepatch: fix RCU usage in klp_find_external_symbol() While one must hold RCU-sched (aka. preempt_disable) for find_symbol() one must equally hold it over the use of the object returned. The moment you release the RCU-sched read lock, the object can be dead and gone. [jkosina@suse.cz: change subject line to be aligned with other patches] Cc: Seth Jennings Cc: Josh Poimboeuf Cc: Masami Hiramatsu Cc: Miroslav Benes Cc: Petr Mladek Cc: Jiri Kosina Cc: "Paul E. McKenney" Cc: Rusty Russell Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu Acked-by: Paul E. McKenney Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- kernel/livepatch/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 782172f..01ca088 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -248,11 +248,12 @@ static int klp_find_external_symbol(struct module *pmod, const char *name, /* first, check if it's an exported symbol */ preempt_disable(); sym = find_symbol(name, NULL, NULL, true, true); - preempt_enable(); if (sym) { *addr = sym->value; + preempt_enable(); return 0; } + preempt_enable(); /* otherwise check if it's in another .o within the patch module */ return klp_find_object_symbol(pmod->name, name, addr); -- cgit v1.1 From 17f480342026e54000731acaa69bf32787ce46cb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 27 Feb 2015 00:07:55 +0100 Subject: genirq / PM: Add flag for shared NO_SUSPEND interrupt lines It currently is required that all users of NO_SUSPEND interrupt lines pass the IRQF_NO_SUSPEND flag when requesting the IRQ or the WARN_ON_ONCE() in irq_pm_install_action() will trigger. That is done to warn about situations in which unprepared interrupt handlers may be run unnecessarily for suspended devices and may attempt to access those devices by mistake. However, it may cause drivers that have no technical reasons for using IRQF_NO_SUSPEND to set that flag just because they happen to share the interrupt line with something like a timer. Moreover, the generic handling of wakeup interrupts introduced by commit 9ce7a25849e8 (genirq: Simplify wakeup mechanism) only works for IRQs without any NO_SUSPEND users, so the drivers of wakeup devices needing to use shared NO_SUSPEND interrupt lines for signaling system wakeup generally have to detect wakeup in their interrupt handlers. Thus if they happen to share an interrupt line with a NO_SUSPEND user, they also need to request that their interrupt handlers be run after suspend_device_irqs(). In both cases the reason for using IRQF_NO_SUSPEND is not because the driver in question has a genuine need to run its interrupt handler after suspend_device_irqs(), but because it happens to share the line with some other NO_SUSPEND user. Otherwise, the driver would do without IRQF_NO_SUSPEND just fine. To make it possible to specify that condition explicitly, introduce a new IRQ action handler flag for shared IRQs, IRQF_COND_SUSPEND, that, when set, will indicate to the IRQ core that the interrupt user is generally fine with suspending the IRQ, but it also can tolerate handler invocations after suspend_device_irqs() and, in particular, it is capable of detecting system wakeup and triggering it as appropriate from its interrupt handler. That will allow us to work around a problem with a shared timer interrupt line on at91 platforms. Link: http://marc.info/?l=linux-kernel&m=142252777602084&w=2 Link: http://marc.info/?t=142252775300011&r=1&w=2 Link: https://lkml.org/lkml/2014/12/15/552 Reported-by: Boris Brezillon Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland --- kernel/irq/manage.c | 7 ++++++- kernel/irq/pm.c | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 196a06f..886d09e 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1474,8 +1474,13 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, * otherwise we'll have trouble later trying to figure out * which interrupt is which (messes up the interrupt freeing * logic etc). + * + * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and + * it cannot be set along with IRQF_NO_SUSPEND. */ - if ((irqflags & IRQF_SHARED) && !dev_id) + if (((irqflags & IRQF_SHARED) && !dev_id) || + (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) || + ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND))) return -EINVAL; desc = irq_to_desc(irq); diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index 3ca5325..5204a6d 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -43,9 +43,12 @@ void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) if (action->flags & IRQF_NO_SUSPEND) desc->no_suspend_depth++; + else if (action->flags & IRQF_COND_SUSPEND) + desc->cond_suspend_depth++; WARN_ON_ONCE(desc->no_suspend_depth && - desc->no_suspend_depth != desc->nr_actions); + (desc->no_suspend_depth + + desc->cond_suspend_depth) != desc->nr_actions); } /* @@ -61,6 +64,8 @@ void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) if (action->flags & IRQF_NO_SUSPEND) desc->no_suspend_depth--; + else if (action->flags & IRQF_COND_SUSPEND) + desc->cond_suspend_depth--; } static bool suspend_device_irq(struct irq_desc *desc, int irq) -- cgit v1.1 From 8603e1b30027f943cc9c1eef2b291d42c3347af1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 5 Mar 2015 08:04:13 -0500 Subject: workqueue: fix hang involving racing cancel[_delayed]_work_sync()'s for PREEMPT_NONE cancel[_delayed]_work_sync() are implemented using __cancel_work_timer() which grabs the PENDING bit using try_to_grab_pending() and then flushes the work item with PENDING set to prevent the on-going execution of the work item from requeueing itself. try_to_grab_pending() can always grab PENDING bit without blocking except when someone else is doing the above flushing during cancelation. In that case, try_to_grab_pending() returns -ENOENT. In this case, __cancel_work_timer() currently invokes flush_work(). The assumption is that the completion of the work item is what the other canceling task would be waiting for too and thus waiting for the same condition and retrying should allow forward progress without excessive busy looping Unfortunately, this doesn't work if preemption is disabled or the latter task has real time priority. Let's say task A just got woken up from flush_work() by the completion of the target work item. If, before task A starts executing, task B gets scheduled and invokes __cancel_work_timer() on the same work item, its try_to_grab_pending() will return -ENOENT as the work item is still being canceled by task A and flush_work() will also immediately return false as the work item is no longer executing. This puts task B in a busy loop possibly preventing task A from executing and clearing the canceling state on the work item leading to a hang. task A task B worker executing work __cancel_work_timer() try_to_grab_pending() set work CANCELING flush_work() block for work completion completion, wakes up A __cancel_work_timer() while (forever) { try_to_grab_pending() -ENOENT as work is being canceled flush_work() false as work is no longer executing } This patch removes the possible hang by updating __cancel_work_timer() to explicitly wait for clearing of CANCELING rather than invoking flush_work() after try_to_grab_pending() fails with -ENOENT. Link: http://lkml.kernel.org/g/20150206171156.GA8942@axis.com v3: bit_waitqueue() can't be used for work items defined in vmalloc area. Switched to custom wake function which matches the target work item and exclusive wait and wakeup. v2: v1 used wake_up() on bit_waitqueue() which leads to NULL deref if the target bit waitqueue has wait_bit_queue's on it. Use DEFINE_WAIT_BIT() and __wake_up_bit() instead. Reported by Tomeu Vizoso. Signed-off-by: Tejun Heo Reported-by: Rabin Vincent Cc: Tomeu Vizoso Cc: stable@vger.kernel.org Tested-by: Jesper Nilsson Tested-by: Rabin Vincent --- kernel/workqueue.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f288493..41ff75b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2728,19 +2728,57 @@ bool flush_work(struct work_struct *work) } EXPORT_SYMBOL_GPL(flush_work); +struct cwt_wait { + wait_queue_t wait; + struct work_struct *work; +}; + +static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait); + + if (cwait->work != key) + return 0; + return autoremove_wake_function(wait, mode, sync, key); +} + static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) { + static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq); unsigned long flags; int ret; do { ret = try_to_grab_pending(work, is_dwork, &flags); /* - * If someone else is canceling, wait for the same event it - * would be waiting for before retrying. + * If someone else is already canceling, wait for it to + * finish. flush_work() doesn't work for PREEMPT_NONE + * because we may get scheduled between @work's completion + * and the other canceling task resuming and clearing + * CANCELING - flush_work() will return false immediately + * as @work is no longer busy, try_to_grab_pending() will + * return -ENOENT as @work is still being canceled and the + * other canceling task won't be able to clear CANCELING as + * we're hogging the CPU. + * + * Let's wait for completion using a waitqueue. As this + * may lead to the thundering herd problem, use a custom + * wake function which matches @work along with exclusive + * wait and wakeup. */ - if (unlikely(ret == -ENOENT)) - flush_work(work); + if (unlikely(ret == -ENOENT)) { + struct cwt_wait cwait; + + init_wait(&cwait.wait); + cwait.wait.func = cwt_wakefn; + cwait.work = work; + + prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait, + TASK_UNINTERRUPTIBLE); + if (work_is_canceling(work)) + schedule(); + finish_wait(&cancel_waitq, &cwait.wait); + } } while (unlikely(ret < 0)); /* tell other tasks trying to grab @work to back off */ @@ -2749,6 +2787,16 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) flush_work(work); clear_work_data(work); + + /* + * Paired with prepare_to_wait() above so that either + * waitqueue_active() is visible here or !work_is_canceling() is + * visible there. + */ + smp_mb(); + if (waitqueue_active(&cancel_waitq)) + __wake_up(&cancel_waitq, TASK_NORMAL, 1, work); + return ret; } -- cgit v1.1 From ef2b22ac540c018bd574d1846ab95b9bfcf38702 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 2 Mar 2015 22:26:55 +0100 Subject: cpuidle / sleep: Use broadcast timer for states that stop local timer Commit 381063133246 (PM / sleep: Re-implement suspend-to-idle handling) overlooked the fact that entering some sufficiently deep idle states by CPUs may cause their local timers to stop and in those cases it is necessary to switch over to a broadcast timer prior to entering the idle state. If the cpuidle driver in use does not provide the new ->enter_freeze callback for any of the idle states, that problem affects suspend-to-idle too, but it is not taken into account after the changes made by commit 381063133246. Fix that by changing the definition of cpuidle_enter_freeze() and re-arranging of the code in cpuidle_idle_call(), so the former does not call cpuidle_enter() any more and the fallback case is handled by cpuidle_idle_call() directly. Fixes: 381063133246 (PM / sleep: Re-implement suspend-to-idle handling) Reported-and-tested-by: Lorenzo Pieralisi Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) --- kernel/sched/idle.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 84b93b6..80014a1 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -82,6 +82,7 @@ static void cpuidle_idle_call(void) struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int next_state, entered_state; unsigned int broadcast; + bool reflect; /* * Check if the idle task must be rescheduled. If it is the @@ -105,6 +106,9 @@ static void cpuidle_idle_call(void) */ rcu_idle_enter(); + if (cpuidle_not_available(drv, dev)) + goto use_default; + /* * Suspend-to-idle ("freeze") is a system state in which all user space * has been frozen, all I/O devices have been suspended and the only @@ -115,15 +119,22 @@ static void cpuidle_idle_call(void) * until a proper wakeup interrupt happens. */ if (idle_should_freeze()) { - cpuidle_enter_freeze(); - goto exit_idle; - } + entered_state = cpuidle_enter_freeze(drv, dev); + if (entered_state >= 0) { + local_irq_enable(); + goto exit_idle; + } - /* - * Ask the cpuidle framework to choose a convenient idle state. - * Fall back to the default arch idle method on errors. - */ - next_state = cpuidle_select(drv, dev); + reflect = false; + next_state = cpuidle_find_deepest_state(drv, dev); + } else { + reflect = true; + /* + * Ask the cpuidle framework to choose a convenient idle state. + */ + next_state = cpuidle_select(drv, dev); + } + /* Fall back to the default arch idle method on errors. */ if (next_state < 0) goto use_default; @@ -170,7 +181,8 @@ static void cpuidle_idle_call(void) /* * Give the governor an opportunity to reflect on the outcome */ - cpuidle_reflect(dev, entered_state); + if (reflect) + cpuidle_reflect(dev, entered_state); exit_idle: __current_set_polling(); -- cgit v1.1 From 168e47f2a6581fdbc5bb1845aeca1e50e2bc5c4b Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 25 Feb 2015 14:14:57 -0800 Subject: kernel/module.c: Update debug alignment after symtable generation When CONFIG_DEBUG_SET_MODULE_RONX is enabled, the sizes of module sections are aligned up so appropriate permissions can be applied. Adjusting for the symbol table may cause them to become unaligned. Make sure to re-align the sizes afterward. Signed-off-by: Laura Abbott Acked-by: Rusty Russell Signed-off-by: Catalin Marinas --- kernel/module.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index b34813f..cc93cf6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2313,11 +2313,13 @@ static void layout_symtab(struct module *mod, struct load_info *info) info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym); mod->core_size += strtab_size; + mod->core_size = debug_align(mod->core_size); /* Put string table section at end of init part of module. */ strsect->sh_flags |= SHF_ALLOC; strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, info->index.str) | INIT_OFFSET_MASK; + mod->init_size = debug_align(mod->init_size); pr_debug("\t%s\n", info->secstrings + strsect->sh_name); } -- cgit v1.1 From 30a22c215a0007603ffc08021f2e8b64018517dd Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 1 Mar 2015 10:11:05 -0500 Subject: console: Fix console name size mismatch commit 6ae9200f2cab7 ("enlarge console.name") increased the storage for the console name to 16 bytes, but not the corresponding struct console_cmdline::name storage. Console names longer than 8 bytes cause read beyond end-of-string and failure to match console; I'm not sure if there are other unexpected consequences. Cc: # 2.6.22+ Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- kernel/printk/console_cmdline.h | 2 +- kernel/printk/printk.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h index cbd69d8..2ca4a8b 100644 --- a/kernel/printk/console_cmdline.h +++ b/kernel/printk/console_cmdline.h @@ -3,7 +3,7 @@ struct console_cmdline { - char name[8]; /* Name of the driver */ + char name[16]; /* Name of the driver */ int index; /* Minor dev. to use */ char *options; /* Options for the driver */ #ifdef CONFIG_A11Y_BRAILLE_CONSOLE diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 01cfd69..bb0635b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2464,6 +2464,7 @@ void register_console(struct console *newcon) for (i = 0, c = console_cmdline; i < MAX_CMDLINECONSOLES && c->name[0]; i++, c++) { + BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); if (strcmp(c->name, newcon->name) != 0) continue; if (newcon->index >= 0 && -- cgit v1.1 From b24d443b8f17d9776f5fc1f6c780a0a21eb02913 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 4 Mar 2015 23:10:28 -0500 Subject: ftrace: Clear REGS_EN and TRAMP_EN flags on disabling record via sysctl When /proc/sys/kernel/ftrace_enabled is set to zero, all function tracing is disabled. But the records that represent the functions still hold information about the ftrace_ops that are hooked to them. ftrace_ops may request "REGS" (have a full set of pt_regs passed to the callback), or "TRAMP" (the ops has its own trampoline to use). When the record is updated to represent the state of the ops hooked to it, it sets "REGS_EN" and/or "TRAMP_EN" to state that the callback points to the correct trampoline (REGS has its own trampoline). When ftrace_enabled is set to zero, all ftrace locations are a nop, so they do not point to any trampoline. But the _EN flags are still set. This can cause the accounting to go wrong when ftrace_enabled is cleared and an ops that has a trampoline is registered or unregistered. For example, the following will cause ftrace to crash: # echo function_graph > /sys/kernel/debug/tracing/current_tracer # echo 0 > /proc/sys/kernel/ftrace_enabled # echo nop > /sys/kernel/debug/tracing/current_tracer # echo 1 > /proc/sys/kernel/ftrace_enabled # echo function_graph > /sys/kernel/debug/tracing/current_tracer As function_graph uses a trampoline, when ftrace_enabled is set to zero the updates to the record are not done. When enabling function_graph again, the record will still have the TRAMP_EN flag set, and it will look for an op that has a trampoline other than the function_graph ops, and fail to find one. Cc: stable@vger.kernel.org # 3.17+ Reported-by: Pratyush Anand Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 45e5cb1..14947e0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2041,8 +2041,12 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) if (!ftrace_rec_count(rec)) rec->flags = 0; else - /* Just disable the record (keep REGS state) */ - rec->flags &= ~FTRACE_FL_ENABLED; + /* + * Just disable the record, but keep the ops TRAMP + * and REGS states. The _EN flags must be disabled though. + */ + rec->flags &= ~(FTRACE_FL_ENABLED | FTRACE_FL_TRAMP_EN | + FTRACE_FL_REGS_EN); } return FTRACE_UPDATE_MAKE_NOP; -- cgit v1.1 From 1619dc3f8f555ee1cdd3c75db3885d5715442b12 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Fri, 6 Mar 2015 23:58:06 +0530 Subject: ftrace: Fix en(dis)able graph caller when en(dis)abling record via sysctl When ftrace is enabled globally through the proc interface, we must check if ftrace_graph_active is set. If it is set, then we should also pass the FTRACE_START_FUNC_RET command to ftrace_run_update_code(). Similarly, when ftrace is disabled globally through the proc interface, we must check if ftrace_graph_active is set. If it is set, then we should also pass the FTRACE_STOP_FUNC_RET command to ftrace_run_update_code(). Consider the following situation. # echo 0 > /proc/sys/kernel/ftrace_enabled After this ftrace_enabled = 0. # echo function_graph > /sys/kernel/debug/tracing/current_tracer Since ftrace_enabled = 0, ftrace_enable_ftrace_graph_caller() is never called. # echo 1 > /proc/sys/kernel/ftrace_enabled Now ftrace_enabled will be set to true, but still ftrace_enable_ftrace_graph_caller() will not be called, which is not desired. Further if we execute the following after this: # echo nop > /sys/kernel/debug/tracing/current_tracer Now since ftrace_enabled is set it will call ftrace_disable_ftrace_graph_caller(), which causes a kernel warning on the ARM platform. On the ARM platform, when ftrace_enable_ftrace_graph_caller() is called, it checks whether the old instruction is a nop or not. If it's not a nop, then it returns an error. If it is a nop then it replaces instruction at that address with a branch to ftrace_graph_caller. ftrace_disable_ftrace_graph_caller() behaves just the opposite. Therefore, if generic ftrace code ever calls either ftrace_enable_ftrace_graph_caller() or ftrace_disable_ftrace_graph_caller() consecutively two times in a row, then it will return an error, which will cause the generic ftrace code to raise a warning. Note, x86 does not have an issue with this because the architecture specific code for ftrace_enable_ftrace_graph_caller() and ftrace_disable_ftrace_graph_caller() does not check the previous state, and calling either of these functions twice in a row has no ill effect. Link: http://lkml.kernel.org/r/e4fbe64cdac0dd0e86a3bf914b0f83c0b419f146.1425666454.git.panand@redhat.com Cc: stable@vger.kernel.org # 2.6.31+ Signed-off-by: Pratyush Anand [ removed extra if (ftrace_start_up) and defined ftrace_graph_active as 0 if CONFIG_FUNCTION_GRAPH_TRACER is not set. ] Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 14947e0..ea520bb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1059,6 +1059,12 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer) static struct pid * const ftrace_swapper_pid = &init_struct_pid; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static int ftrace_graph_active; +#else +# define ftrace_graph_active 0 +#endif + #ifdef CONFIG_DYNAMIC_FTRACE static struct ftrace_ops *removed_ops; @@ -2692,24 +2698,36 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) static void ftrace_startup_sysctl(void) { + int command; + if (unlikely(ftrace_disabled)) return; /* Force update next time */ saved_ftrace_func = NULL; /* ftrace_start_up is true if we want ftrace running */ - if (ftrace_start_up) - ftrace_run_update_code(FTRACE_UPDATE_CALLS); + if (ftrace_start_up) { + command = FTRACE_UPDATE_CALLS; + if (ftrace_graph_active) + command |= FTRACE_START_FUNC_RET; + ftrace_run_update_code(command); + } } static void ftrace_shutdown_sysctl(void) { + int command; + if (unlikely(ftrace_disabled)) return; /* ftrace_start_up is true if ftrace is running */ - if (ftrace_start_up) - ftrace_run_update_code(FTRACE_DISABLE_CALLS); + if (ftrace_start_up) { + command = FTRACE_DISABLE_CALLS; + if (ftrace_graph_active) + command |= FTRACE_STOP_FUNC_RET; + ftrace_run_update_code(command); + } } static cycle_t ftrace_update_time; @@ -5594,8 +5612,6 @@ static struct ftrace_ops graph_ops = { ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash) }; -static int ftrace_graph_active; - int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) { return 0; -- cgit v1.1 From 524a38682573b2e15ab6317ccfe50280441514be Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 6 Mar 2015 19:55:13 -0500 Subject: ftrace: Fix ftrace enable ordering of sysctl ftrace_enabled Some archs (specifically PowerPC), are sensitive with the ordering of the enabling of the calls to function tracing and setting of the function to use to be traced. That is, update_ftrace_function() sets what function the ftrace_caller trampoline should call. Some archs require this to be set before calling ftrace_run_update_code(). Another bug was discovered, that ftrace_startup_sysctl() called ftrace_run_update_code() directly. If the function the ftrace_caller trampoline changes, then it will not be updated. Instead a call to ftrace_startup_enable() should be called because it tests to see if the callback changed since the code was disabled, and will tell the arch to update appropriately. Most archs do not need this notification, but PowerPC does. The problem could be seen by the following commands: # echo 0 > /proc/sys/kernel/ftrace_enabled # echo function > /sys/kernel/debug/tracing/current_tracer # echo 1 > /proc/sys/kernel/ftrace_enabled # cat /sys/kernel/debug/tracing/trace The trace will show that function tracing was not active. Cc: stable@vger.kernel.org # 2.6.27+ Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ea520bb..4f22802 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2710,7 +2710,7 @@ static void ftrace_startup_sysctl(void) command = FTRACE_UPDATE_CALLS; if (ftrace_graph_active) command |= FTRACE_START_FUNC_RET; - ftrace_run_update_code(command); + ftrace_startup_enable(command); } } @@ -5580,12 +5580,12 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, if (ftrace_enabled) { - ftrace_startup_sysctl(); - /* we are starting ftrace again */ if (ftrace_ops_list != &ftrace_list_end) update_ftrace_function(); + ftrace_startup_sysctl(); + } else { /* stopping ftrace calls (just send to ftrace_stub) */ ftrace_trace_function = ftrace_stub; -- cgit v1.1