summaryrefslogtreecommitdiffstats
path: root/kernel/irq_work.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-08-04 16:23:30 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-08-04 16:23:30 -0700
commit98959948a7ba33cf8c708626e0d2a1456397e1c6 (patch)
tree8ba9b6c2679a06e89f23bdd7018e9bb0249e3bda /kernel/irq_work.c
parentef35ad26f8ff44d2c93e29952cdb336bda729d9d (diff)
parentcd3bd4e628a6d57d66afe77835fe8d93ae3e41f8 (diff)
downloadop-kernel-dev-98959948a7ba33cf8c708626e0d2a1456397e1c6.zip
op-kernel-dev-98959948a7ba33cf8c708626e0d2a1456397e1c6.tar.gz
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar: - Move the nohz kick code out of the scheduler tick to a dedicated IPI, from Frederic Weisbecker. This necessiated quite some background infrastructure rework, including: * Clean up some irq-work internals * Implement remote irq-work * Implement nohz kick on top of remote irq-work * Move full dynticks timer enqueue notification to new kick * Move multi-task notification to new kick * Remove unecessary barriers on multi-task notification - Remove proliferation of wait_on_bit() action functions and allow wait_on_bit_action() functions to support a timeout. (Neil Brown) - Another round of sched/numa improvements, cleanups and fixes. (Rik van Riel) - Implement fast idling of CPUs when the system is partially loaded, for better scalability. (Tim Chen) - Restructure and fix the CPU hotplug handling code that may leave cfs_rq and rt_rq's throttled when tasks are migrated away from a dead cpu. (Kirill Tkhai) - Robustify the sched topology setup code. (Peterz Zijlstra) - Improve sched_feat() handling wrt. static_keys (Jason Baron) - Misc fixes. * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (37 commits) sched/fair: Fix 'make xmldocs' warning caused by missing description sched: Use macro for magic number of -1 for setparam sched: Robustify topology setup sched: Fix sched_setparam() policy == -1 logic sched: Allow wait_on_bit_action() functions to support a timeout sched: Remove proliferation of wait_on_bit() action functions sched/numa: Revert "Use effective_load() to balance NUMA loads" sched: Fix static_key race with sched_feat() sched: Remove extra static_key*() function indirection sched/rt: Fix replenish_dl_entity() comments to match the current upstream code sched: Transform resched_task() into resched_curr() sched/deadline: Kill task_struct->pi_top_task sched: Rework check_for_tasks() sched/rt: Enqueue just unthrottled rt_rq back on the stack in __disable_runtime() sched/fair: Disable runtime_enabled on dying rq sched/numa: Change scan period code to match intent sched/numa: Rework best node setting in task_numa_migrate() sched/numa: Examine a task move when examining a task swap sched/numa: Simplify task_numa_compare() sched/numa: Use effective_load() to balance NUMA loads ...
Diffstat (limited to 'kernel/irq_work.c')
-rw-r--r--kernel/irq_work.c110
1 files changed, 45 insertions, 65 deletions
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index a82170e..e6bcbe7 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -16,11 +16,12 @@
#include <linux/tick.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
+#include <linux/smp.h>
#include <asm/processor.h>
-static DEFINE_PER_CPU(struct llist_head, irq_work_list);
-static DEFINE_PER_CPU(int, irq_work_raised);
+static DEFINE_PER_CPU(struct llist_head, raised_list);
+static DEFINE_PER_CPU(struct llist_head, lazy_list);
/*
* Claim the entry so that no one else will poke at it.
@@ -55,12 +56,34 @@ void __weak arch_irq_work_raise(void)
*/
}
+#ifdef CONFIG_SMP
/*
- * Enqueue the irq_work @entry unless it's already pending
+ * Enqueue the irq_work @work on @cpu unless it's already pending
* somewhere.
*
* Can be re-enqueued while the callback is still in progress.
*/
+bool irq_work_queue_on(struct irq_work *work, int cpu)
+{
+ /* All work should have been flushed before going offline */
+ WARN_ON_ONCE(cpu_is_offline(cpu));
+
+ /* Arch remote IPI send/receive backend aren't NMI safe */
+ WARN_ON_ONCE(in_nmi());
+
+ /* Only queue if not already pending */
+ if (!irq_work_claim(work))
+ return false;
+
+ if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
+ arch_send_call_function_single_ipi(cpu);
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(irq_work_queue_on);
+#endif
+
+/* Enqueue the irq work @work on the current CPU */
bool irq_work_queue(struct irq_work *work)
{
/* Only queue if not already pending */
@@ -70,15 +93,13 @@ bool irq_work_queue(struct irq_work *work)
/* Queue the entry and raise the IPI if needed. */
preempt_disable();
- llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
-
- /*
- * If the work is not "lazy" or the tick is stopped, raise the irq
- * work interrupt (if supported by the arch), otherwise, just wait
- * for the next tick.
- */
- if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
- if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
+ /* If the work is "lazy", handle it from next tick if any */
+ if (work->flags & IRQ_WORK_LAZY) {
+ if (llist_add(&work->llnode, &__get_cpu_var(lazy_list)) &&
+ tick_nohz_tick_stopped())
+ arch_irq_work_raise();
+ } else {
+ if (llist_add(&work->llnode, &__get_cpu_var(raised_list)))
arch_irq_work_raise();
}
@@ -90,10 +111,11 @@ EXPORT_SYMBOL_GPL(irq_work_queue);
bool irq_work_needs_cpu(void)
{
- struct llist_head *this_list;
+ struct llist_head *raised, *lazy;
- this_list = &__get_cpu_var(irq_work_list);
- if (llist_empty(this_list))
+ raised = &__get_cpu_var(raised_list);
+ lazy = &__get_cpu_var(lazy_list);
+ if (llist_empty(raised) && llist_empty(lazy))
return false;
/* All work should have been flushed before going offline */
@@ -102,28 +124,18 @@ bool irq_work_needs_cpu(void)
return true;
}
-static void __irq_work_run(void)
+static void irq_work_run_list(struct llist_head *list)
{
unsigned long flags;
struct irq_work *work;
- struct llist_head *this_list;
struct llist_node *llnode;
+ BUG_ON(!irqs_disabled());
- /*
- * Reset the "raised" state right before we check the list because
- * an NMI may enqueue after we find the list empty from the runner.
- */
- __this_cpu_write(irq_work_raised, 0);
- barrier();
-
- this_list = &__get_cpu_var(irq_work_list);
- if (llist_empty(this_list))
+ if (llist_empty(list))
return;
- BUG_ON(!irqs_disabled());
-
- llnode = llist_del_all(this_list);
+ llnode = llist_del_all(list);
while (llnode != NULL) {
work = llist_entry(llnode, struct irq_work, llnode);
@@ -149,13 +161,13 @@ static void __irq_work_run(void)
}
/*
- * Run the irq_work entries on this cpu. Requires to be ran from hardirq
- * context with local IRQs disabled.
+ * hotplug calls this through:
+ * hotplug_cfd() -> flush_smp_call_function_queue()
*/
void irq_work_run(void)
{
- BUG_ON(!in_irq());
- __irq_work_run();
+ irq_work_run_list(&__get_cpu_var(raised_list));
+ irq_work_run_list(&__get_cpu_var(lazy_list));
}
EXPORT_SYMBOL_GPL(irq_work_run);
@@ -171,35 +183,3 @@ void irq_work_sync(struct irq_work *work)
cpu_relax();
}
EXPORT_SYMBOL_GPL(irq_work_sync);
-
-#ifdef CONFIG_HOTPLUG_CPU
-static int irq_work_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- long cpu = (long)hcpu;
-
- switch (action) {
- case CPU_DYING:
- /* Called from stop_machine */
- if (WARN_ON_ONCE(cpu != smp_processor_id()))
- break;
- __irq_work_run();
- break;
- default:
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block cpu_notify;
-
-static __init int irq_work_init_cpu_notifier(void)
-{
- cpu_notify.notifier_call = irq_work_cpu_notify;
- cpu_notify.priority = 0;
- register_cpu_notifier(&cpu_notify);
- return 0;
-}
-device_initcall(irq_work_init_cpu_notifier);
-
-#endif /* CONFIG_HOTPLUG_CPU */
OpenPOWER on IntegriCloud