From b93e0b8fa819c3d5641794ed9a07e643416aa0fd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 23 May 2014 18:10:21 +0200 Subject: irq_work: Split raised and lazy lists An irq work can be handled from two places: from the tick if the work carries the "lazy" flag and the tick is periodic, or from a self IPI. We merge all these works in a single list and we use some per cpu latch to avoid raising a self-IPI when one is already pending. Now we could do away with this ugly latch if only the list was only made of non-lazy works. Just enqueueing a work on the empty list would be enough to know if we need to raise an IPI or not. Also we are going to implement remote irq work queuing. Then the per CPU latch will need to become atomic in the global scope. That's too bad because, here as well, just enqueueing a work on an empty list of non-lazy works would be enough to know if we need to raise an IPI or not. So lets take a way out of this: split the works in two distinct lists, one for the works that can be handled by the next tick and another one for those handled by the IPI. Just checking if the latter is empty when we queue a new work is enough to know if we need to raise an IPI. Suggested-by: Peter Zijlstra Acked-by: Peter Zijlstra Cc: Andrew Morton Cc: Ingo Molnar Cc: Kevin Hilman Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- kernel/irq_work.c | 51 +++++++++++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 28 deletions(-) (limited to 'kernel/irq_work.c') diff --git a/kernel/irq_work.c b/kernel/irq_work.c index a82170e..126f254 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -19,8 +19,8 @@ #include -static DEFINE_PER_CPU(struct llist_head, irq_work_list); -static DEFINE_PER_CPU(int, irq_work_raised); +static DEFINE_PER_CPU(struct llist_head, raised_list); +static DEFINE_PER_CPU(struct llist_head, lazy_list); /* * Claim the entry so that no one else will poke at it. @@ -70,15 +70,13 @@ bool irq_work_queue(struct irq_work *work) /* Queue the entry and raise the IPI if needed. */ preempt_disable(); - llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); - - /* - * If the work is not "lazy" or the tick is stopped, raise the irq - * work interrupt (if supported by the arch), otherwise, just wait - * for the next tick. - */ - if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) { - if (!this_cpu_cmpxchg(irq_work_raised, 0, 1)) + /* If the work is "lazy", handle it from next tick if any */ + if (work->flags & IRQ_WORK_LAZY) { + if (llist_add(&work->llnode, &__get_cpu_var(lazy_list)) && + tick_nohz_tick_stopped()) + arch_irq_work_raise(); + } else { + if (llist_add(&work->llnode, &__get_cpu_var(raised_list))) arch_irq_work_raise(); } @@ -90,10 +88,11 @@ EXPORT_SYMBOL_GPL(irq_work_queue); bool irq_work_needs_cpu(void) { - struct llist_head *this_list; + struct llist_head *raised, *lazy; - this_list = &__get_cpu_var(irq_work_list); - if (llist_empty(this_list)) + raised = &__get_cpu_var(raised_list); + lazy = &__get_cpu_var(lazy_list); + if (llist_empty(raised) && llist_empty(lazy)) return false; /* All work should have been flushed before going offline */ @@ -102,28 +101,18 @@ bool irq_work_needs_cpu(void) return true; } -static void __irq_work_run(void) +static void irq_work_run_list(struct llist_head *list) { unsigned long flags; struct irq_work *work; - struct llist_head *this_list; struct llist_node *llnode; + BUG_ON(!irqs_disabled()); - /* - * Reset the "raised" state right before we check the list because - * an NMI may enqueue after we find the list empty from the runner. - */ - __this_cpu_write(irq_work_raised, 0); - barrier(); - - this_list = &__get_cpu_var(irq_work_list); - if (llist_empty(this_list)) + if (llist_empty(list)) return; - BUG_ON(!irqs_disabled()); - - llnode = llist_del_all(this_list); + llnode = llist_del_all(list); while (llnode != NULL) { work = llist_entry(llnode, struct irq_work, llnode); @@ -148,6 +137,12 @@ static void __irq_work_run(void) } } +static void __irq_work_run(void) +{ + irq_work_run_list(&__get_cpu_var(raised_list)); + irq_work_run_list(&__get_cpu_var(lazy_list)); +} + /* * Run the irq_work entries on this cpu. Requires to be ran from hardirq * context with local IRQs disabled. -- cgit v1.1 From 478850160636c4f0b2558451df0e42f8c5a10939 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 8 May 2014 01:37:48 +0200 Subject: irq_work: Implement remote queueing irq work currently only supports local callbacks. However its code is mostly ready to run remote callbacks and we have some potential user. The full nohz subsystem currently open codes its own remote irq work on top of the scheduler ipi when it wants a CPU to reevaluate its next tick. However this ad hoc solution bloats the scheduler IPI. Lets just extend the irq work subsystem to support remote queuing on top of the generic SMP IPI to handle this kind of user. This shouldn't add noticeable overhead. Suggested-by: Peter Zijlstra Acked-by: Peter Zijlstra Cc: Andrew Morton Cc: Eric Dumazet Cc: Ingo Molnar Cc: Kevin Hilman Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- kernel/irq_work.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'kernel/irq_work.c') diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 126f254..4b0a890 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -55,12 +56,34 @@ void __weak arch_irq_work_raise(void) */ } +#ifdef CONFIG_SMP /* - * Enqueue the irq_work @entry unless it's already pending + * Enqueue the irq_work @work on @cpu unless it's already pending * somewhere. * * Can be re-enqueued while the callback is still in progress. */ +bool irq_work_queue_on(struct irq_work *work, int cpu) +{ + /* All work should have been flushed before going offline */ + WARN_ON_ONCE(cpu_is_offline(cpu)); + + /* Arch remote IPI send/receive backend aren't NMI safe */ + WARN_ON_ONCE(in_nmi()); + + /* Only queue if not already pending */ + if (!irq_work_claim(work)) + return false; + + if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) + arch_send_call_function_single_ipi(cpu); + + return true; +} +EXPORT_SYMBOL_GPL(irq_work_queue_on); +#endif + +/* Enqueue the irq work @work on the current CPU */ bool irq_work_queue(struct irq_work *work) { /* Only queue if not already pending */ -- cgit v1.1 From a77353e5eb56b6c6098bfce59aff1f449451b0b7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Jun 2014 07:13:07 +0200 Subject: irq_work: Remove BUG_ON in irq_work_run() Because of a collision with 8d056c48e486 ("CPU hotplug, smp: flush any pending IPI callbacks before CPU offline"), which ends up calling hotplug_cfd()->flush_smp_call_function_queue()->irq_work_run(), which is not from IRQ context. And since that already calls irq_work_run() from the hotplug path, remove our entire hotplug handling. Reported-by: Stephen Warren Tested-by: Stephen Warren Reviewed-by: Srivatsa S. Bhat Cc: Frederic Weisbecker Cc: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-busatzs2gvz4v62258agipuf@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/irq_work.c | 46 ++++------------------------------------------ 1 file changed, 4 insertions(+), 42 deletions(-) (limited to 'kernel/irq_work.c') diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 4b0a890..e6bcbe7 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -160,20 +160,14 @@ static void irq_work_run_list(struct llist_head *list) } } -static void __irq_work_run(void) -{ - irq_work_run_list(&__get_cpu_var(raised_list)); - irq_work_run_list(&__get_cpu_var(lazy_list)); -} - /* - * Run the irq_work entries on this cpu. Requires to be ran from hardirq - * context with local IRQs disabled. + * hotplug calls this through: + * hotplug_cfd() -> flush_smp_call_function_queue() */ void irq_work_run(void) { - BUG_ON(!in_irq()); - __irq_work_run(); + irq_work_run_list(&__get_cpu_var(raised_list)); + irq_work_run_list(&__get_cpu_var(lazy_list)); } EXPORT_SYMBOL_GPL(irq_work_run); @@ -189,35 +183,3 @@ void irq_work_sync(struct irq_work *work) cpu_relax(); } EXPORT_SYMBOL_GPL(irq_work_sync); - -#ifdef CONFIG_HOTPLUG_CPU -static int irq_work_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - long cpu = (long)hcpu; - - switch (action) { - case CPU_DYING: - /* Called from stop_machine */ - if (WARN_ON_ONCE(cpu != smp_processor_id())) - break; - __irq_work_run(); - break; - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block cpu_notify; - -static __init int irq_work_init_cpu_notifier(void) -{ - cpu_notify.notifier_call = irq_work_cpu_notify; - cpu_notify.priority = 0; - register_cpu_notifier(&cpu_notify); - return 0; -} -device_initcall(irq_work_init_cpu_notifier); - -#endif /* CONFIG_HOTPLUG_CPU */ -- cgit v1.1