Diffstat (limited to 'kernel/softirq.c')
-rw-r--r-- | kernel/softirq.c | 799 |
1 file changed, 799 insertions, 0 deletions
diff --git a/kernel/softirq.c b/kernel/softirq.c
new file mode 100644
index 0000000..e7c69a7
--- /dev/null
+++ b/kernel/softirq.c
@@ -0,0 +1,799 @@
+/*
+ * linux/kernel/softirq.c
+ *
+ * Copyright (C) 1992 Linus Torvalds
+ *
+ * Distribute under GPLv2.
+ *
+ * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ *
+ * Remote softirq infrastructure is by Jens Axboe.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/rcupdate.h>
+#include <linux/smp.h>
+#include <linux/tick.h>
+
+#include <asm/irq.h>
+/*
+   - No shared variables, all the data are CPU local.
+   - If a softirq needs serialization, let it serialize itself
+     by its own spinlocks.
+   - Even if softirq is serialized, only local cpu is marked for
+     execution. Hence, we get something sort of weak cpu binding.
+     Though it is still not clear, will it result in better locality
+     or will not.
+
+   Examples:
+   - NET RX softirq. It is multithreaded and does not require
+     any global serialization.
+   - NET TX softirq. It kicks software netdevice queues, hence
+     it is logically serialized per device, but this serialization
+     is invisible to common code.
+   - Tasklets: serialized wrt itself.
+ */
+
+#ifndef __ARCH_IRQ_STAT
+irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
+EXPORT_SYMBOL(irq_stat);
+#endif
+
+static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
+
+static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+
+/*
+ * we cannot loop indefinitely here to avoid userspace starvation,
+ * but we also don't want to introduce a worst case 1/HZ latency
+ * to the pending events, so lets the scheduler to balance
+ * the softirq load for us.
+ */
+static inline void wakeup_softirqd(void)
+{
+	/* Interrupts are disabled: no need to stop preemption */
+	struct task_struct *tsk = __get_cpu_var(ksoftirqd);
+
+	if (tsk && tsk->state != TASK_RUNNING)
+		wake_up_process(tsk);
+}
+
+/*
+ * This one is for softirq.c-internal use,
+ * where hardirqs are disabled legitimately:
+ */
+#ifdef CONFIG_TRACE_IRQFLAGS
+static void __local_bh_disable(unsigned long ip)
+{
+	unsigned long flags;
+
+	WARN_ON_ONCE(in_irq());
+
+	raw_local_irq_save(flags);
+	add_preempt_count(SOFTIRQ_OFFSET);
+	/*
+	 * Were softirqs turned off above:
+	 */
+	if (softirq_count() == SOFTIRQ_OFFSET)
+		trace_softirqs_off(ip);
+	raw_local_irq_restore(flags);
+}
+#else /* !CONFIG_TRACE_IRQFLAGS */
+static inline void __local_bh_disable(unsigned long ip)
+{
+	add_preempt_count(SOFTIRQ_OFFSET);
+	barrier();
+}
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+void local_bh_disable(void)
+{
+	__local_bh_disable((unsigned long)__builtin_return_address(0));
+}
+
+EXPORT_SYMBOL(local_bh_disable);
+
+void __local_bh_enable(void)
+{
+	WARN_ON_ONCE(in_irq());
+
+	/*
+	 * softirqs should never be enabled by __local_bh_enable(),
+	 * it always nests inside local_bh_enable() sections:
+	 */
+	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
+
+	sub_preempt_count(SOFTIRQ_OFFSET);
+}
+EXPORT_SYMBOL_GPL(__local_bh_enable);
+
+/*
+ * Special-case - softirqs can safely be enabled in
+ * cond_resched_softirq(), or by __do_softirq(),
+ * without processing still-pending softirqs:
+ */
+void _local_bh_enable(void)
+{
+	WARN_ON_ONCE(in_irq());
+	WARN_ON_ONCE(!irqs_disabled());
+
+	if (softirq_count() == SOFTIRQ_OFFSET)
+		trace_softirqs_on((unsigned long)__builtin_return_address(0));
+	sub_preempt_count(SOFTIRQ_OFFSET);
+}
+
+EXPORT_SYMBOL(_local_bh_enable);
+
+static inline void _local_bh_enable_ip(unsigned long ip)
+{
+	WARN_ON_ONCE(in_irq() || irqs_disabled());
+#ifdef CONFIG_TRACE_IRQFLAGS
+	local_irq_disable();
+#endif
+	/*
+	 * Are softirqs going to be turned on now:
+	 */
+	if (softirq_count() == SOFTIRQ_OFFSET)
+		trace_softirqs_on(ip);
+	/*
+	 * Keep preemption disabled until we are done with
+	 * softirq processing:
+	 */
+	sub_preempt_count(SOFTIRQ_OFFSET - 1);
+
+	if (unlikely(!in_interrupt() && local_softirq_pending()))
+		do_softirq();
+
+	dec_preempt_count();
+#ifdef CONFIG_TRACE_IRQFLAGS
+	local_irq_enable();
+#endif
+	preempt_check_resched();
+}
+
+void local_bh_enable(void)
+{
+	_local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+}
+EXPORT_SYMBOL(local_bh_enable);
+
+void local_bh_enable_ip(unsigned long ip)
+{
+	_local_bh_enable_ip(ip);
+}
+EXPORT_SYMBOL(local_bh_enable_ip);
+
+/*
+ * We restart softirq processing MAX_SOFTIRQ_RESTART times,
+ * and we fall back to softirqd after that.
+ *
+ * This number has been established via experimentation.
+ * The two things to balance is latency against fairness -
+ * we want to handle softirqs as soon as possible, but they
+ * should not be able to lock up the box.
+ */
+#define MAX_SOFTIRQ_RESTART 10
+
+asmlinkage void __do_softirq(void)
+{
+	struct softirq_action *h;
+	__u32 pending;
+	int max_restart = MAX_SOFTIRQ_RESTART;
+	int cpu;
+
+	pending = local_softirq_pending();
+	account_system_vtime(current);
+
+	__local_bh_disable((unsigned long)__builtin_return_address(0));
+	trace_softirq_enter();
+
+	cpu = smp_processor_id();
+restart:
+	/* Reset the pending bitmask before enabling irqs */
+	set_softirq_pending(0);
+
+	local_irq_enable();
+
+	h = softirq_vec;
+
+	do {
+		if (pending & 1) {
+			int prev_count = preempt_count();
+
+			h->action(h);
+
+			if (unlikely(prev_count != preempt_count())) {
+				printk(KERN_ERR "huh, entered softirq %td %p "
+				       "with preempt_count %08x,"
+				       " exited with %08x?\n", h - softirq_vec,
+				       h->action, prev_count, preempt_count());
+				preempt_count() = prev_count;
+			}
+
+			rcu_bh_qsctr_inc(cpu);
+		}
+		h++;
+		pending >>= 1;
+	} while (pending);
+
+	local_irq_disable();
+
+	pending = local_softirq_pending();
+	if (pending && --max_restart)
+		goto restart;
+
+	if (pending)
+		wakeup_softirqd();
+
+	trace_softirq_exit();
+
+	account_system_vtime(current);
+	_local_bh_enable();
+}
+
+#ifndef __ARCH_HAS_DO_SOFTIRQ
+
+asmlinkage void do_softirq(void)
+{
+	__u32 pending;
+	unsigned long flags;
+
+	if (in_interrupt())
+		return;
+
+	local_irq_save(flags);
+
+	pending = local_softirq_pending();
+
+	if (pending)
+		__do_softirq();
+
+	local_irq_restore(flags);
+}
+
+#endif
+
+/*
+ * Enter an interrupt context.
+ */
+void irq_enter(void)
+{
+	int cpu = smp_processor_id();
+
+	if (idle_cpu(cpu) && !in_interrupt()) {
+		__irq_enter();
+		tick_check_idle(cpu);
+	} else
+		__irq_enter();
+}
+
+#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
+# define invoke_softirq()	__do_softirq()
+#else
+# define invoke_softirq()	do_softirq()
+#endif
+
+/*
+ * Exit an interrupt context. Process softirqs if needed and possible:
+ */
+void irq_exit(void)
+{
+	account_system_vtime(current);
+	trace_hardirq_exit();
+	sub_preempt_count(IRQ_EXIT_OFFSET);
+	if (!in_interrupt() && local_softirq_pending())
+		invoke_softirq();
+
+#ifdef CONFIG_NO_HZ
+	/* Make sure that timer wheel updates are propagated */
+	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
+		tick_nohz_stop_sched_tick(0);
+	rcu_irq_exit();
+#endif
+	preempt_enable_no_resched();
+}
+
+/*
+ * This function must run with irqs disabled!
+ */
+inline void raise_softirq_irqoff(unsigned int nr)
+{
+	__raise_softirq_irqoff(nr);
+
+	/*
+	 * If we're in an interrupt or softirq, we're done
+	 * (this also catches softirq-disabled code). We will
+	 * actually run the softirq once we return from
+	 * the irq or softirq.
+	 *
+	 * Otherwise we wake up ksoftirqd to make sure we
+	 * schedule the softirq soon.
+ */
+	if (!in_interrupt())
+		wakeup_softirqd();
+}
+
+void raise_softirq(unsigned int nr)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	raise_softirq_irqoff(nr);
+	local_irq_restore(flags);
+}
+
+void open_softirq(int nr, void (*action)(struct softirq_action *))
+{
+	softirq_vec[nr].action = action;
+}
+
+/* Tasklets */
+struct tasklet_head
+{
+	struct tasklet_struct *head;
+	struct tasklet_struct **tail;
+};
+
+static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
+static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
+
+void __tasklet_schedule(struct tasklet_struct *t)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	t->next = NULL;
+	*__get_cpu_var(tasklet_vec).tail = t;
+	__get_cpu_var(tasklet_vec).tail = &(t->next);
+	raise_softirq_irqoff(TASKLET_SOFTIRQ);
+	local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(__tasklet_schedule);
+
+void __tasklet_hi_schedule(struct tasklet_struct *t)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	t->next = NULL;
+	*__get_cpu_var(tasklet_hi_vec).tail = t;
+	__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
+	raise_softirq_irqoff(HI_SOFTIRQ);
+	local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(__tasklet_hi_schedule);
+
+static void tasklet_action(struct softirq_action *a)
+{
+	struct tasklet_struct *list;
+
+	local_irq_disable();
+	list = __get_cpu_var(tasklet_vec).head;
+	__get_cpu_var(tasklet_vec).head = NULL;
+	__get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
+	local_irq_enable();
+
+	while (list) {
+		struct tasklet_struct *t = list;
+
+		list = list->next;
+
+		if (tasklet_trylock(t)) {
+			if (!atomic_read(&t->count)) {
+				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
+					BUG();
+				t->func(t->data);
+				tasklet_unlock(t);
+				continue;
+			}
+			tasklet_unlock(t);
+		}
+
+		local_irq_disable();
+		t->next = NULL;
+		*__get_cpu_var(tasklet_vec).tail = t;
+		__get_cpu_var(tasklet_vec).tail = &(t->next);
+		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
+		local_irq_enable();
+	}
+}
+
+static void tasklet_hi_action(struct softirq_action *a)
+{
+	struct tasklet_struct *list;
+
+	local_irq_disable();
+	list = __get_cpu_var(tasklet_hi_vec).head;
+	__get_cpu_var(tasklet_hi_vec).head = NULL;
+	__get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
+	local_irq_enable();
+
+	while (list) {
+		struct tasklet_struct *t = list;
+
+		list = list->next;
+
+		if (tasklet_trylock(t)) {
+			if (!atomic_read(&t->count)) {
+				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
+					BUG();
+				t->func(t->data);
+				tasklet_unlock(t);
+				continue;
+			}
+			tasklet_unlock(t);
+		}
+
+		local_irq_disable();
+		t->next = NULL;
+		*__get_cpu_var(tasklet_hi_vec).tail = t;
+		__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
+		__raise_softirq_irqoff(HI_SOFTIRQ);
+		local_irq_enable();
+	}
+}
+
+
+void tasklet_init(struct tasklet_struct *t,
+		  void (*func)(unsigned long), unsigned long data)
+{
+	t->next = NULL;
+	t->state = 0;
+	atomic_set(&t->count, 0);
+	t->func = func;
+	t->data = data;
+}
+
+EXPORT_SYMBOL(tasklet_init);
+
+void tasklet_kill(struct tasklet_struct *t)
+{
+	if (in_interrupt())
+		printk("Attempt to kill tasklet from interrupt\n");
+
+	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
+		do
+			yield();
+		while (test_bit(TASKLET_STATE_SCHED, &t->state));
+	}
+	tasklet_unlock_wait(t);
+	clear_bit(TASKLET_STATE_SCHED, &t->state);
+}
+
+EXPORT_SYMBOL(tasklet_kill);
+
+DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
+EXPORT_PER_CPU_SYMBOL(softirq_work_list);
+
+static void __local_trigger(struct call_single_data *cp, int softirq)
+{
+	struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
+
+	list_add_tail(&cp->list, head);
+
+	/* Trigger the softirq only if the list was previously empty. */
+	if (head->next == &cp->list)
+		raise_softirq_irqoff(softirq);
+}
+
+#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
+static void remote_softirq_receive(void *data)
+{
+	struct call_single_data *cp = data;
+	unsigned long flags;
+	int softirq;
+
+	softirq = cp->priv;
+
+	local_irq_save(flags);
+	__local_trigger(cp, softirq);
+	local_irq_restore(flags);
+}
+
+static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+	if (cpu_online(cpu)) {
+		cp->func = remote_softirq_receive;
+		cp->info = cp;
+		cp->flags = 0;
+		cp->priv = softirq;
+
+		__smp_call_function_single(cpu, cp);
+		return 0;
+	}
+	return 1;
+}
+#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
+static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+	return 1;
+}
+#endif
+
+/**
+ * __send_remote_softirq - try to schedule softirq work on a remote cpu
+ * @cp: private SMP call function data area
+ * @cpu: the remote cpu
+ * @this_cpu: the currently executing cpu
+ * @softirq: the softirq for the work
+ *
+ * Attempt to schedule softirq work on a remote cpu. If this cannot be
+ * done, the work is instead queued up on the local cpu.
+ *
+ * Interrupts must be disabled.
+ */
+void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
+{
+	if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
+		__local_trigger(cp, softirq);
+}
+EXPORT_SYMBOL(__send_remote_softirq);
+
+/**
+ * send_remote_softirq - try to schedule softirq work on a remote cpu
+ * @cp: private SMP call function data area
+ * @cpu: the remote cpu
+ * @softirq: the softirq for the work
+ *
+ * Like __send_remote_softirq except that disabling interrupts and
+ * computing the current cpu is done for the caller.
+ */
+void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+	unsigned long flags;
+	int this_cpu;
+
+	local_irq_save(flags);
+	this_cpu = smp_processor_id();
+	__send_remote_softirq(cp, cpu, this_cpu, softirq);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(send_remote_softirq);
+
+static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
+					       unsigned long action, void *hcpu)
+{
+	/*
+	 * If a CPU goes away, splice its entries to the current CPU
+	 * and trigger a run of the softirq
+	 */
+	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+		int cpu = (unsigned long) hcpu;
+		int i;
+
+		local_irq_disable();
+		for (i = 0; i < NR_SOFTIRQS; i++) {
+			struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
+			struct list_head *local_head;
+
+			if (list_empty(head))
+				continue;
+
+			local_head = &__get_cpu_var(softirq_work_list[i]);
+			list_splice_init(head, local_head);
+			raise_softirq_irqoff(i);
+		}
+		local_irq_enable();
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
+	.notifier_call = remote_softirq_cpu_notify,
+};
+
+void __init softirq_init(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		int i;
+
+		per_cpu(tasklet_vec, cpu).tail =
+			&per_cpu(tasklet_vec, cpu).head;
+		per_cpu(tasklet_hi_vec, cpu).tail =
+			&per_cpu(tasklet_hi_vec, cpu).head;
+		for (i = 0; i < NR_SOFTIRQS; i++)
+			INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
+	}
+
+	register_hotcpu_notifier(&remote_softirq_cpu_notifier);
+
+	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
+	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
+}
+
+static int ksoftirqd(void * __bind_cpu)
+{
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	while (!kthread_should_stop()) {
+		preempt_disable();
+		if (!local_softirq_pending()) {
+			preempt_enable_no_resched();
+			schedule();
+			preempt_disable();
+		}
+
+		__set_current_state(TASK_RUNNING);
+
+		while (local_softirq_pending()) {
+			/* Preempt disable stops cpu going offline.
+			   If already offline, we'll be on wrong CPU:
+			   don't process */
+			if (cpu_is_offline((long)__bind_cpu))
+				goto wait_to_die;
+			do_softirq();
+			preempt_enable_no_resched();
+			cond_resched();
+			preempt_disable();
+		}
+		preempt_enable();
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+
+wait_to_die:
+	preempt_enable();
+	/* Wait for kthread_stop */
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * tasklet_kill_immediate is called to remove a tasklet which can already be
+ * scheduled for execution on @cpu.
+ *
+ * Unlike tasklet_kill, this function removes the tasklet
+ * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
+ *
+ * When this function is called, @cpu must be in the CPU_DEAD state.
+ */
+void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
+{
+	struct tasklet_struct **i;
+
+	BUG_ON(cpu_online(cpu));
+	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
+
+	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
+		return;
+
+	/* CPU is dead, so no lock needed. */
+	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
+		if (*i == t) {
+			*i = t->next;
+			/* If this was the tail element, move the tail ptr */
+			if (*i == NULL)
+				per_cpu(tasklet_vec, cpu).tail = i;
+			return;
+		}
+	}
+	BUG();
+}
+
+static void takeover_tasklets(unsigned int cpu)
+{
+	/* CPU is dead, so no lock needed. */
+	local_irq_disable();
+
+	/* Find end, append list for that CPU. */
+	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
+		*(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
+		__get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
+		per_cpu(tasklet_vec, cpu).head = NULL;
+		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
+	}
+	raise_softirq_irqoff(TASKLET_SOFTIRQ);
+
+	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
+		*__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
+		__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
+		per_cpu(tasklet_hi_vec, cpu).head = NULL;
+		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
+	}
+	raise_softirq_irqoff(HI_SOFTIRQ);
+
+	local_irq_enable();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static int __cpuinit cpu_callback(struct notifier_block *nfb,
+				  unsigned long action,
+				  void *hcpu)
+{
+	int hotcpu = (unsigned long)hcpu;
+	struct task_struct *p;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
+		if (IS_ERR(p)) {
+			printk("ksoftirqd for %i failed\n", hotcpu);
+			return NOTIFY_BAD;
+		}
+		kthread_bind(p, hotcpu);
+		per_cpu(ksoftirqd, hotcpu) = p;
+		break;
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		wake_up_process(per_cpu(ksoftirqd, hotcpu));
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+		if (!per_cpu(ksoftirqd, hotcpu))
+			break;
+		/* Unbind so it can run. Fall thru. */
+		kthread_bind(per_cpu(ksoftirqd, hotcpu),
+			     any_online_cpu(cpu_online_map));
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN: {
+		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+
+		p = per_cpu(ksoftirqd, hotcpu);
+		per_cpu(ksoftirqd, hotcpu) = NULL;
+		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
+		kthread_stop(p);
+		takeover_tasklets(hotcpu);
+		break;
+	}
+#endif /* CONFIG_HOTPLUG_CPU */
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata cpu_nfb = {
+	.notifier_call = cpu_callback
+};
+
+static __init int spawn_ksoftirqd(void)
+{
+	void *cpu = (void *)(long)smp_processor_id();
+	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
+
+	BUG_ON(err == NOTIFY_BAD);
+	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
+	register_cpu_notifier(&cpu_nfb);
+	return 0;
+}
+early_initcall(spawn_ksoftirqd);
+
+#ifdef CONFIG_SMP
+/*
+ * Call a function on all processors
+ */
+int on_each_cpu(void (*func) (void *info), void *info, int wait)
+{
+	int ret = 0;
+
+	preempt_disable();
+	ret = smp_call_function(func, info, wait);
+	local_irq_disable();
+	func(info);
+	local_irq_enable();
+	preempt_enable();
+	return ret;
+}
+EXPORT_SYMBOL(on_each_cpu);
+#endif
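
A few illustrative usage sketches for the interfaces defined in this file follow. None of the code below is part of the commit above, and every driver-side name in it (structures, handlers, EXAMPLE_SOFTIRQ) is a hypothetical placeholder.

First, local_bh_disable()/local_bh_enable(): process-context code typically brackets accesses to per-CPU data that a softirq or tasklet handler also touches, roughly like this minimal sketch:

/* Hypothetical per-CPU counters shared between process context and a softirq. */
#include <linux/interrupt.h>
#include <linux/percpu.h>

struct my_stats {
	unsigned long packets;
	unsigned long bytes;
};

static DEFINE_PER_CPU(struct my_stats, my_stats);

/* Called from process context; a softirq handler updates the same fields. */
static void my_stats_add(unsigned long bytes)
{
	struct my_stats *s;

	local_bh_disable();            /* keep softirqs off this CPU ... */
	s = &__get_cpu_var(my_stats);  /* ... so the update cannot be interleaved */
	s->packets++;
	s->bytes += bytes;
	local_bh_enable();             /* may run pending softirqs on the way out */
}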
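
open_softirq() and raise_softirq() only work for the fixed set of softirq numbers in the NR_SOFTIRQS enum; softirq slots cannot be allocated at run time. The sketch below therefore assumes a hypothetical EXAMPLE_SOFTIRQ entry had been added to that enum in <linux/interrupt.h>:

/* Sketch only: EXAMPLE_SOFTIRQ is a hypothetical enum entry, not a real one. */
#include <linux/interrupt.h>
#include <linux/init.h>

static void example_softirq_action(struct softirq_action *h)
{
	/* Runs with interrupts enabled but must never sleep; typically
	 * drains a per-CPU queue filled by the hardirq handler. */
}

static int __init example_softirq_setup(void)
{
	open_softirq(EXAMPLE_SOFTIRQ, example_softirq_action);
	return 0;
}

static irqreturn_t example_irq(int irq, void *dev_id)
{
	/* Mark the softirq pending; it runs on irq_exit() or in ksoftirqd. */
	raise_softirq(EXAMPLE_SOFTIRQ);
	return IRQ_HANDLED;
}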
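
The tasklet API built on top of TASKLET_SOFTIRQ/HI_SOFTIRQ (tasklet_init, tasklet_schedule, tasklet_kill) is the usual way for a driver to defer work out of its interrupt handler. A minimal, hypothetical driver-side sketch:

/* Hypothetical driver using the tasklet API defined in this file. */
#include <linux/interrupt.h>

struct my_dev {
	struct tasklet_struct rx_tasklet;
	/* ... device state ... */
};

static void my_rx_tasklet(unsigned long data)
{
	struct my_dev *dev = (struct my_dev *)data;
	/* Runs in softirq context, serialized with respect to itself;
	 * process completed RX work for @dev here. */
}

static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
	struct my_dev *dev = dev_id;

	/* Acknowledge the hardware quickly, defer the heavy lifting. */
	tasklet_schedule(&dev->rx_tasklet);
	return IRQ_HANDLED;
}

static void my_dev_setup(struct my_dev *dev)
{
	tasklet_init(&dev->rx_tasklet, my_rx_tasklet, (unsigned long)dev);
}

static void my_dev_teardown(struct my_dev *dev)
{
	/* Must not be called from interrupt context; may yield(). */
	tasklet_kill(&dev->rx_tasklet);
}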
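
Finally, the remote softirq helpers: send_remote_softirq() queues a struct call_single_data onto the target CPU's softirq_work_list for the given softirq, and the handler registered for that softirq is expected to drain its own per-CPU list. A rough consumer-side sketch, reusing the hypothetical EXAMPLE_SOFTIRQ placeholder from above and hypothetical work-item names:

/* Sketch: work items completed on a chosen remote CPU via EXAMPLE_SOFTIRQ. */
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/smp.h>

struct example_work {
	struct call_single_data csd;	/* csd.list links into softirq_work_list */
	int result;
};

/* Producer side: may run on any CPU, with interrupts enabled. */
static void example_complete_on(struct example_work *w, int target_cpu)
{
	send_remote_softirq(&w->csd, target_cpu, EXAMPLE_SOFTIRQ);
}

/* Handler registered with open_softirq(EXAMPLE_SOFTIRQ, ...): runs on the
 * target CPU and drains that CPU's list. */
static void example_remote_action(struct softirq_action *h)
{
	struct list_head *head = &__get_cpu_var(softirq_work_list[EXAMPLE_SOFTIRQ]);

	local_irq_disable();
	while (!list_empty(head)) {
		struct example_work *w = list_entry(head->next,
						    struct example_work, csd.list);

		list_del(&w->csd.list);
		local_irq_enable();
		/* ... finish processing w on this CPU ... */
		local_irq_disable();
	}
	local_irq_enable();
}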