From ca109491f612aab5c8152207631c0444f63da97f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Nov 2008 12:43:51 +0100 Subject: hrtimer: removing all ur callback modes Impact: cleanup, move all hrtimer processing into hardirq context This is an attempt at removing some of the hrtimer complexity by reducing the number of callback modes to 1. This means that all hrtimer callback functions will be run from HARD-irq context. I went through all 30-odd hrtimer callback functions in the kernel and saw only one that I'm not quite sure of, which is the one in net/can/bcm.c - hence I'm CC-ing the folks responsible for that code. Furthermore, the hrtimer core now calls callbacks directly with IRQs disabled in case you try to enqueue an expired timer. If this timer is a periodic timer (which should use hrtimer_forward() to advance its time) then it might be possible to end up in an infinite recursion due to the fact that hrtimer_forward() doesn't round up to the next timer granularity, and therefore keeps on calling the callback - obviously this needs a fix. Aside from that, this seems to compile and actually boot on my dual core test box - although I'm sure there are some bugs in it; me not hitting any makes me certain :-) Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 280 ++++++------------------------------------------------- 1 file changed, 31 insertions(+), 249 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 47e6334..efd6f41 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -442,22 +442,6 @@ static inline void debug_hrtimer_activate(struct hrtimer *timer) { } static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } #endif -/* - * Check, whether the timer is on the callback pending list - */ -static inline int hrtimer_cb_pending(const struct hrtimer *timer) -{ - return timer->state & HRTIMER_STATE_PENDING; -} - -/* - * Remove a timer from the callback pending list - */ -static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) -{ - list_del_init(&timer->cb_entry); -} - /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -651,6 +635,8 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } +static void __run_hrtimer(struct hrtimer *timer); + /* * When High resolution timers are active, try to reprogram. Note, that in case * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry * @@ -661,31 +647,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, struct hrtimer_clock_base *base) { if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { - - /* Timer is expired, act upon the callback mode */ - switch(timer->cb_mode) { - case HRTIMER_CB_IRQSAFE_PERCPU: - case HRTIMER_CB_IRQSAFE_UNLOCKED: - /* - * This is solely for the sched tick emulation with - * dynamic tick support to ensure that we do not - * restart the tick right on the edge and end up with - * the tick timer in the softirq ! The calling site - * takes care of this. Also used for hrtimer sleeper ! - */ - debug_hrtimer_deactivate(timer); - return 1; - case HRTIMER_CB_SOFTIRQ: - /* - * Move everything else into the softirq pending list ! - */ - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - timer->state = HRTIMER_STATE_PENDING; - return 1; - default: - BUG(); - } + /* + * XXX: recursion check?
+ * hrtimer_forward() should round up with timer granularity + * so that we never get into inf recursion here, + * it doesn't do that though + */ + __run_hrtimer(timer); + return 1; } return 0; } @@ -724,11 +693,6 @@ static int hrtimer_switch_to_hres(void) return 1; } -static inline void hrtimer_raise_softirq(void) -{ - raise_softirq(HRTIMER_SOFTIRQ); -} - #else static inline int hrtimer_hres_active(void) { return 0; } @@ -747,7 +711,6 @@ static inline int hrtimer_reprogram(struct hrtimer *timer, { return 0; } -static inline void hrtimer_raise_softirq(void) { } #endif /* CONFIG_HIGH_RES_TIMERS */ @@ -890,10 +853,7 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, unsigned long newstate, int reprogram) { - /* High res. callback list. NOP for !HIGHRES */ - if (hrtimer_cb_pending(timer)) - hrtimer_remove_cb_pending(timer); - else { + if (timer->state & HRTIMER_STATE_ENQUEUED) { /* * Remove the timer from the rbtree and replace the * first entry pointer if necessary. @@ -953,7 +913,7 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n { struct hrtimer_clock_base *base, *new_base; unsigned long flags; - int ret, raise; + int ret; base = lock_hrtimer_base(timer, &flags); @@ -988,26 +948,8 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n enqueue_hrtimer(timer, new_base, new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); - /* - * The timer may be expired and moved to the cb_pending - * list. We can not raise the softirq with base lock held due - * to a possible deadlock with runqueue lock. - */ - raise = timer->state == HRTIMER_STATE_PENDING; - - /* - * We use preempt_disable to prevent this task from migrating after - * setting up the softirq and raising it. Otherwise, if me migrate - * we will raise the softirq on the wrong CPU. - */ - preempt_disable(); - unlock_hrtimer_base(timer, &flags); - if (raise) - hrtimer_raise_softirq(); - preempt_enable(); - return ret; } EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); @@ -1192,75 +1134,6 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) } EXPORT_SYMBOL_GPL(hrtimer_get_res); -static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) -{ - spin_lock_irq(&cpu_base->lock); - - while (!list_empty(&cpu_base->cb_pending)) { - enum hrtimer_restart (*fn)(struct hrtimer *); - struct hrtimer *timer; - int restart; - int emulate_hardirq_ctx = 0; - - timer = list_entry(cpu_base->cb_pending.next, - struct hrtimer, cb_entry); - - debug_hrtimer_deactivate(timer); - timer_stats_account_hrtimer(timer); - - fn = timer->function; - /* - * A timer might have been added to the cb_pending list - * when it was migrated during a cpu-offline operation. - * Emulate hardirq context for such timers. 
- */ - if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU || - timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) - emulate_hardirq_ctx = 1; - - __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); - spin_unlock_irq(&cpu_base->lock); - - if (unlikely(emulate_hardirq_ctx)) { - local_irq_disable(); - restart = fn(timer); - local_irq_enable(); - } else - restart = fn(timer); - - spin_lock_irq(&cpu_base->lock); - - timer->state &= ~HRTIMER_STATE_CALLBACK; - if (restart == HRTIMER_RESTART) { - BUG_ON(hrtimer_active(timer)); - /* - * Enqueue the timer, allow reprogramming of the event - * device - */ - enqueue_hrtimer(timer, timer->base, 1); - } else if (hrtimer_active(timer)) { - /* - * If the timer was rearmed on another CPU, reprogram - * the event device. - */ - struct hrtimer_clock_base *base = timer->base; - - if (base->first == &timer->node && - hrtimer_reprogram(timer, base)) { - /* - * Timer is expired. Thus move it from tree to - * pending list again. - */ - __remove_hrtimer(timer, base, - HRTIMER_STATE_PENDING, 0); - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - } - } - } - spin_unlock_irq(&cpu_base->lock); -} - static void __run_hrtimer(struct hrtimer *timer) { struct hrtimer_clock_base *base = timer->base; @@ -1268,25 +1141,21 @@ static void __run_hrtimer(struct hrtimer *timer) enum hrtimer_restart (*fn)(struct hrtimer *); int restart; + WARN_ON(!irqs_disabled()); + debug_hrtimer_deactivate(timer); __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); timer_stats_account_hrtimer(timer); - fn = timer->function; - if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU || - timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) { - /* - * Used for scheduler timers, avoid lock inversion with - * rq->lock and tasklist_lock. - * - * These timers are required to deal with enqueue expiry - * themselves and are not allowed to migrate. - */ - spin_unlock(&cpu_base->lock); - restart = fn(timer); - spin_lock(&cpu_base->lock); - } else - restart = fn(timer); + + /* + * Because we run timers from hardirq context, there is no chance + * they get migrated to another cpu, therefore its safe to unlock + * the timer base. + */ + spin_unlock(&cpu_base->lock); + restart = fn(timer); + spin_lock(&cpu_base->lock); /* * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid @@ -1311,7 +1180,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); struct hrtimer_clock_base *base; ktime_t expires_next, now; - int i, raise = 0; + int i; BUG_ON(!cpu_base->hres_active); cpu_base->nr_events++; @@ -1360,16 +1229,6 @@ void hrtimer_interrupt(struct clock_event_device *dev) break; } - /* Move softirq callbacks to the pending list */ - if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { - __remove_hrtimer(timer, base, - HRTIMER_STATE_PENDING, 0); - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - raise = 1; - continue; - } - __run_hrtimer(timer); } spin_unlock(&cpu_base->lock); @@ -1383,10 +1242,6 @@ void hrtimer_interrupt(struct clock_event_device *dev) if (tick_program_event(expires_next, 0)) goto retry; } - - /* Raise softirq ? 
*/ - if (raise) - raise_softirq(HRTIMER_SOFTIRQ); } /** @@ -1413,11 +1268,6 @@ void hrtimer_peek_ahead_timers(void) local_irq_restore(flags); } -static void run_hrtimer_softirq(struct softirq_action *h) -{ - run_hrtimer_pending(&__get_cpu_var(hrtimer_bases)); -} - #endif /* CONFIG_HIGH_RES_TIMERS */ /* @@ -1429,8 +1279,6 @@ static void run_hrtimer_softirq(struct softirq_action *h) */ void hrtimer_run_pending(void) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - if (hrtimer_hres_active()) return; @@ -1444,8 +1292,6 @@ void hrtimer_run_pending(void) */ if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) hrtimer_switch_to_hres(); - - run_hrtimer_pending(cpu_base); } /* @@ -1482,14 +1328,6 @@ void hrtimer_run_queues(void) hrtimer_get_expires_tv64(timer)) break; - if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { - __remove_hrtimer(timer, base, - HRTIMER_STATE_PENDING, 0); - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - continue; - } - __run_hrtimer(timer); } spin_unlock(&cpu_base->lock); @@ -1516,9 +1354,6 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) { sl->timer.function = hrtimer_wakeup; sl->task = task; -#ifdef CONFIG_HIGH_RES_TIMERS - sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; -#endif } static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) @@ -1655,18 +1490,16 @@ static void __cpuinit init_hrtimers_cpu(int cpu) for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) cpu_base->clock_base[i].cpu_base = cpu_base; - INIT_LIST_HEAD(&cpu_base->cb_pending); hrtimer_init_hres(cpu_base); } #ifdef CONFIG_HOTPLUG_CPU -static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base, - struct hrtimer_clock_base *new_base, int dcpu) +static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, + struct hrtimer_clock_base *new_base, int dcpu) { struct hrtimer *timer; struct rb_node *node; - int raise = 0; while ((node = rb_first(&old_base->active))) { timer = rb_entry(node, struct hrtimer, node); @@ -1674,18 +1507,6 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base, debug_hrtimer_deactivate(timer); /* - * Should not happen. Per CPU timers should be - * canceled _before_ the migration code is called - */ - if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) { - __remove_hrtimer(timer, old_base, - HRTIMER_STATE_INACTIVE, 0); - WARN(1, "hrtimer (%p %p)active but cpu %d dead\n", - timer, timer->function, dcpu); - continue; - } - - /* * Mark it as STATE_MIGRATE not INACTIVE otherwise the * timer could be seen as !active and just vanish away * under us on another CPU @@ -1708,48 +1529,19 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base, * otherwise we end up with a stale timer. 
*/ if (timer->state == HRTIMER_STATE_MIGRATE) { - timer->state = HRTIMER_STATE_PENDING; - list_add_tail(&timer->cb_entry, - &new_base->cpu_base->cb_pending); - raise = 1; + /* XXX: running on offline cpu */ + __run_hrtimer(timer); } #endif /* Clear the migration state bit */ timer->state &= ~HRTIMER_STATE_MIGRATE; } - return raise; } -#ifdef CONFIG_HIGH_RES_TIMERS -static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base, - struct hrtimer_cpu_base *new_base) -{ - struct hrtimer *timer; - int raise = 0; - - while (!list_empty(&old_base->cb_pending)) { - timer = list_entry(old_base->cb_pending.next, - struct hrtimer, cb_entry); - - __remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0); - timer->base = &new_base->clock_base[timer->base->index]; - list_add_tail(&timer->cb_entry, &new_base->cb_pending); - raise = 1; - } - return raise; -} -#else -static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base, - struct hrtimer_cpu_base *new_base) -{ - return 0; -} -#endif - static void migrate_hrtimers(int cpu) { struct hrtimer_cpu_base *old_base, *new_base; - int i, raise = 0; + int i; BUG_ON(cpu_online(cpu)); old_base = &per_cpu(hrtimer_bases, cpu); @@ -1764,20 +1556,13 @@ static void migrate_hrtimers(int cpu) spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { - if (migrate_hrtimer_list(&old_base->clock_base[i], - &new_base->clock_base[i], cpu)) - raise = 1; + migrate_hrtimer_list(&old_base->clock_base[i], + &new_base->clock_base[i], cpu); } - if (migrate_hrtimer_pending(old_base, new_base)) - raise = 1; - spin_unlock(&old_base->lock); spin_unlock_irq(&new_base->lock); put_cpu_var(hrtimer_bases); - - if (raise) - hrtimer_raise_softirq(); } #endif /* CONFIG_HOTPLUG_CPU */ @@ -1817,9 +1602,6 @@ void __init hrtimers_init(void) hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); register_cpu_notifier(&hrtimers_nb); -#ifdef CONFIG_HIGH_RES_TIMERS - open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); -#endif } /** -- cgit v1.1 From 37810659ea7d9572c5ac284ade272f806ef8f788 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Dec 2008 11:17:10 +0100 Subject: hrtimer: removing all ur callback modes, fix hotplug Impact: fix hrtimer locking (reported by lockdep) in the CPU hotplug case This addition fixes the hotplug locking issue on my machine Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 65 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 28 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index efd6f41..b09c7a2 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1496,7 +1496,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu) #ifdef CONFIG_HOTPLUG_CPU static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, - struct hrtimer_clock_base *new_base, int dcpu) + struct hrtimer_clock_base *new_base) { struct hrtimer *timer; struct rb_node *node; @@ -1514,40 +1514,34 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0); timer->base = new_base; /* - * Enqueue the timer. Allow reprogramming of the event device + * Enqueue the timers on the new cpu, but do not reprogram + * the timer as that would enable a deadlock between + * hrtimer_enqueue_reprogramm() running the timer and us still + * holding a nested base lock. 
+ * + * Instead we tickle the hrtimer interrupt after the migration + * is done, which will run all expired timers and re-programm + * the timer device. */ - enqueue_hrtimer(timer, new_base, 1); + enqueue_hrtimer(timer, new_base, 0); -#ifdef CONFIG_HIGH_RES_TIMERS - /* - * Happens with high res enabled when the timer was - * already expired and the callback mode is - * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The - * enqueue code does not move them to the soft irq - * pending list for performance/latency reasons, but - * in the migration state, we need to do that - * otherwise we end up with a stale timer. - */ - if (timer->state == HRTIMER_STATE_MIGRATE) { - /* XXX: running on offline cpu */ - __run_hrtimer(timer); - } -#endif /* Clear the migration state bit */ timer->state &= ~HRTIMER_STATE_MIGRATE; } } -static void migrate_hrtimers(int cpu) +static int migrate_hrtimers(int scpu) { struct hrtimer_cpu_base *old_base, *new_base; - int i; + int dcpu, i; - BUG_ON(cpu_online(cpu)); - old_base = &per_cpu(hrtimer_bases, cpu); + BUG_ON(cpu_online(scpu)); + old_base = &per_cpu(hrtimer_bases, scpu); new_base = &get_cpu_var(hrtimer_bases); - tick_cancel_sched_timer(cpu); + dcpu = smp_processor_id(); + + tick_cancel_sched_timer(scpu); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. @@ -1557,32 +1551,47 @@ static void migrate_hrtimers(int cpu) for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], - &new_base->clock_base[i], cpu); + &new_base->clock_base[i]); } spin_unlock(&old_base->lock); spin_unlock_irq(&new_base->lock); put_cpu_var(hrtimer_bases); + + return dcpu; +} + +static void tickle_timers(void *arg) +{ + hrtimer_peek_ahead_timers(); } + #endif /* CONFIG_HOTPLUG_CPU */ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - unsigned int cpu = (long)hcpu; + int dcpu = -1, scpu = (long)hcpu; switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - init_hrtimers_cpu(cpu); + init_hrtimers_cpu(scpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: case CPU_DEAD_FROZEN: - clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu); - migrate_hrtimers(cpu); + clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu); + dcpu = migrate_hrtimers(scpu); + break; + + case CPU_POST_DEAD: + if (dcpu == -1) + break; + + smp_call_function_single(dcpu, tickle_timers, NULL, 0); break; #endif -- cgit v1.1 From a0a99b227da57f81319dd239bc4de811b0f530ec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 8 Dec 2008 17:13:02 +0100 Subject: hrtimer: removing all ur callback modes, fix > Ingo, this addition fixes the hotplug issue on my machine And because we're all human... 
Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index b09c7a2..b741f85 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1571,7 +1571,7 @@ static void tickle_timers(void *arg) static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - int dcpu = -1, scpu = (long)hcpu; + int dcpu, scpu = (long)hcpu; switch (action) { @@ -1585,12 +1585,6 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, case CPU_DEAD_FROZEN: clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu); dcpu = migrate_hrtimers(scpu); - break; - - case CPU_POST_DEAD: - if (dcpu == -1) - break; - smp_call_function_single(dcpu, tickle_timers, NULL, 0); break; #endif -- cgit v1.1 From b2e3c0adec918ea22b6c9d7c76193dd3aaba9bd4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 19 Dec 2008 00:48:27 +0100 Subject: hrtimers: fix warning in kernel/hrtimer.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this warning: kernel/hrtimer.c: In function ‘hrtimer_cpu_notify’: kernel/hrtimer.c:1574: warning: unused variable ‘dcpu’ is caused because 'dcpu' is only used in the CONFIG_HOTPLUG_CPU case. Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index b741f85..bda9cb9 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1571,7 +1571,7 @@ static void tickle_timers(void *arg) static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - int dcpu, scpu = (long)hcpu; + int scpu = (long)hcpu; switch (action) { @@ -1583,10 +1583,14 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: case CPU_DEAD_FROZEN: + { + int dcpu; + clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu); dcpu = migrate_hrtimers(scpu); smp_call_function_single(dcpu, tickle_timers, NULL, 0); break; + } #endif default: -- cgit v1.1 From 51bc39f4ba35bae153b32145077fb1109bcae14c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Fri, 26 Dec 2008 12:23:00 +0900 Subject: hrtimer: remove #include <linux/irq.h> Impact: cleanup <linux/irq.h> can be removed and should be, because: - hrtimer doesn't use any irq feature. - <linux/irq.h> shouldn't be included from generic code. Signed-off-by: KOSAKI Motohiro Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 47e6334..0ad3f3d 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -32,7 +32,6 @@ */ #include <linux/cpu.h> -#include <linux/irq.h> #include <linux/module.h> #include <linux/percpu.h> #include <linux/hrtimer.h> -- cgit v1.1 From 8bdec955b0da2ffbd10eb9b200651dd1f9e366f2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 5 Jan 2009 11:28:19 +0100 Subject: hrtimer: splitout peek ahead functionality Impact: cleanup Provide a peek-ahead function that assumes irqs are disabled; this allows for micro-optimizations.
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index eb2bfef..8f7001c 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1243,6 +1243,22 @@ void hrtimer_interrupt(struct clock_event_device *dev) } } +/* + * local version of hrtimer_peek_ahead_timers() called with interrupts + * disabled. + */ +static void __hrtimer_peek_ahead_timers(void) +{ + struct tick_device *td; + + if (!hrtimer_hres_active()) + return; + + td = &__get_cpu_var(tick_cpu_device); + if (td && td->evtdev) + hrtimer_interrupt(td->evtdev); +} + /** * hrtimer_peek_ahead_timers -- run soft-expired timers now * @@ -1254,16 +1270,10 @@ void hrtimer_interrupt(struct clock_event_device *dev) */ void hrtimer_peek_ahead_timers(void) { - struct tick_device *td; unsigned long flags; - if (!hrtimer_hres_active()) - return; - local_irq_save(flags); - td = &__get_cpu_var(tick_cpu_device); - if (td && td->evtdev) - hrtimer_interrupt(td->evtdev); + __hrtimer_peek_ahead_timers(); local_irq_restore(flags); } -- cgit v1.1 From d5fd43c4ae04523e1dcd7794f9c511b289851350 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 5 Jan 2009 11:28:20 +0100 Subject: hrtimer: fix HOTPLUG_CPU=n compile warning Impact: cleanup kernel/hrtimer.c: In function 'hrtimer_cpu_notify': kernel/hrtimer.c:1574: warning: unused variable 'dcpu' Introduced by commit 37810659ea7d9572c5ac284ade272f806ef8f788 ("hrtimer: removing all ur callback modes, fix hotplug") from the timers tree. dcpu is only used if CONFIG_HOTPLUG_CPU is set. Reported-by: Stephen Rothwell Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 8f7001c..9c2bfa8 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1504,6 +1504,11 @@ static void __cpuinit init_hrtimers_cpu(int cpu) #ifdef CONFIG_HOTPLUG_CPU +static void tickle_timers(void *arg) +{ + hrtimer_peek_ahead_timers(); +} + static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, struct hrtimer_clock_base *new_base) { @@ -1539,7 +1544,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, } } -static int migrate_hrtimers(int scpu) +static void migrate_hrtimers(int scpu) { struct hrtimer_cpu_base *old_base, *new_base; int dcpu, i; @@ -1567,12 +1572,7 @@ static int migrate_hrtimers(int scpu) spin_unlock_irq(&new_base->lock); put_cpu_var(hrtimer_bases); - return dcpu; -} - -static void tickle_timers(void *arg) -{ - hrtimer_peek_ahead_timers(); + smp_call_function_single(dcpu, tickle_timers, NULL, 0); } #endif /* CONFIG_HOTPLUG_CPU */ @@ -1593,11 +1593,8 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, case CPU_DEAD: case CPU_DEAD_FROZEN: { - clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu); - dcpu = migrate_hrtimers(scpu); - smp_call_function_single(dcpu, tickle_timers, NULL, 0); + clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu); + migrate_hrtimers(scpu); break; } #endif -- cgit v1.1 From 731a55ba0f17064f85903b7bf8e24849ec6cfa20 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 5 Jan 2009 11:28:21 +0100 Subject: hrtimer: simplify hotplug migration Impact: cleanup No need for an SMP function call, which is likely to run on the same CPU anyway.
We can just call hrtimers_peek_ahead() in the interrupts disabled section of migrate_hrtimers(). Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 9c2bfa8..8010a67 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1504,11 +1504,6 @@ static void __cpuinit init_hrtimers_cpu(int cpu) #ifdef CONFIG_HOTPLUG_CPU -static void tickle_timers(void *arg) -{ - hrtimer_peek_ahead_timers(); -} - static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, struct hrtimer_clock_base *new_base) { @@ -1547,20 +1542,19 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, static void migrate_hrtimers(int scpu) { struct hrtimer_cpu_base *old_base, *new_base; - int dcpu, i; + int i; BUG_ON(cpu_online(scpu)); - old_base = &per_cpu(hrtimer_bases, scpu); - new_base = &get_cpu_var(hrtimer_bases); - - dcpu = smp_processor_id(); - tick_cancel_sched_timer(scpu); + + local_irq_disable(); + old_base = &per_cpu(hrtimer_bases, scpu); + new_base = &__get_cpu_var(hrtimer_bases); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ - spin_lock_irq(&new_base->lock); + spin_lock(&new_base->lock); spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { @@ -1569,10 +1563,11 @@ static void migrate_hrtimers(int scpu) } spin_unlock(&old_base->lock); - spin_unlock_irq(&new_base->lock); - put_cpu_var(hrtimer_bases); + spin_unlock(&new_base->lock); - smp_call_function_single(dcpu, tickle_timers, NULL, 0); + /* Check, if we got expired work to do */ + __hrtimer_peek_ahead_timers(); + local_irq_enable(); } #endif /* CONFIG_HOTPLUG_CPU */ -- cgit v1.1 From a6037b61c2f5fc99c57c15b26d7cfa58bbb34008 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 5 Jan 2009 11:28:22 +0100 Subject: hrtimer: fix recursion deadlock by re-introducing the softirq Impact: fix rare runtime deadlock There are a few sites that do: spin_lock_irq(&foo) hrtimer_start(&bar) __run_hrtimer(&bar) func() spin_lock(&foo) which obviously deadlocks. In order to avoid this, never call __run_hrtimer() from hrtimer_start*() context, but instead defer this to softirq context. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 60 +++++++++++++++++++++++++------------------------------- 1 file changed, 27 insertions(+), 33 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 8010a67..b68e98f 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -634,7 +634,6 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } -static void __run_hrtimer(struct hrtimer *timer); /* * When High resolution timers are active, try to reprogram. Note, that in case @@ -646,13 +645,9 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, struct hrtimer_clock_base *base) { if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { - /* - * XXX: recursion check? 
- * hrtimer_forward() should round up with timer granularity - * so that we never get into inf recursion here, - * it doesn't do that though - */ - __run_hrtimer(timer); + spin_unlock(&base->cpu_base->lock); + raise_softirq_irqoff(HRTIMER_SOFTIRQ); + spin_lock(&base->cpu_base->lock); return 1; } return 0; @@ -705,11 +700,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, } static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } -static inline int hrtimer_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) -{ - return 0; -} #endif /* CONFIG_HIGH_RES_TIMERS */ @@ -780,9 +770,11 @@ EXPORT_SYMBOL_GPL(hrtimer_forward); * * The timer is inserted in expiry order. Insertion into the * red black tree is O(log(n)). Must hold the base lock. + * + * Returns 1 when the new timer is the leftmost timer in the tree. */ -static void enqueue_hrtimer(struct hrtimer *timer, - struct hrtimer_clock_base *base, int reprogram) +static int enqueue_hrtimer(struct hrtimer *timer, + struct hrtimer_clock_base *base) { struct rb_node **link = &base->active.rb_node; struct rb_node *parent = NULL; @@ -814,20 +806,8 @@ static void enqueue_hrtimer(struct hrtimer *timer, * Insert the timer to the rbtree and check whether it * replaces the first pending timer */ - if (leftmost) { - /* - * Reprogram the clock event device. When the timer is already - * expired hrtimer_enqueue_reprogram has either called the - * callback or added it to the pending list and raised the - * softirq. - * - * This is a NOP for !HIGHRES - */ - if (reprogram && hrtimer_enqueue_reprogram(timer, base)) - return; - + if (leftmost) base->first = &timer->node; - } rb_link_node(&timer->node, parent, link); rb_insert_color(&timer->node, &base->active); @@ -836,6 +816,8 @@ static void enqueue_hrtimer(struct hrtimer *timer, * state of a possibly running callback. */ timer->state |= HRTIMER_STATE_ENQUEUED; + + return leftmost; } /* @@ -912,7 +894,7 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n { struct hrtimer_clock_base *base, *new_base; unsigned long flags; - int ret; + int ret, leftmost; base = lock_hrtimer_base(timer, &flags); @@ -940,12 +922,16 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n timer_stats_hrtimer_set_start_info(timer); + leftmost = enqueue_hrtimer(timer, new_base); + /* * Only allow reprogramming if the new base is on this CPU. * (it might still be on another CPU if the timer was pending) + * + * XXX send_remote_softirq() ? */ - enqueue_hrtimer(timer, new_base, - new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); + if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)) + hrtimer_enqueue_reprogram(timer, new_base); unlock_hrtimer_base(timer, &flags); @@ -1163,7 +1149,7 @@ static void __run_hrtimer(struct hrtimer *timer) */ if (restart != HRTIMER_NORESTART) { BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); - enqueue_hrtimer(timer, base, 0); + enqueue_hrtimer(timer, base); } timer->state &= ~HRTIMER_STATE_CALLBACK; } @@ -1277,6 +1263,11 @@ void hrtimer_peek_ahead_timers(void) local_irq_restore(flags); } +static void run_hrtimer_softirq(struct softirq_action *h) +{ + hrtimer_peek_ahead_timers(); +} + #endif /* CONFIG_HIGH_RES_TIMERS */ /* @@ -1532,7 +1523,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, * is done, which will run all expired timers and re-programm * the timer device. 
*/ - enqueue_hrtimer(timer, new_base, 0); + enqueue_hrtimer(timer, new_base); /* Clear the migration state bit */ timer->state &= ~HRTIMER_STATE_MIGRATE; } } @@ -1610,6 +1601,9 @@ void __init hrtimers_init(void) hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); register_cpu_notifier(&hrtimers_nb); +#ifdef CONFIG_HIGH_RES_TIMERS + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); +#endif } /** -- cgit v1.1 From e3f1d883740b09e5116d4d4e30a6a6987264a83c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 5 Jan 2009 11:28:23 +0100 Subject: hrtimer: fixup comments Clean up the comments Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index b68e98f..aa024f2 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1143,9 +1143,9 @@ static void __run_hrtimer(struct hrtimer *timer) spin_lock(&cpu_base->lock); /* - * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid - * reprogramming of the event hardware. This happens at the end of this - * function anyway. + * Note: We clear the CALLBACK bit after enqueue_hrtimer and + * we do not reprogramm the event hardware. Happens either in + * hrtimer_start_range_ns() or in hrtimer_interrupt() */ if (restart != HRTIMER_NORESTART) { BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); @@ -1514,14 +1514,12 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0); timer->base = new_base; /* - * Enqueue the timers on the new cpu, but do not reprogram - * the timer as that would enable a deadlock between - * hrtimer_enqueue_reprogramm() running the timer and us still - * holding a nested base lock. - * - * Instead we tickle the hrtimer interrupt after the migration - * is done, which will run all expired timers and re-programm - * the timer device. + * Enqueue the timers on the new cpu. This does not + * reprogram the event device in case the timer + * expires before the earliest on this CPU, but we run + * hrtimer_interrupt after we migrated everything to + * sort out already expired timers and reprogram the + * event device. */ enqueue_hrtimer(timer, new_base); -- cgit v1.1 From 82c5b7b527ccc4b5d3cf832437e842f9d2920a79 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 5 Jan 2009 14:11:10 +0100 Subject: hrtimer: splitout peek ahead functionality, fix Impact: build fix on !CONFIG_HIGH_RES_TIMERS Fix: kernel/hrtimer.c:1586: error: implicit declaration of function '__hrtimer_peek_ahead_timers' Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index aa024f2..1455b76 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1268,7 +1268,11 @@ static void run_hrtimer_softirq(struct softirq_action *h) hrtimer_peek_ahead_timers(); } -#endif /* CONFIG_HIGH_RES_TIMERS */ +#else /* CONFIG_HIGH_RES_TIMERS */ + +static inline void __hrtimer_peek_ahead_timers(void) { } + +#endif /* !CONFIG_HIGH_RES_TIMERS */ /* * Called from timer softirq every jiffy, expire hrtimers: -- cgit v1.1
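For reference, the end state of this series in use: every hrtimer callback now runs in hardirq context with interrupts disabled, and a periodic timer keeps itself armed by advancing its own expiry before returning HRTIMER_RESTART, which is the pattern behind the recursion discussion in the first patch. The sketch below is not part of the series; it assumes the 2.6.29-era hrtimer API, and the sample_* module names are invented for illustration.

  #include <linux/module.h>
  #include <linux/hrtimer.h>
  #include <linux/ktime.h>

  static struct hrtimer sample_timer;
  static ktime_t sample_period;

  static enum hrtimer_restart sample_timer_fn(struct hrtimer *timer)
  {
          /*
           * Hardirq context, IRQs disabled: no sleeping, no mutexes,
           * keep the work short. hrtimer_forward_now() pushes the
           * expiry past 'now', so the timer is not re-enqueued in an
           * already-expired state.
           */
          hrtimer_forward_now(timer, sample_period);
          return HRTIMER_RESTART;
  }

  static int __init sample_init(void)
  {
          sample_period = ktime_set(0, 100 * NSEC_PER_MSEC);  /* 100 ms */
          hrtimer_init(&sample_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
          sample_timer.function = sample_timer_fn;
          /* No cb_mode to select any more: hardirq expiry is the only mode. */
          hrtimer_start(&sample_timer, sample_period, HRTIMER_MODE_REL);
          return 0;
  }

  static void __exit sample_exit(void)
  {
          /* Waits for a running callback to finish before returning. */
          hrtimer_cancel(&sample_timer);
  }

  module_init(sample_init);
  module_exit(sample_exit);
  MODULE_LICENSE("GPL");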