From bd624d75db21ea5402f9ecf4450b311794d80352 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 13 Feb 2015 08:54:56 +0800 Subject: clockevents: Introduce mode specific callbacks It is not possible for the clockevents core to know which modes (other than those with a corresponding feature flag) are supported by a particular implementation. And drivers are expected to handle transition to all modes elegantly, as ->set_mode() would be issued for them unconditionally. Now, adding support for a new mode complicates things a bit if we want to use the legacy ->set_mode() callback. We need to closely review all clockevents drivers to see if they would break on addition of a new mode. And after such reviews, it is found that we have to do non-trivial changes to most of the drivers [1]. Introduce mode-specific set_mode_*() callbacks, some of which the drivers may or may not implement. A missing callback would clearly convey the message that the corresponding mode isn't supported. A driver may still choose to keep supporting the legacy ->set_mode() callback, but ->set_mode() wouldn't be supporting any new modes beyond RESUME. If a driver wants to benefit from using a new mode, it would be required to migrate to the mode specific callbacks. The legacy ->set_mode() callback and the newly introduced mode-specific callbacks are mutually exclusive. Only one of them should be supported by the driver. Sanity check is done at the time of registration to distinguish between optional and required callbacks and to make error recovery and handling simpler. If the legacy ->set_mode() callback is provided, all mode specific ones would be ignored by the core but a warning is thrown if they are present. Call sites calling ->set_mode() directly are also updated to use __clockevents_set_mode() instead, as ->set_mode() may not be available anymore for few drivers. [1] https://lkml.org/lkml/2014/12/9/605 [2] https://lkml.org/lkml/2015/1/23/255 Suggested-by: Thomas Gleixner [2] Signed-off-by: Viresh Kumar Signed-off-by: Peter Zijlstra (Intel) Cc: Daniel Lezcano Cc: Frederic Weisbecker Cc: John Stultz Cc: Kevin Hilman Cc: Linus Torvalds Cc: Preeti U Murthy Cc: linaro-kernel@lists.linaro.org Cc: linaro-networking@linaro.org Link: http://lkml.kernel.org/r/792d59a40423f0acffc9bb0bec9de1341a06fa02.1423788565.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 21 +++++++++-- kernel/time/clockevents.c | 88 ++++++++++++++++++++++++++++++++++++++++++++-- kernel/time/timer_list.c | 32 +++++++++++++++-- 3 files changed, 134 insertions(+), 7 deletions(-) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 2e4cb67..59af26b 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -39,6 +39,8 @@ enum clock_event_mode { CLOCK_EVT_MODE_PERIODIC, CLOCK_EVT_MODE_ONESHOT, CLOCK_EVT_MODE_RESUME, + + /* Legacy ->set_mode() callback doesn't support below modes */ }; /* @@ -81,7 +83,11 @@ enum clock_event_mode { * @mode: operating mode assigned by the management code * @features: features * @retries: number of forced programming retries - * @set_mode: set mode function + * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. 
+ * @set_mode_periodic: switch mode to periodic, if !set_mode + * @set_mode_oneshot: switch mode to oneshot, if !set_mode + * @set_mode_shutdown: switch mode to shutdown, if !set_mode + * @set_mode_resume: resume clkevt device, if !set_mode * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration @@ -108,9 +114,20 @@ struct clock_event_device { unsigned int features; unsigned long retries; - void (*broadcast)(const struct cpumask *mask); + /* + * Mode transition callback(s): Only one of the two groups should be + * defined: + * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. + * - set_mode_{shutdown|periodic|oneshot|resume}(). + */ void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); + int (*set_mode_periodic)(struct clock_event_device *); + int (*set_mode_oneshot)(struct clock_event_device *); + int (*set_mode_shutdown)(struct clock_event_device *); + int (*set_mode_resume)(struct clock_event_device *); + + void (*broadcast)(const struct cpumask *mask); void (*suspend)(struct clock_event_device *); void (*resume)(struct clock_event_device *); unsigned long min_delta_ticks; diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 5544990..489642b 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -94,6 +94,57 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) } EXPORT_SYMBOL_GPL(clockevent_delta2ns); +static int __clockevents_set_mode(struct clock_event_device *dev, + enum clock_event_mode mode) +{ + /* Transition with legacy set_mode() callback */ + if (dev->set_mode) { + /* Legacy callback doesn't support new modes */ + if (mode > CLOCK_EVT_MODE_RESUME) + return -ENOSYS; + dev->set_mode(mode, dev); + return 0; + } + + if (dev->features & CLOCK_EVT_FEAT_DUMMY) + return 0; + + /* Transition with new mode-specific callbacks */ + switch (mode) { + case CLOCK_EVT_MODE_UNUSED: + /* + * This is an internal state, which is guaranteed to go from + * SHUTDOWN to UNUSED. No driver interaction required. 
+ */ + return 0; + + case CLOCK_EVT_MODE_SHUTDOWN: + return dev->set_mode_shutdown(dev); + + case CLOCK_EVT_MODE_PERIODIC: + /* Core internal bug */ + if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) + return -ENOSYS; + return dev->set_mode_periodic(dev); + + case CLOCK_EVT_MODE_ONESHOT: + /* Core internal bug */ + if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) + return -ENOSYS; + return dev->set_mode_oneshot(dev); + + case CLOCK_EVT_MODE_RESUME: + /* Optional callback */ + if (dev->set_mode_resume) + return dev->set_mode_resume(dev); + else + return 0; + + default: + return -ENOSYS; + } +} + /** * clockevents_set_mode - set the operating mode of a clock event device * @dev: device to modify @@ -105,7 +156,9 @@ void clockevents_set_mode(struct clock_event_device *dev, enum clock_event_mode mode) { if (dev->mode != mode) { - dev->set_mode(mode, dev); + if (__clockevents_set_mode(dev, mode)) + return; + dev->mode = mode; /* @@ -373,6 +426,35 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) } EXPORT_SYMBOL_GPL(clockevents_unbind); +/* Sanity check of mode transition callbacks */ +static int clockevents_sanity_check(struct clock_event_device *dev) +{ + /* Legacy set_mode() callback */ + if (dev->set_mode) { + /* We shouldn't be supporting new modes now */ + WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot || + dev->set_mode_shutdown || dev->set_mode_resume); + return 0; + } + + if (dev->features & CLOCK_EVT_FEAT_DUMMY) + return 0; + + /* New mode-specific callbacks */ + if (!dev->set_mode_shutdown) + return -EINVAL; + + if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && + !dev->set_mode_periodic) + return -EINVAL; + + if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) && + !dev->set_mode_oneshot) + return -EINVAL; + + return 0; +} + /** * clockevents_register_device - register a clock event device * @dev: device to register @@ -382,6 +464,8 @@ void clockevents_register_device(struct clock_event_device *dev) unsigned long flags; BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + BUG_ON(clockevents_sanity_check(dev)); + if (!dev->cpumask) { WARN_ON(num_possible_cpus() > 1); dev->cpumask = cpumask_of(smp_processor_id()); @@ -449,7 +533,7 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq) return clockevents_program_event(dev, dev->next_event, false); if (dev->mode == CLOCK_EVT_MODE_PERIODIC) - dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev); + return __clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); return 0; } diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 61ed862..2cfd194 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -228,9 +228,35 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) print_name_offset(m, dev->set_next_event); SEQ_printf(m, "\n"); - SEQ_printf(m, " set_mode: "); - print_name_offset(m, dev->set_mode); - SEQ_printf(m, "\n"); + if (dev->set_mode) { + SEQ_printf(m, " set_mode: "); + print_name_offset(m, dev->set_mode); + SEQ_printf(m, "\n"); + } else { + if (dev->set_mode_shutdown) { + SEQ_printf(m, " shutdown: "); + print_name_offset(m, dev->set_mode_shutdown); + SEQ_printf(m, "\n"); + } + + if (dev->set_mode_periodic) { + SEQ_printf(m, " periodic: "); + print_name_offset(m, dev->set_mode_periodic); + SEQ_printf(m, "\n"); + } + + if (dev->set_mode_oneshot) { + SEQ_printf(m, " oneshot: "); + print_name_offset(m, dev->set_mode_oneshot); + SEQ_printf(m, "\n"); + } + + if (dev->set_mode_resume) { + SEQ_printf(m, " resume: "); + print_name_offset(m, dev->set_mode_resume); + 
SEQ_printf(m, "\n"); + } + } SEQ_printf(m, " event_handler: "); print_name_offset(m, dev->event_handler); -- cgit v1.1 From 6086e346fdea1ae64d974c94c1acacc2605567ae Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:29 -0700 Subject: clocksource: Simplify the clocks_calc_max_nsecs() logic The previous clocks_calc_max_nsecs() code had some unnecessarily complex bit logic to find the max interval that could cause multiplication overflows. Since this is not in the hot path, just do the divide to make it easier to read. The previous implementation also had a subtle issue in that it avoided overflows with signed 64-bit values, whereas the intervals are always unsigned. This resulted in overly conservative intervals, which other safety margins were then added to, reducing the intended interval length. Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-2-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 4892352..2148f41 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -476,19 +476,10 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) /* * Calculate the maximum number of cycles that we can pass to the - * cyc2ns function without overflowing a 64-bit signed result. The - * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj) - * which is equivalent to the below. - * max_cycles < (2^63)/(mult + maxadj) - * max_cycles < 2^(log2((2^63)/(mult + maxadj))) - * max_cycles < 2^(log2(2^63) - log2(mult + maxadj)) - * max_cycles < 2^(63 - log2(mult + maxadj)) - * max_cycles < 1 << (63 - log2(mult + maxadj)) - * Please note that we add 1 to the result of the log2 to account for - * any rounding errors, ensure the above inequality is satisfied and - * no overflow will occur. + * cyc2ns() function without overflowing a 64-bit result. */ - max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1)); + max_cycles = ULLONG_MAX; + do_div(max_cycles, mult+maxadj); /* * The actual maximum number of cycles we can defer the clocksource is -- cgit v1.1 From 362fde0410377e468ca00ad363fdf3e3ec42eb6a Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:30 -0700 Subject: clocksource: Simplify the logic around clocksource wrapping safety margins The clocksource logic has a number of places where we try to include a safety margin. Most of these are 12.5% safety margins, but they are inconsistently applied and sometimes are applied on top of each other. Additionally, in the previous patch, we corrected an issue where we unintentionally in effect created a 50% safety margin, which these 12.5% margins were then added to. So to simplify the logic here, this patch removes the various 12.5% margins, and consolidates adding the margin in one place: clocks_calc_max_nsecs(). Additionally, Linus prefers a 50% safety margin, as it allows bad clock values to be more easily caught. This should really have no net effect, due to the corrected issue earlier which caused greater than 50% margins to be used w/o issue.
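In outline, the combined effect of this and the previous patch on the calculation is the following (a sketch of the logic only, not the exact kernel source, which uses do_div() for the 64-bit division):

    /* Sketch: max deferment derivation after both patches.
     * cyc2ns() is (cycles * mult) >> shift; '/' stands in for do_div().
     */
    static u64 calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
    {
        /* largest cycle count that cannot overflow a 64-bit multiply */
        u64 max_cycles = ULLONG_MAX / (mult + maxadj);
        u64 max_nsecs;

        /* also bounded by the width of the counter */
        max_cycles = min(max_cycles, mask);

        /* convert with the smallest possible mult for a safe floor */
        max_nsecs = (max_cycles * (mult - maxadj)) >> shift;

        /* keep 50% headroom so bad clock values can be detected */
        return max_nsecs >> 1;
    }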
Signed-off-by: John Stultz Acked-by: Stephen Boyd (for the sched_clock.c bit) Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 26 ++++++++++++-------------- kernel/time/sched_clock.c | 4 ++-- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 2148f41..ace9576 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -469,6 +469,9 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) * @shift: cycle to nanosecond divisor (power of two) * @maxadj: maximum adjustment value to mult (~11%) * @mask: bitmask for two's complement subtraction of non 64 bit counters + * + * NOTE: This function includes a safety margin of 50%, so that bad clock values + * can be detected. */ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) { @@ -490,11 +493,14 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) max_cycles = min(max_cycles, mask); max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift); + /* Return 50% of the actual maximum, so we can detect bad values */ + max_nsecs >>= 1; + return max_nsecs; } /** - * clocksource_max_deferment - Returns max time the clocksource can be deferred + * clocksource_max_deferment - Returns max time the clocksource should be deferred * @cs: Pointer to clocksource * */ @@ -504,13 +510,7 @@ static u64 clocksource_max_deferment(struct clocksource *cs) max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj, cs->mask); - /* - * To ensure that the clocksource does not wrap whilst we are idle, - * limit the time the clocksource can be deferred by 12.5%. Please - * note a margin of 12.5% is used because this can be computed with - * a shift, versus say 10% which would require division. - */ - return max_nsecs - (max_nsecs >> 3); + return max_nsecs; } #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET @@ -659,10 +659,9 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) * conversion precision. 10 minutes is still a reasonable * amount. That results in a shift value of 24 for a * clocksource with mask >= 40bit and f >= 4GHz. That maps to - * ~ 0.06ppm granularity for NTP. We apply the same 12.5% - * margin as we do in clocksource_max_deferment() + * ~ 0.06ppm granularity for NTP. */ - sec = (cs->mask - (cs->mask >> 3)); + sec = cs->mask; do_div(sec, freq); do_div(sec, scale); if (!sec) @@ -674,9 +673,8 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) NSEC_PER_SEC / scale, sec * scale); /* - * for clocksources that have large mults, to avoid overflow. - * Since mult may be adjusted by ntp, add an safety extra margin - * + * Ensure clocksources that have large 'mult' values don't overflow + * when adjusted. 
*/ cs->maxadj = clocksource_max_adjustment(cs); while ((cs->mult + cs->maxadj < cs->mult) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 01d2d15..3b8ae45 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -125,9 +125,9 @@ void __init sched_clock_register(u64 (*read)(void), int bits, new_mask = CLOCKSOURCE_MASK(bits); - /* calculate how many ns until we wrap */ + /* calculate how many nanosecs until we risk wrapping */ wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask); - new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3)); + new_wrap_kt = ns_to_ktime(wrap); /* update epoch for new counter and update epoch_ns from old counter*/ new_epoch = read(); -- cgit v1.1 From fb82fe2fe8588745edd73aa3a6229facac5c1e15 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:31 -0700 Subject: clocksource: Add 'max_cycles' to 'struct clocksource' In order to facilitate clocksource validation, add a 'max_cycles' field to the clocksource structure which will hold the maximum cycle value that can safely be multiplied without potentially causing an overflow. Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/clocksource.h | 5 +++-- kernel/time/clocksource.c | 28 ++++++++++++++++------------ kernel/time/sched_clock.c | 2 +- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 9c78d15..16d048c 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -56,6 +56,7 @@ struct module; * @shift: cycle to nanosecond divisor (power of two) * @max_idle_ns: max idle time permitted by the clocksource (nsecs) * @maxadj: maximum adjustment value to mult (~11%) + * @max_cycles: maximum safe cycle value which won't overflow on multiplication * @flags: flags describing special properties * @archdata: arch-specific data * @suspend: suspend function for the clocksource, if necessary @@ -76,7 +77,7 @@ struct clocksource { #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA struct arch_clocksource_data archdata; #endif - + u64 max_cycles; const char *name; struct list_head list; int rating; @@ -189,7 +190,7 @@ extern struct clocksource * __init clocksource_default_clock(void); extern void clocksource_mark_unstable(struct clocksource *cs); extern u64 -clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask); +clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles); extern void clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index ace9576..fc2a9de 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -469,11 +469,13 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) * @shift: cycle to nanosecond divisor (power of two) * @maxadj: maximum adjustment value to mult (~11%) * @mask: bitmask for two's complement subtraction of non 64 bit counters + * @max_cyc: maximum cycle value before potential overflow (does not include + * any safety margin) * * NOTE: This function includes a safety margin of 50%, so that bad clock values * can be detected. 
*/ -u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) +u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc) { u64 max_nsecs, max_cycles; @@ -493,6 +495,10 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) max_cycles = min(max_cycles, mask); max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift); + /* return the max_cycles value as well if requested */ + if (max_cyc) + *max_cyc = max_cycles; + /* Return 50% of the actual maximum, so we can detect bad values */ max_nsecs >>= 1; @@ -500,17 +506,15 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) } /** - * clocksource_max_deferment - Returns max time the clocksource should be deferred - * @cs: Pointer to clocksource + * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles + * @cs: Pointer to clocksource to be updated * */ -static u64 clocksource_max_deferment(struct clocksource *cs) +static inline void clocksource_update_max_deferment(struct clocksource *cs) { - u64 max_nsecs; - - max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj, - cs->mask); - return max_nsecs; + cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift, + cs->maxadj, cs->mask, + &cs->max_cycles); } #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET @@ -684,7 +688,7 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) cs->maxadj = clocksource_max_adjustment(cs); } - cs->max_idle_ns = clocksource_max_deferment(cs); + clocksource_update_max_deferment(cs); } EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); @@ -730,8 +734,8 @@ int clocksource_register(struct clocksource *cs) "Clocksource %s might overflow on 11%% adjustment\n", cs->name); - /* calculate max idle time permitted for this clocksource */ - cs->max_idle_ns = clocksource_max_deferment(cs); + /* Update max idle time permitted for this clocksource */ + clocksource_update_max_deferment(cs); mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 3b8ae45..ca3bc5c 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -126,7 +126,7 @@ void __init sched_clock_register(u64 (*read)(void), int bits, new_mask = CLOCKSOURCE_MASK(bits); /* calculate how many nanosecs until we risk wrapping */ - wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask); + wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL); new_wrap_kt = ns_to_ktime(wrap); /* update epoch for new counter and update epoch_ns from old counter*/ -- cgit v1.1 From 3c17ad19f0697ffe5ef7438cdafc2d2b7757d8a5 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:32 -0700 Subject: timekeeping: Add debugging checks to warn if we see delays Recently there's been requests for better sanity checking in the time code, so that it's more clear when something is going wrong, since timekeeping issues could manifest in a large number of strange ways in various subsystems. Thus, this patch adds some extra infrastructure to add a check to update_wall_time() to print two new warnings: 1) if we see the call delayed beyond the 'max_cycles' overflow point, 2) or if we see the call delayed beyond the clocksource's 'max_idle_ns' value, which is currently 50% of the overflow point. This extra infrastructure is conditional on a new CONFIG_DEBUG_TIMEKEEPING option, also added in this patch - default off. Tested this a bit by halting qemu for specified lengths of time to trigger the warnings. 
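One way to reproduce such delays, per the note above, is to pause a guest from the QEMU monitor for longer than the clocksource's limits, with the new option enabled (the exact wait times depend on the clocksource in use):

    CONFIG_DEBUG_TIMEKEEPING=y

    (qemu) stop
    ... wait past max_idle_ns, or past the max_cycles point ...
    (qemu) cont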
Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-5-git-send-email-john.stultz@linaro.org [ Improved the changelog and the messages a bit. ] Signed-off-by: Ingo Molnar --- kernel/time/jiffies.c | 1 + kernel/time/timekeeping.c | 28 ++++++++++++++++++++++++++++ lib/Kconfig.debug | 13 +++++++++++++ 3 files changed, 42 insertions(+) diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index a6a5bf5..7e41390 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -71,6 +71,7 @@ static struct clocksource clocksource_jiffies = { .mask = 0xffffffff, /*32bits*/ .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ .shift = JIFFIES_SHIFT, + .max_cycles = 10, }; __cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 91db941..acf0491 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -118,6 +118,31 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) tk->offs_boot = ktime_add(tk->offs_boot, delta); } +#ifdef CONFIG_DEBUG_TIMEKEEPING +static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) +{ + + cycle_t max_cycles = tk->tkr.clock->max_cycles; + const char *name = tk->tkr.clock->name; + + if (offset > max_cycles) { + printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow\n", + offset, name, max_cycles); + printk_deferred(" timekeeping: Your kernel is sick, but tries to cope\n"); + } else { + if (offset > (max_cycles >> 1)) { + printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n", + offset, name, max_cycles >> 1); + printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n"); + } + } +} +#else +static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) +{ +} +#endif + /** * tk_setup_internals - Set up internals to use clocksource clock. * @@ -1630,6 +1655,9 @@ void update_wall_time(void) if (offset < real_tk->cycle_interval) goto out; + /* Do some additional sanity checking */ + timekeeping_check_update(real_tk, offset); + /* * With NO_HZ we may have to accumulate many cycle_intervals * (think "ticks") worth of time at once. To do this efficiently, diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c5cefb3..36b6fa8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -865,6 +865,19 @@ config SCHED_STACK_END_CHECK data corruption or a sporadic crash at a later stage once the region is examined. The runtime overhead introduced is minimal. +config DEBUG_TIMEKEEPING + bool "Enable extra timekeeping sanity checking" + help + This option will enable additional timekeeping sanity checks + which may be helpful when diagnosing issues where timekeeping + problems are suspected. + + This may include checks in the timekeeping hotpaths, so this + option may have a (very small) performance impact to some + workloads. + + If unsure, say N. 
+ config TIMER_STATS bool "Collect kernel timers statistics" depends on DEBUG_KERNEL && PROC_FS -- cgit v1.1 From a558cd021d83b65c47ee5b9bec1fcfe5298a769f Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:33 -0700 Subject: timekeeping: Add checks to cap clocksource reads to the 'max_cycles' value When calculating the current delta since the last tick, we currently have no hard protections to prevent a multiplication overflow from occurring. This patch introduces infrastructure to allow a cap that limits the clocksource read delta value to the 'max_cycles' value, which is where an overflow would occur. Since this is in the hot path, it adds the extra checking under CONFIG_DEBUG_TIMEKEEPING=y. There was some concern that capping time like this could cause problems as we may stop expiring timers, which could go circular if the timer that triggers time accumulation were mis-scheduled too far in the future, which would cause time to stop. However, since the mult overflow would result in a smaller time value, we would effectively have the same problem there. Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-6-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 49 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index acf0491..657414c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -126,9 +126,9 @@ static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) const char *name = tk->tkr.clock->name; if (offset > max_cycles) { - printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow\n", + printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n", offset, name, max_cycles); - printk_deferred(" timekeeping: Your kernel is sick, but tries to cope\n"); + printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n"); } else { if (offset > (max_cycles >> 1)) { printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n", @@ -137,10 +137,39 @@ static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) } } } + +static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) +{ + cycle_t cycle_now, delta; + + /* read clocksource */ + cycle_now = tkr->read(tkr->clock); + + /* calculate the delta since the last update_wall_time */ + delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); + + /* Cap delta value to the max_cycles values to avoid mult overflows */ + if (unlikely(delta > tkr->clock->max_cycles)) + delta = tkr->clock->max_cycles; + + return delta; +} #else static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) { } +static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) +{ + cycle_t cycle_now, delta; + + /* read clocksource */ + cycle_now = tkr->read(tkr->clock); + + /* calculate the delta since the last update_wall_time */ + delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); + + return delta; +} #endif /** @@ -218,14 +247,10 @@ static inline u32 arch_gettimeoffset(void)
{ return 0; } static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) { - cycle_t cycle_now, delta; + cycle_t delta; s64 nsec; - /* read clocksource: */ - cycle_now = tkr->read(tkr->clock); - - /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); + delta = timekeeping_get_delta(tkr); nsec = delta * tkr->mult + tkr->xtime_nsec; nsec >>= tkr->shift; @@ -237,14 +262,10 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) { struct clocksource *clock = tk->tkr.clock; - cycle_t cycle_now, delta; + cycle_t delta; s64 nsec; - /* read clocksource: */ - cycle_now = tk->tkr.read(clock); - - /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); + delta = timekeeping_get_delta(&tk->tkr); /* convert delta to nanoseconds. */ nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); -- cgit v1.1 From 057b87e3161d1194a095718f9918c01b2c389e74 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:34 -0700 Subject: timekeeping: Try to catch clocksource delta underflows In the case where there is a broken clocksource where there are multiple actual clocks that aren't perfectly aligned, we may see small "negative" deltas when we subtract 'now' from 'cycle_last'. The values are actually negative with respect to the clocksource mask value, not necessarily negative if cast to a s64, but we can check by checking the delta to see if it is a small (relative to the mask) negative value (again negative relative to the mask). If so, we assume we jumped backwards somehow and instead use zero for our delta. Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-7-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 657414c..187149b 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -148,6 +148,13 @@ static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) /* calculate the delta since the last update_wall_time */ delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); + /* + * Try to catch underflows by checking if we are seeing small + * mask-relative negative values. + */ + if (unlikely((~delta & tkr->mask) < (tkr->mask >> 3))) + delta = 0; + /* Cap delta value to the max_cycles values to avoid mult overflows */ if (unlikely(delta > tkr->clock->max_cycles)) delta = tkr->clock->max_cycles; -- cgit v1.1 From 4ca22c2648f9c1cec0b242f58d7302136f5a4cbb Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:35 -0700 Subject: timekeeping: Add warnings when overflows or underflows are observed It was suggested that the underflow/overflow protection should probably throw some sort of warning out, rather than just silently fixing the issue. So this patch adds some warnings here. The flag variables used are not protected by locks, but since we can't print from the reading functions, just being able to say we saw an issue in the update interval is useful enough, and can be slightly racy without real consequence. 
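For reference, the mask-relative underflow test added by the previous patch, whose trigger these flags record, behaves like this standalone sketch (plain C, hypothetical counter values):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t mask = 0xffffffff;            /* 32-bit counter */
        uint64_t last = 10, now = 5;           /* clock appears to step back */
        uint64_t delta = (now - last) & mask;  /* 0xfffffffb: huge "forward" delta */

        /* a small mask-relative negative value => treat as underflow */
        if ((~delta & mask) < (mask >> 3))
            delta = 0;

        printf("delta = %llu\n", (unsigned long long)delta);  /* prints 0 */
        return 0;
    }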
The big complication is that we're only under a read seqlock, so the data could shift under us during our calculation to see if there was a problem. This patch avoids this issue by nesting another seqlock which allows us to snapshot the just required values atomically. So we shouldn't see false positives. I also added some basic rate-limiting here, since on one build machine w/ skewed TSCs it was fairly noisy at bootup. Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-8-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 64 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 7 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 187149b..892f6cb 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -119,6 +119,20 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) } #ifdef CONFIG_DEBUG_TIMEKEEPING +#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */ +/* + * These simple flag variables are managed + * without locks, which is racy, but ok since + * we don't really care about being super + * precise about how many events were seen, + * just that a problem was observed. + */ +static int timekeeping_underflow_seen; +static int timekeeping_overflow_seen; + +/* last_warning is only modified under the timekeeping lock */ +static long timekeeping_last_warning; + static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) { @@ -136,28 +150,64 @@ static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n"); } } + + if (timekeeping_underflow_seen) { + if (jiffies - timekeeping_last_warning > WARNING_FREQ) { + printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name); + printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); + printk_deferred(" Your kernel is probably still fine.\n"); + timekeeping_last_warning = jiffies; + } + timekeeping_underflow_seen = 0; + } + + if (timekeeping_overflow_seen) { + if (jiffies - timekeeping_last_warning > WARNING_FREQ) { + printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name); + printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); + printk_deferred(" Your kernel is probably still fine.\n"); + timekeeping_last_warning = jiffies; + } + timekeeping_overflow_seen = 0; + } } static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) { - cycle_t cycle_now, delta; + cycle_t now, last, mask, max, delta; + unsigned int seq; - /* read clocksource */ - cycle_now = tkr->read(tkr->clock); + /* + * Since we're called holding a seqlock, the data may shift + * under us while we're doing the calculation. This can cause + * false positives, since we'd note a problem but throw the + * results away. So nest another seqlock here to atomically + * grab the points we are checking with. 
+ */ + do { + seq = read_seqcount_begin(&tk_core.seq); + now = tkr->read(tkr->clock); + last = tkr->cycle_last; + mask = tkr->mask; + max = tkr->clock->max_cycles; + } while (read_seqcount_retry(&tk_core.seq, seq)); - /* calculate the delta since the last update_wall_time */ - delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); + delta = clocksource_delta(now, last, mask); /* * Try to catch underflows by checking if we are seeing small * mask-relative negative values. */ - if (unlikely((~delta & tkr->mask) < (tkr->mask >> 3))) + if (unlikely((~delta & mask) < (mask >> 3))) { + timekeeping_underflow_seen = 1; delta = 0; + } /* Cap delta value to the max_cycles values to avoid mult overflows */ - if (unlikely(delta > tkr->clock->max_cycles)) + if (unlikely(delta > max)) { + timekeeping_overflow_seen = 1; delta = tkr->clock->max_cycles; + } return delta; } -- cgit v1.1 From 0b046b217ad4c64fbbeaaac24d0648cb1fa49ad8 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:36 -0700 Subject: clocksource: Improve clocksource watchdog reporting The clocksource watchdog reporting has been less helpful then desired, as it just printed the delta between the two clocksources. This prevents any useful analysis of why the skew occurred. Thus this patch tries to improve the output when we mark a clocksource as unstable, printing out the cycle last and now values for both the current clocksource and the watchdog clocksource. This will allow us to see if the result was due to a false positive caused by a problematic watchdog. Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-9-git-send-email-john.stultz@linaro.org [ Minor cleanups of kernel messages. ] Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index fc2a9de..c4cc04b 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs) schedule_work(&watchdog_work); } -static void clocksource_unstable(struct clocksource *cs, int64_t delta) -{ - printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", - cs->name, delta); - __clocksource_unstable(cs); -} - /** * clocksource_mark_unstable - mark clocksource unstable via watchdog * @cs: clocksource to be marked unstable @@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs) static void clocksource_watchdog(unsigned long data) { struct clocksource *cs; - cycle_t csnow, wdnow, delta; + cycle_t csnow, wdnow, cslast, wdlast, delta; int64_t wd_nsec, cs_nsec; int next_cpu, reset_pending; @@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data) delta = clocksource_delta(csnow, cs->cs_last, cs->mask); cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift); + wdlast = cs->wd_last; /* save these in case we print them */ + cslast = cs->cs_last; cs->cs_last = csnow; cs->wd_last = wdnow; @@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data) /* Check the deviation from the watchdog clocksource. 
*/ if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { - clocksource_unstable(cs, cs_nsec - wd_nsec); + pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name); + pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", + watchdog->name, wdnow, wdlast, watchdog->mask); + pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n", + cs->name, csnow, cslast, cs->mask); + __clocksource_unstable(cs); continue; } -- cgit v1.1 From f8935983f110505daa38e8d36ee406807f83a069 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:37 -0700 Subject: clocksource: Mostly kill clocksource_register() A long running project has been to clean up remaining uses of clocksource_register(), replacing it with the simpler clocksource_register_khz/hz() functions. However, there are a few cases where we need to self-define our mult/shift values, so switch the function to a more obviously internal __clocksource_register() name, and consolidate much of the internal logic so we don't have duplication. Signed-off-by: John Stultz Cc: Dave Jones Cc: David S. Miller Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-10-git-send-email-john.stultz@linaro.org [ Minor cleanups. ] Signed-off-by: Ingo Molnar --- arch/s390/kernel/time.c | 2 +- arch/sparc/kernel/time_32.c | 2 +- include/linux/clocksource.h | 10 +++++- kernel/time/clocksource.c | 81 +++++++++++++++++++-------------------------- kernel/time/jiffies.c | 4 +-- 5 files changed, 47 insertions(+), 52 deletions(-) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 20660dd..6c273cd 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -283,7 +283,7 @@ void __init time_init(void) if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt)) panic("Couldn't request external interrupt 0x1406"); - if (clocksource_register(&clocksource_tod) != 0) + if (__clocksource_register(&clocksource_tod) != 0) panic("Could not register TOD clock source"); /* Enable TOD clock interrupts on the boot cpu. */ diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 2f80d23..a31c0c8 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -191,7 +191,7 @@ static __init int setup_timer_cs(void) timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate, timer_cs.shift); - return clocksource_register(&timer_cs); + return __clocksource_register(&timer_cs); } #ifdef CONFIG_SMP diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 16d048c..bd98eaa 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -179,7 +179,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) } -extern int clocksource_register(struct clocksource*); extern int clocksource_unregister(struct clocksource*); extern void clocksource_touch_watchdog(void); extern struct clocksource* clocksource_get_next(void); @@ -203,6 +202,15 @@ __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq); extern void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq); +/* + * Don't call this unless you are a default clocksource + * (AKA: jiffies) and absolutely have to. 
+ */ +static inline int __clocksource_register(struct clocksource *cs) +{ + return __clocksource_register_scale(cs, 1, 0); +} + static inline int clocksource_register_hz(struct clocksource *cs, u32 hz) { return __clocksource_register_scale(cs, 1, hz); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c4cc04b..5cdf17e 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -656,38 +656,52 @@ static void clocksource_enqueue(struct clocksource *cs) void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) { u64 sec; + /* - * Calc the maximum number of seconds which we can run before - * wrapping around. For clocksources which have a mask > 32bit - * we need to limit the max sleep time to have a good - * conversion precision. 10 minutes is still a reasonable - * amount. That results in a shift value of 24 for a - * clocksource with mask >= 40bit and f >= 4GHz. That maps to - * ~ 0.06ppm granularity for NTP. + * Default clocksources are *special* and self-define their mult/shift. + * But, you're not special, so you should specify a freq value. */ - sec = cs->mask; - do_div(sec, freq); - do_div(sec, scale); - if (!sec) - sec = 1; - else if (sec > 600 && cs->mask > UINT_MAX) - sec = 600; - - clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, - NSEC_PER_SEC / scale, sec * scale); - + if (freq) { + /* + * Calc the maximum number of seconds which we can run before + * wrapping around. For clocksources which have a mask > 32-bit + * we need to limit the max sleep time to have a good + * conversion precision. 10 minutes is still a reasonable + * amount. That results in a shift value of 24 for a + * clocksource with mask >= 40-bit and f >= 4GHz. That maps to + * ~ 0.06ppm granularity for NTP. + */ + sec = cs->mask; + do_div(sec, freq); + do_div(sec, scale); + if (!sec) + sec = 1; + else if (sec > 600 && cs->mask > UINT_MAX) + sec = 600; + + clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, + NSEC_PER_SEC / scale, sec * scale); + } /* * Ensure clocksources that have large 'mult' values don't overflow * when adjusted. */ cs->maxadj = clocksource_max_adjustment(cs); - while ((cs->mult + cs->maxadj < cs->mult) - || (cs->mult - cs->maxadj > cs->mult)) { + while (freq && ((cs->mult + cs->maxadj < cs->mult) + || (cs->mult - cs->maxadj > cs->mult))) { cs->mult >>= 1; cs->shift--; cs->maxadj = clocksource_max_adjustment(cs); } + /* + * Only warn for *special* clocksources that self-define + * their mult/shift values and don't specify a freq. + */ + WARN_ONCE(cs->mult + cs->maxadj < cs->mult, + "timekeeping: Clocksource %s might overflow on 11%% adjustment\n", + cs->name); + clocksource_update_max_deferment(cs); } EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); @@ -719,33 +733,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) } EXPORT_SYMBOL_GPL(__clocksource_register_scale); - -/** - * clocksource_register - Used to install new clocksources - * @cs: clocksource to be registered - * - * Returns -EBUSY if registration fails, zero otherwise. 
- */ -int clocksource_register(struct clocksource *cs) -{ - /* calculate max adjustment for given mult/shift */ - cs->maxadj = clocksource_max_adjustment(cs); - WARN_ONCE(cs->mult + cs->maxadj < cs->mult, - "Clocksource %s might overflow on 11%% adjustment\n", - cs->name); - - /* Update max idle time permitted for this clocksource */ - clocksource_update_max_deferment(cs); - - mutex_lock(&clocksource_mutex); - clocksource_enqueue(cs); - clocksource_enqueue_watchdog(cs); - clocksource_select(); - mutex_unlock(&clocksource_mutex); - return 0; -} -EXPORT_SYMBOL(clocksource_register); - static void __clocksource_change_rating(struct clocksource *cs, int rating) { list_del(&cs->list); diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 7e41390..c4bb518 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -95,7 +95,7 @@ EXPORT_SYMBOL(jiffies); static int __init init_jiffies_clocksource(void) { - return clocksource_register(&clocksource_jiffies); + return __clocksource_register(&clocksource_jiffies); } core_initcall(init_jiffies_clocksource); @@ -131,6 +131,6 @@ int register_refined_jiffies(long cycles_per_second) refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT; - clocksource_register(&refined_jiffies); + __clocksource_register(&refined_jiffies); return 0; } -- cgit v1.1 From 3142f76022fe46f6e0a0d3940b23fb6ccb794692 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:38 -0700 Subject: clocksource, sparc32: Convert to using clocksource_register_hz() While cleaning up some clocksource code, I noticed the time_32 implementation uses the clocksource_hz2mult() helper, but doesn't use the clocksource_register_hz() method. I don't believe the Sparc clocksource is a default clocksource, so we shouldn't need to self-define the mult/shift pair. So convert the time_32.c implementation to use clocksource_register_hz(). Untested. Signed-off-by: John Stultz Acked-by: David S. Miller Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-11-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- arch/sparc/kernel/time_32.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index a31c0c8..18147a5 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -181,17 +181,13 @@ static struct clocksource timer_cs = { .rating = 100, .read = timer_cs_read, .mask = CLOCKSOURCE_MASK(64), - .shift = 2, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; static __init int setup_timer_cs(void) { timer_cs_enabled = 1; - timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate, - timer_cs.shift); - - return __clocksource_register(&timer_cs); + return clocksource_register_hz(&timer_cs, sparc_config.clock_rate); } #ifdef CONFIG_SMP -- cgit v1.1 From 8cc8c525ad4e7b581cacf84119e1a28dcb4044db Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:39 -0700 Subject: clocksource: Add some debug info about clocksources being registered Print the mask, max_cycles, and max_idle_ns values for clocksources being registered. 
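Taken together with the registration cleanups above, driver-side registration now boils down to filling in the static fields and letting the core derive the rest (a sketch; my_cs and my_hw_counter() are hypothetical):

    static cycle_t my_cs_read(struct clocksource *cs)
    {
        return my_hw_counter();  /* hypothetical hardware counter read */
    }

    static struct clocksource my_cs = {
        .name   = "my_cs",
        .rating = 200,
        .read   = my_cs_read,
        .mask   = CLOCKSOURCE_MASK(32),
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
    };

    static int __init my_cs_init(void)
    {
        /* mult/shift, maxadj, max_cycles and max_idle_ns derived here */
        return clocksource_register_hz(&my_cs, 24000000);  /* e.g. 24 MHz */
    }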
Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-12-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 5cdf17e..1977eba 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -703,6 +703,9 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) cs->name); clocksource_update_max_deferment(cs); + + pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n", + cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns); } EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); -- cgit v1.1 From fba9e07208c0f9d92d9f73761c99c8612039da44 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:40 -0700 Subject: clocksource: Rename __clocksource_updatefreq_*() to __clocksource_update_freq_*() Ingo requested this function be renamed to improve readability, so I've renamed __clocksource_updatefreq_scale() as well as the __clocksource_updatefreq_hz/khz() functions to avoid squishedtogethernames. This touches some of the sh clocksources, which I've not tested. The arch/arm/plat-omap change is just a comment change for consistency. Signed-off-by: John Stultz Cc: Daniel Lezcano Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-13-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- arch/arm/plat-omap/counter_32k.c | 2 +- drivers/clocksource/em_sti.c | 2 +- drivers/clocksource/sh_cmt.c | 2 +- drivers/clocksource/sh_tmu.c | 2 +- include/linux/clocksource.h | 10 +++++----- kernel/time/clocksource.c | 11 ++++++----- 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c index 61b4d70..43cf745 100644 --- a/arch/arm/plat-omap/counter_32k.c +++ b/arch/arm/plat-omap/counter_32k.c @@ -103,7 +103,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase) /* * 120000 rough estimate from the calculations in - * __clocksource_updatefreq_scale. + * __clocksource_update_freq_scale. 
*/ clocks_calc_mult_shift(&persistent_mult, &persistent_shift, 32768, NSEC_PER_SEC, 120000); diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c index d0a7bd6..dc3c6ee 100644 --- a/drivers/clocksource/em_sti.c +++ b/drivers/clocksource/em_sti.c @@ -210,7 +210,7 @@ static int em_sti_clocksource_enable(struct clocksource *cs) ret = em_sti_start(p, USER_CLOCKSOURCE); if (!ret) - __clocksource_updatefreq_hz(cs, p->rate); + __clocksource_update_freq_hz(cs, p->rate); return ret; } diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 2bd13b5..b8ff3c6 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -641,7 +641,7 @@ static int sh_cmt_clocksource_enable(struct clocksource *cs) ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE); if (!ret) { - __clocksource_updatefreq_hz(cs, ch->rate); + __clocksource_update_freq_hz(cs, ch->rate); ch->cs_enabled = true; } return ret; diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c index f150ca82..b6b8fa3 100644 --- a/drivers/clocksource/sh_tmu.c +++ b/drivers/clocksource/sh_tmu.c @@ -272,7 +272,7 @@ static int sh_tmu_clocksource_enable(struct clocksource *cs) ret = sh_tmu_enable(ch); if (!ret) { - __clocksource_updatefreq_hz(cs, ch->rate); + __clocksource_update_freq_hz(cs, ch->rate); ch->cs_enabled = true; } diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index bd98eaa..1355098 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -200,7 +200,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); extern int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq); extern void -__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq); +__clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq); /* * Don't call this unless you are a default clocksource @@ -221,14 +221,14 @@ static inline int clocksource_register_khz(struct clocksource *cs, u32 khz) return __clocksource_register_scale(cs, 1000, khz); } -static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz) +static inline void __clocksource_update_freq_hz(struct clocksource *cs, u32 hz) { - __clocksource_updatefreq_scale(cs, 1, hz); + __clocksource_update_freq_scale(cs, 1, hz); } -static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz) +static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz) { - __clocksource_updatefreq_scale(cs, 1000, khz); + __clocksource_update_freq_scale(cs, 1000, khz); } diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1977eba..c3be3c7 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -643,7 +643,7 @@ static void clocksource_enqueue(struct clocksource *cs) } /** - * __clocksource_updatefreq_scale - Used update clocksource with new freq + * __clocksource_update_freq_scale - Used update clocksource with new freq * @cs: clocksource to be registered * @scale: Scale factor multiplied against freq to get clocksource hz * @freq: clocksource frequency (cycles per second) divided by scale @@ -651,9 +651,10 @@ static void clocksource_enqueue(struct clocksource *cs) * This should only be called from the clocksource->enable() method. * * This *SHOULD NOT* be called directly! Please use the - * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions. + * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper + * functions. 
*/ -void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) +void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq) { u64 sec; @@ -707,7 +708,7 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n", cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns); } -EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); +EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale); /** * __clocksource_register_scale - Used to install new clocksources @@ -724,7 +725,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) { /* Initialize mult/shift and max_idle_ns */ - __clocksource_updatefreq_scale(cs, scale, freq); + __clocksource_update_freq_scale(cs, scale, freq); /* Add clocksource to the clocksource list */ mutex_lock(&clocksource_mutex); -- cgit v1.1 From 8710e914027e4f64058ebbf0501cc6db3cc8454f Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 26 Mar 2015 12:23:22 -0700 Subject: timers, sched/clock: Match scope of read and write seqcounts Currently the scope of the raw_write_seqcount_begin/end() in sched_clock_register() far exceeds the scope of the read section in sched_clock(). This gives the impression of safety during cursory review but achieves little. Note that this is likely to be a latent issue at present because sched_clock_register() is typically called before we enable interrupts, however the issue does risk bugs being needlessly introduced as the code evolves. This patch fixes the problem by increasing the scope of the read locking performed by sched_clock() to cover all data modified by sched_clock_register. We also improve clarity by moving writes to struct clock_data that do not impact sched_clock() outside of the critical section. 
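The reader/writer pairing this patch enforces looks roughly like the following (a sketch using the names from the diff below):

    /* writer: keep the critical section to data the reader actually uses */
    raw_write_seqcount_begin(&cd.seq);
    read_sched_clock = read;
    sched_clock_mask = new_mask;
    /* ... mult/shift and epoch updates ... */
    raw_write_seqcount_end(&cd.seq);

    /* reader: the retry loop must cover everything the writer may change */
    do {
        seq = raw_read_seqcount_begin(&cd.seq);
        res = cd.epoch_ns;
        /* ... read the counter and convert inside the section ... */
    } while (read_seqcount_retry(&cd.seq, seq));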
Signed-off-by: Daniel Thompson [ Reworked it slightly to apply to tip/timers/core] Signed-off-by: John Stultz Reviewed-by: Stephen Boyd Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1427397806-20889-2-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index ca3bc5c..1751e95 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -58,23 +58,21 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) unsigned long long notrace sched_clock(void) { - u64 epoch_ns; - u64 epoch_cyc; - u64 cyc; + u64 cyc, res; unsigned long seq; - if (cd.suspended) - return cd.epoch_ns; - do { seq = raw_read_seqcount_begin(&cd.seq); - epoch_cyc = cd.epoch_cyc; - epoch_ns = cd.epoch_ns; + + res = cd.epoch_ns; + if (!cd.suspended) { + cyc = read_sched_clock(); + cyc = (cyc - cd.epoch_cyc) & sched_clock_mask; + res += cyc_to_ns(cyc, cd.mult, cd.shift); + } } while (read_seqcount_retry(&cd.seq, seq)); - cyc = read_sched_clock(); - cyc = (cyc - epoch_cyc) & sched_clock_mask; - return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift); + return res; } /* @@ -111,7 +109,6 @@ void __init sched_clock_register(u64 (*read)(void), int bits, { u64 res, wrap, new_mask, new_epoch, cyc, ns; u32 new_mult, new_shift; - ktime_t new_wrap_kt; unsigned long r; char r_unit; @@ -124,10 +121,11 @@ void __init sched_clock_register(u64 (*read)(void), int bits, clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600); new_mask = CLOCKSOURCE_MASK(bits); + cd.rate = rate; /* calculate how many nanosecs until we risk wrapping */ wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL); - new_wrap_kt = ns_to_ktime(wrap); + cd.wrap_kt = ns_to_ktime(wrap); /* update epoch for new counter and update epoch_ns from old counter*/ new_epoch = read(); @@ -138,8 +136,6 @@ void __init sched_clock_register(u64 (*read)(void), int bits, raw_write_seqcount_begin(&cd.seq); read_sched_clock = read; sched_clock_mask = new_mask; - cd.rate = rate; - cd.wrap_kt = new_wrap_kt; cd.mult = new_mult; cd.shift = new_shift; cd.epoch_cyc = new_epoch; -- cgit v1.1 From cf7c9c170787d6870af54684822f58acc00a966c Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 26 Mar 2015 12:23:23 -0700 Subject: timers, sched/clock: Optimize cache line usage Currently sched_clock(), a very hot code path, is not optimized to minimise its cache profile. In particular: 1. cd is not ____cacheline_aligned, 2. struct clock_data does not distinguish between hotpath and coldpath data, reducing locality of reference in the hotpath, 3. Some hotpath data is missing from struct clock_data and is marked __read_mostly (which more or less guarantees it will not share a cache line with cd). This patch corrects these problems by extracting all hotpath data into a separate structure and using ____cacheline_aligned to ensure the hotpath uses a single (64 byte) cache line. 
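The size budget stated in the comments below could be verified at build time with something like this (a hypothetical check, not part of the patch):

    static void __init check_sched_clock_layout(void)
    {
        /* seq plus the hot read_data must fit one 64-byte cache line */
        BUILD_BUG_ON(sizeof(seqcount_t) +
                     sizeof(struct clock_read_data) > 64);
    }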
Signed-off-by: Daniel Thompson Signed-off-by: John Stultz Reviewed-by: Stephen Boyd Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1427397806-20889-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 112 +++++++++++++++++++++++++++++++--------------- 1 file changed, 77 insertions(+), 35 deletions(-) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 1751e95..872e068 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -18,28 +18,59 @@ #include #include -struct clock_data { - ktime_t wrap_kt; +/** + * struct clock_read_data - data required to read from sched_clock + * + * @epoch_ns: sched_clock value at last update + * @epoch_cyc: Clock cycle value at last update + * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit + * clocks + * @read_sched_clock: Current clock source (or dummy source when suspended) + * @mult: Multipler for scaled math conversion + * @shift: Shift value for scaled math conversion + * @suspended: Flag to indicate if the clock is suspended (stopped) + * + * Care must be taken when updating this structure; it is read by + * some very hot code paths. It occupies <=48 bytes and, when combined + * with the seqcount used to synchronize access, comfortably fits into + * a 64 byte cache line. + */ +struct clock_read_data { u64 epoch_ns; u64 epoch_cyc; - seqcount_t seq; - unsigned long rate; + u64 sched_clock_mask; + u64 (*read_sched_clock)(void); u32 mult; u32 shift; bool suspended; }; +/** + * struct clock_data - all data needed for sched_clock (including + * registration of a new clock source) + * + * @seq: Sequence counter for protecting updates. + * @read_data: Data required to read from sched_clock. + * @wrap_kt: Duration for which clock can run before wrapping + * @rate: Tick rate of the registered clock + * @actual_read_sched_clock: Registered clock read function + * + * The ordering of this structure has been chosen to optimize cache + * performance. In particular seq and read_data (combined) should fit + * into a single 64 byte cache line. 
+ */ +struct clock_data { + seqcount_t seq; + struct clock_read_data read_data; + ktime_t wrap_kt; + unsigned long rate; +}; + static struct hrtimer sched_clock_timer; static int irqtime = -1; core_param(irqtime, irqtime, int, 0400); -static struct clock_data cd = { - .mult = NSEC_PER_SEC / HZ, -}; - -static u64 __read_mostly sched_clock_mask; - static u64 notrace jiffy_sched_clock_read(void) { /* @@ -49,7 +80,10 @@ static u64 notrace jiffy_sched_clock_read(void) return (u64)(jiffies - INITIAL_JIFFIES); } -static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; +static struct clock_data cd ____cacheline_aligned = { + .read_data = { .mult = NSEC_PER_SEC / HZ, + .read_sched_clock = jiffy_sched_clock_read, }, +}; static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) { @@ -60,15 +94,16 @@ unsigned long long notrace sched_clock(void) { u64 cyc, res; unsigned long seq; + struct clock_read_data *rd = &cd.read_data; do { seq = raw_read_seqcount_begin(&cd.seq); - res = cd.epoch_ns; - if (!cd.suspended) { - cyc = read_sched_clock(); - cyc = (cyc - cd.epoch_cyc) & sched_clock_mask; - res += cyc_to_ns(cyc, cd.mult, cd.shift); + res = rd->epoch_ns; + if (!rd->suspended) { + cyc = rd->read_sched_clock(); + cyc = (cyc - rd->epoch_cyc) & rd->sched_clock_mask; + res += cyc_to_ns(cyc, rd->mult, rd->shift); } } while (read_seqcount_retry(&cd.seq, seq)); @@ -83,16 +118,17 @@ static void notrace update_sched_clock(void) unsigned long flags; u64 cyc; u64 ns; + struct clock_read_data *rd = &cd.read_data; - cyc = read_sched_clock(); - ns = cd.epoch_ns + - cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, - cd.mult, cd.shift); + cyc = rd->read_sched_clock(); + ns = rd->epoch_ns + + cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, + rd->mult, rd->shift); raw_local_irq_save(flags); raw_write_seqcount_begin(&cd.seq); - cd.epoch_ns = ns; - cd.epoch_cyc = cyc; + rd->epoch_ns = ns; + rd->epoch_cyc = cyc; raw_write_seqcount_end(&cd.seq); raw_local_irq_restore(flags); } @@ -111,6 +147,7 @@ void __init sched_clock_register(u64 (*read)(void), int bits, u32 new_mult, new_shift; unsigned long r; char r_unit; + struct clock_read_data *rd = &cd.read_data; if (cd.rate > rate) return; @@ -129,17 +166,18 @@ void __init sched_clock_register(u64 (*read)(void), int bits, /* update epoch for new counter and update epoch_ns from old counter*/ new_epoch = read(); - cyc = read_sched_clock(); - ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, - cd.mult, cd.shift); + cyc = rd->read_sched_clock(); + ns = rd->epoch_ns + + cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, + rd->mult, rd->shift); raw_write_seqcount_begin(&cd.seq); - read_sched_clock = read; - sched_clock_mask = new_mask; - cd.mult = new_mult; - cd.shift = new_shift; - cd.epoch_cyc = new_epoch; - cd.epoch_ns = ns; + rd->read_sched_clock = read; + rd->sched_clock_mask = new_mask; + rd->mult = new_mult; + rd->shift = new_shift; + rd->epoch_cyc = new_epoch; + rd->epoch_ns = ns; raw_write_seqcount_end(&cd.seq); r = rate; @@ -171,7 +209,7 @@ void __init sched_clock_postinit(void) * If no sched_clock function has been provided at that point, * make it the final one one. 
*/ - if (read_sched_clock == jiffy_sched_clock_read) + if (cd.read_data.read_sched_clock == jiffy_sched_clock_read) sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ); update_sched_clock(); @@ -187,17 +225,21 @@ void __init sched_clock_postinit(void) static int sched_clock_suspend(void) { + struct clock_read_data *rd = &cd.read_data; + update_sched_clock(); hrtimer_cancel(&sched_clock_timer); - cd.suspended = true; + rd->suspended = true; return 0; } static void sched_clock_resume(void) { - cd.epoch_cyc = read_sched_clock(); + struct clock_read_data *rd = &cd.read_data; + + rd->epoch_cyc = rd->read_sched_clock(); hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); - cd.suspended = false; + rd->suspended = false; } static struct syscore_ops sched_clock_ops = { -- cgit v1.1 From 13dbeb384d2d3aa555ea48d511e8cb110bd172e0 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 26 Mar 2015 12:23:24 -0700 Subject: timers, sched/clock: Remove suspend from clock_read_data() Currently cd.read_data.suspended is read by the hotpath function sched_clock(). This variable need not be accessed on the hotpath. In fact, once it is removed, we can remove the conditional branches from sched_clock() and install a dummy read_sched_clock function to suspend the clock. The new master copy of the function pointer (actual_read_sched_clock) is introduced and is used for all reads of the clock hardware except those within sched_clock itself. Suggested-by: Thomas Gleixner Signed-off-by: Daniel Thompson Signed-off-by: John Stultz Reviewed-by: Stephen Boyd Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Russell King Cc: Will Deacon Link: http://lkml.kernel.org/r/1427397806-20889-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 872e068..52ea5d9 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -28,10 +28,9 @@ * @read_sched_clock: Current clock source (or dummy source when suspended) * @mult: Multipler for scaled math conversion * @shift: Shift value for scaled math conversion - * @suspended: Flag to indicate if the clock is suspended (stopped) * * Care must be taken when updating this structure; it is read by - * some very hot code paths. It occupies <=48 bytes and, when combined + * some very hot code paths. It occupies <=40 bytes and, when combined * with the seqcount used to synchronize access, comfortably fits into * a 64 byte cache line. 
*/ @@ -42,7 +41,6 @@ struct clock_read_data { u64 (*read_sched_clock)(void); u32 mult; u32 shift; - bool suspended; }; /** @@ -64,6 +62,7 @@ struct clock_data { struct clock_read_data read_data; ktime_t wrap_kt; unsigned long rate; + u64 (*actual_read_sched_clock)(void); }; static struct hrtimer sched_clock_timer; @@ -83,6 +82,8 @@ static u64 notrace jiffy_sched_clock_read(void) static struct clock_data cd ____cacheline_aligned = { .read_data = { .mult = NSEC_PER_SEC / HZ, .read_sched_clock = jiffy_sched_clock_read, }, + .actual_read_sched_clock = jiffy_sched_clock_read, + }; static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) @@ -99,12 +100,9 @@ unsigned long long notrace sched_clock(void) do { seq = raw_read_seqcount_begin(&cd.seq); - res = rd->epoch_ns; - if (!rd->suspended) { - cyc = rd->read_sched_clock(); - cyc = (cyc - rd->epoch_cyc) & rd->sched_clock_mask; - res += cyc_to_ns(cyc, rd->mult, rd->shift); - } + cyc = (rd->read_sched_clock() - rd->epoch_cyc) & + rd->sched_clock_mask; + res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift); } while (read_seqcount_retry(&cd.seq, seq)); return res; @@ -120,7 +118,7 @@ static void notrace update_sched_clock(void) u64 ns; struct clock_read_data *rd = &cd.read_data; - cyc = rd->read_sched_clock(); + cyc = cd.actual_read_sched_clock(); ns = rd->epoch_ns + cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, rd->mult, rd->shift); @@ -166,10 +164,11 @@ void __init sched_clock_register(u64 (*read)(void), int bits, /* update epoch for new counter and update epoch_ns from old counter*/ new_epoch = read(); - cyc = rd->read_sched_clock(); + cyc = cd.actual_read_sched_clock(); ns = rd->epoch_ns + cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, rd->mult, rd->shift); + cd.actual_read_sched_clock = read; raw_write_seqcount_begin(&cd.seq); rd->read_sched_clock = read; @@ -209,7 +208,7 @@ void __init sched_clock_postinit(void) * If no sched_clock function has been provided at that point, * make it the final one one. */ - if (cd.read_data.read_sched_clock == jiffy_sched_clock_read) + if (cd.actual_read_sched_clock == jiffy_sched_clock_read) sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ); update_sched_clock(); @@ -223,13 +222,24 @@ void __init sched_clock_postinit(void) hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); } +/* + * Clock read function for use when the clock is suspended. + * + * This function makes it appear to sched_clock() as if the clock + * stopped counting at its last update. 
+ */ +static u64 notrace suspended_sched_clock_read(void) +{ + return cd.read_data.epoch_cyc; +} + static int sched_clock_suspend(void) { struct clock_read_data *rd = &cd.read_data; update_sched_clock(); hrtimer_cancel(&sched_clock_timer); - rd->suspended = true; + rd->read_sched_clock = suspended_sched_clock_read; return 0; } @@ -237,9 +247,9 @@ static void sched_clock_resume(void) { struct clock_read_data *rd = &cd.read_data; - rd->epoch_cyc = rd->read_sched_clock(); + rd->epoch_cyc = cd.actual_read_sched_clock(); hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); - rd->suspended = false; + rd->read_sched_clock = cd.actual_read_sched_clock; } static struct syscore_ops sched_clock_ops = { -- cgit v1.1 From 9fee69a8c8070b38b558161a3f18bd5e2b664682 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 26 Mar 2015 12:23:25 -0700 Subject: timers, sched/clock: Remove redundant notrace from update function Currently update_sched_clock() is marked as notrace but this function is not called by ftrace. This is trivially fixed by removing the mark up. Signed-off-by: Daniel Thompson Signed-off-by: John Stultz Reviewed-by: Stephen Boyd Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1427397806-20889-5-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 52ea5d9..8adb9d0 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -111,7 +111,7 @@ unsigned long long notrace sched_clock(void) /* * Atomically update the sched_clock epoch. */ -static void notrace update_sched_clock(void) +static void update_sched_clock(void) { unsigned long flags; u64 cyc; -- cgit v1.1 From 1809bfa44e1019e397fabaa6f2349bb7237e57a4 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 26 Mar 2015 12:23:26 -0700 Subject: timers, sched/clock: Avoid deadlock during read from NMI Currently it is possible for an NMI (or FIQ on ARM) to come in and read sched_clock() whilst update_sched_clock() has locked the seqcount for writing. This results in the NMI handler locking up when it calls raw_read_seqcount_begin(). This patch fixes the NMI safety issues by providing banked clock data. This is a similar approach to the one used in Thomas Gleixner's 4396e058c52e("timekeeping: Provide fast and NMI safe access to CLOCK_MONOTONIC"). Suggested-by: Stephen Boyd Signed-off-by: Daniel Thompson Signed-off-by: John Stultz Reviewed-by: Stephen Boyd Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1427397806-20889-6-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 103 ++++++++++++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 35 deletions(-) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 8adb9d0..eeea1e9 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -47,19 +47,20 @@ struct clock_read_data { * struct clock_data - all data needed for sched_clock (including * registration of a new clock source) * - * @seq: Sequence counter for protecting updates. + * @seq: Sequence counter for protecting updates. The lowest + * bit is the index for @read_data. * @read_data: Data required to read from sched_clock. 
* @wrap_kt: Duration for which clock can run before wrapping * @rate: Tick rate of the registered clock * @actual_read_sched_clock: Registered clock read function * * The ordering of this structure has been chosen to optimize cache - * performance. In particular seq and read_data (combined) should fit + * performance. In particular seq and read_data[0] (combined) should fit * into a single 64 byte cache line. */ struct clock_data { seqcount_t seq; - struct clock_read_data read_data; + struct clock_read_data read_data[2]; ktime_t wrap_kt; unsigned long rate; u64 (*actual_read_sched_clock)(void); @@ -80,10 +81,9 @@ static u64 notrace jiffy_sched_clock_read(void) } static struct clock_data cd ____cacheline_aligned = { - .read_data = { .mult = NSEC_PER_SEC / HZ, - .read_sched_clock = jiffy_sched_clock_read, }, + .read_data[0] = { .mult = NSEC_PER_SEC / HZ, + .read_sched_clock = jiffy_sched_clock_read, }, .actual_read_sched_clock = jiffy_sched_clock_read, - }; static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) @@ -95,10 +95,11 @@ unsigned long long notrace sched_clock(void) { u64 cyc, res; unsigned long seq; - struct clock_read_data *rd = &cd.read_data; + struct clock_read_data *rd; do { - seq = raw_read_seqcount_begin(&cd.seq); + seq = raw_read_seqcount(&cd.seq); + rd = cd.read_data + (seq & 1); cyc = (rd->read_sched_clock() - rd->epoch_cyc) & rd->sched_clock_mask; @@ -109,26 +110,50 @@ unsigned long long notrace sched_clock(void) } /* + * Updating the data required to read the clock. + * + * sched_clock will never observe mis-matched data even if called from + * an NMI. We do this by maintaining an odd/even copy of the data and + * steering sched_clock to one or the other using a sequence counter. + * In order to preserve the data cache profile of sched_clock as much + * as possible the system reverts back to the even copy when the update + * completes; the odd copy is used *only* during an update. + */ +static void update_clock_read_data(struct clock_read_data *rd) +{ + /* update the backup (odd) copy with the new data */ + cd.read_data[1] = *rd; + + /* steer readers towards the odd copy */ + raw_write_seqcount_latch(&cd.seq); + + /* now its safe for us to update the normal (even) copy */ + cd.read_data[0] = *rd; + + /* switch readers back to the even copy */ + raw_write_seqcount_latch(&cd.seq); +} + +/* * Atomically update the sched_clock epoch. 
*/ static void update_sched_clock(void) { - unsigned long flags; u64 cyc; u64 ns; - struct clock_read_data *rd = &cd.read_data; + struct clock_read_data rd; + + rd = cd.read_data[0]; cyc = cd.actual_read_sched_clock(); - ns = rd->epoch_ns + - cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, - rd->mult, rd->shift); - - raw_local_irq_save(flags); - raw_write_seqcount_begin(&cd.seq); - rd->epoch_ns = ns; - rd->epoch_cyc = cyc; - raw_write_seqcount_end(&cd.seq); - raw_local_irq_restore(flags); + ns = rd.epoch_ns + + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, + rd.mult, rd.shift); + + rd.epoch_ns = ns; + rd.epoch_cyc = cyc; + + update_clock_read_data(&rd); } static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) @@ -145,7 +170,7 @@ void __init sched_clock_register(u64 (*read)(void), int bits, u32 new_mult, new_shift; unsigned long r; char r_unit; - struct clock_read_data *rd = &cd.read_data; + struct clock_read_data rd; if (cd.rate > rate) return; @@ -162,22 +187,23 @@ void __init sched_clock_register(u64 (*read)(void), int bits, wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL); cd.wrap_kt = ns_to_ktime(wrap); + rd = cd.read_data[0]; + /* update epoch for new counter and update epoch_ns from old counter*/ new_epoch = read(); cyc = cd.actual_read_sched_clock(); - ns = rd->epoch_ns + - cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, - rd->mult, rd->shift); + ns = rd.epoch_ns + + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, + rd.mult, rd.shift); cd.actual_read_sched_clock = read; - raw_write_seqcount_begin(&cd.seq); - rd->read_sched_clock = read; - rd->sched_clock_mask = new_mask; - rd->mult = new_mult; - rd->shift = new_shift; - rd->epoch_cyc = new_epoch; - rd->epoch_ns = ns; - raw_write_seqcount_end(&cd.seq); + rd.read_sched_clock = read; + rd.sched_clock_mask = new_mask; + rd.mult = new_mult; + rd.shift = new_shift; + rd.epoch_cyc = new_epoch; + rd.epoch_ns = ns; + update_clock_read_data(&rd); r = rate; if (r >= 4000000) { @@ -227,15 +253,22 @@ void __init sched_clock_postinit(void) * * This function makes it appear to sched_clock() as if the clock * stopped counting at its last update. + * + * This function must only be called from the critical + * section in sched_clock(). It relies on the read_seqcount_retry() + * at the end of the critical section to be sure we observe the + * correct copy of epoch_cyc. */ static u64 notrace suspended_sched_clock_read(void) { - return cd.read_data.epoch_cyc; + unsigned long seq = raw_read_seqcount(&cd.seq); + + return cd.read_data[seq & 1].epoch_cyc; } static int sched_clock_suspend(void) { - struct clock_read_data *rd = &cd.read_data; + struct clock_read_data *rd = &cd.read_data[0]; update_sched_clock(); hrtimer_cancel(&sched_clock_timer); @@ -245,7 +278,7 @@ static int sched_clock_suspend(void) static void sched_clock_resume(void) { - struct clock_read_data *rd = &cd.read_data; + struct clock_read_data *rd = &cd.read_data[0]; rd->epoch_cyc = cd.actual_read_sched_clock(); hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); -- cgit v1.1 From 32fea568aec5b73ae27253125522b5c2a970a1f0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 27 Mar 2015 07:08:06 +0100 Subject: timers, sched/clock: Clean up the code a bit Trivial cleanups, to improve the readability of the generic sched_clock() code: - Improve and standardize comments - Standardize the coding style - Use vertical spacing where appropriate - etc. 
No code changed: md5: 19a053b31e0c54feaeff1492012b019a sched_clock.o.before.asm 19a053b31e0c54feaeff1492012b019a sched_clock.o.after.asm Cc: Catalin Marinas Cc: Daniel Thompson Cc: John Stultz Cc: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Russell King Cc: Stephen Boyd Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 107 ++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 51 deletions(-) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index eeea1e9..a26036d 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -1,5 +1,6 @@ /* - * sched_clock.c: support for extending counters to full 64-bit ns counter + * sched_clock.c: Generic sched_clock() support, to extend low level + * hardware time counters to full 64-bit ns values. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,15 +20,15 @@ #include /** - * struct clock_read_data - data required to read from sched_clock + * struct clock_read_data - data required to read from sched_clock() * - * @epoch_ns: sched_clock value at last update - * @epoch_cyc: Clock cycle value at last update + * @epoch_ns: sched_clock() value at last update + * @epoch_cyc: Clock cycle value at last update. * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit - * clocks - * @read_sched_clock: Current clock source (or dummy source when suspended) - * @mult: Multipler for scaled math conversion - * @shift: Shift value for scaled math conversion + * clocks. + * @read_sched_clock: Current clock source (or dummy source when suspended). + * @mult: Multipler for scaled math conversion. + * @shift: Shift value for scaled math conversion. * * Care must be taken when updating this structure; it is read by * some very hot code paths. It occupies <=40 bytes and, when combined @@ -44,25 +45,26 @@ struct clock_read_data { }; /** - * struct clock_data - all data needed for sched_clock (including + * struct clock_data - all data needed for sched_clock() (including * registration of a new clock source) * * @seq: Sequence counter for protecting updates. The lowest * bit is the index for @read_data. * @read_data: Data required to read from sched_clock. - * @wrap_kt: Duration for which clock can run before wrapping - * @rate: Tick rate of the registered clock - * @actual_read_sched_clock: Registered clock read function + * @wrap_kt: Duration for which clock can run before wrapping. + * @rate: Tick rate of the registered clock. + * @actual_read_sched_clock: Registered hardware level clock read function. * * The ordering of this structure has been chosen to optimize cache - * performance. In particular seq and read_data[0] (combined) should fit - * into a single 64 byte cache line. + * performance. In particular 'seq' and 'read_data[0]' (combined) should fit + * into a single 64-byte cache line. */ struct clock_data { - seqcount_t seq; - struct clock_read_data read_data[2]; - ktime_t wrap_kt; - unsigned long rate; + seqcount_t seq; + struct clock_read_data read_data[2]; + ktime_t wrap_kt; + unsigned long rate; + u64 (*actual_read_sched_clock)(void); }; @@ -112,10 +114,10 @@ unsigned long long notrace sched_clock(void) /* * Updating the data required to read the clock. * - * sched_clock will never observe mis-matched data even if called from + * sched_clock() will never observe mis-matched data even if called from * an NMI. 
We do this by maintaining an odd/even copy of the data and - * steering sched_clock to one or the other using a sequence counter. - * In order to preserve the data cache profile of sched_clock as much + * steering sched_clock() to one or the other using a sequence counter. + * In order to preserve the data cache profile of sched_clock() as much * as possible the system reverts back to the even copy when the update * completes; the odd copy is used *only* during an update. */ @@ -135,7 +137,7 @@ static void update_clock_read_data(struct clock_read_data *rd) } /* - * Atomically update the sched_clock epoch. + * Atomically update the sched_clock() epoch. */ static void update_sched_clock(void) { @@ -146,9 +148,7 @@ static void update_sched_clock(void) rd = cd.read_data[0]; cyc = cd.actual_read_sched_clock(); - ns = rd.epoch_ns + - cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, - rd.mult, rd.shift); + ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift); rd.epoch_ns = ns; rd.epoch_cyc = cyc; @@ -160,11 +160,12 @@ static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) { update_sched_clock(); hrtimer_forward_now(hrt, cd.wrap_kt); + return HRTIMER_RESTART; } -void __init sched_clock_register(u64 (*read)(void), int bits, - unsigned long rate) +void __init +sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) { u64 res, wrap, new_mask, new_epoch, cyc, ns; u32 new_mult, new_shift; @@ -177,51 +178,53 @@ void __init sched_clock_register(u64 (*read)(void), int bits, WARN_ON(!irqs_disabled()); - /* calculate the mult/shift to convert counter ticks to ns. */ + /* Calculate the mult/shift to convert counter ticks to ns. */ clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600); new_mask = CLOCKSOURCE_MASK(bits); cd.rate = rate; - /* calculate how many nanosecs until we risk wrapping */ + /* Calculate how many nanosecs until we risk wrapping */ wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL); cd.wrap_kt = ns_to_ktime(wrap); rd = cd.read_data[0]; - /* update epoch for new counter and update epoch_ns from old counter*/ + /* Update epoch for new counter and update 'epoch_ns' from old counter*/ new_epoch = read(); cyc = cd.actual_read_sched_clock(); - ns = rd.epoch_ns + - cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, - rd.mult, rd.shift); + ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift); cd.actual_read_sched_clock = read; - rd.read_sched_clock = read; - rd.sched_clock_mask = new_mask; - rd.mult = new_mult; - rd.shift = new_shift; - rd.epoch_cyc = new_epoch; - rd.epoch_ns = ns; + rd.read_sched_clock = read; + rd.sched_clock_mask = new_mask; + rd.mult = new_mult; + rd.shift = new_shift; + rd.epoch_cyc = new_epoch; + rd.epoch_ns = ns; + update_clock_read_data(&rd); r = rate; if (r >= 4000000) { r /= 1000000; r_unit = 'M'; - } else if (r >= 1000) { - r /= 1000; - r_unit = 'k'; - } else - r_unit = ' '; - - /* calculate the ns resolution of this counter */ + } else { + if (r >= 1000) { + r /= 1000; + r_unit = 'k'; + } else { + r_unit = ' '; + } + } + + /* Calculate the ns resolution of this counter */ res = cyc_to_ns(1ULL, new_mult, new_shift); pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n", bits, r, r_unit, res, wrap); - /* Enable IRQ time accounting if we have a fast enough sched_clock */ + /* Enable IRQ time accounting if we have a fast enough sched_clock() */ if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) 
enable_sched_clock_irqtime(); @@ -231,7 +234,7 @@ void __init sched_clock_register(u64 (*read)(void), int bits, void __init sched_clock_postinit(void) { /* - * If no sched_clock function has been provided at that point, + * If no sched_clock() function has been provided at that point, * make it the final one one. */ if (cd.actual_read_sched_clock == jiffy_sched_clock_read) @@ -257,7 +260,7 @@ void __init sched_clock_postinit(void) * This function must only be called from the critical * section in sched_clock(). It relies on the read_seqcount_retry() * at the end of the critical section to be sure we observe the - * correct copy of epoch_cyc. + * correct copy of 'epoch_cyc'. */ static u64 notrace suspended_sched_clock_read(void) { @@ -273,6 +276,7 @@ static int sched_clock_suspend(void) update_sched_clock(); hrtimer_cancel(&sched_clock_timer); rd->read_sched_clock = suspended_sched_clock_read; + return 0; } @@ -286,13 +290,14 @@ static void sched_clock_resume(void) } static struct syscore_ops sched_clock_ops = { - .suspend = sched_clock_suspend, - .resume = sched_clock_resume, + .suspend = sched_clock_suspend, + .resume = sched_clock_resume, }; static int __init sched_clock_syscore_init(void) { register_syscore_ops(&sched_clock_ops); + return 0; } device_initcall(sched_clock_syscore_init); -- cgit v1.1 From 876e78818def2983be55878b21f7152fbaebbd36 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2015 10:09:06 +0100 Subject: time: Rename timekeeper::tkr to timekeeper::tkr_mono In preparation of adding another tkr field, rename this one to tkr_mono. Also rename tk_read_base::base_mono to tk_read_base::base, since the structure is not specific to CLOCK_MONOTONIC and the mono name got added to the tk_read_base instance. Lots of trivial churn. 
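Although the churn is mechanical, the fields being renamed are easiest to follow with the readout math spelled out. A hedged user-space mirror of a tk_read_base-style structure (illustrative types and helper, not kernel code), matching the pattern the vDSO copies in the diff that follows:

#include <stdint.h>

struct tkr_like {
        uint64_t cycle_last;    /* counter value at last update          */
        uint64_t mask;          /* two's complement wrap, short counters */
        uint32_t mult;          /* scaled-math multiplier (NTP adjusted) */
        uint32_t shift;         /* scaled-math shift                     */
        uint64_t xtime_nsec;    /* shifted (fractional) ns accumulated   */
        int64_t  base;          /* ns base: base_mono becomes plain base */
};

static uint64_t tkr_read_ns(const struct tkr_like *tkr, uint64_t cycles)
{
        uint64_t delta = (cycles - tkr->cycle_last) & tkr->mask;

        /* delta * mult and xtime_nsec are both in shifted ns; dropping
         * the shift yields whole nanoseconds past the base. */
        return (uint64_t)tkr->base +
               ((delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift);
}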
Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Stultz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150319093400.344679419@infradead.org Signed-off-by: Ingo Molnar --- arch/arm64/kernel/vdso.c | 10 +-- arch/s390/kernel/time.c | 18 ++--- arch/tile/kernel/time.c | 24 +++--- arch/x86/kernel/vsyscall_gtod.c | 24 +++--- arch/x86/kvm/x86.c | 14 ++-- include/linux/timekeeper_internal.h | 12 +-- kernel/time/timekeeping.c | 150 ++++++++++++++++++------------------ 7 files changed, 126 insertions(+), 126 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 32aeea0..ec37ab3 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -200,7 +200,7 @@ up_fail: void update_vsyscall(struct timekeeper *tk) { struct timespec xtime_coarse; - u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter"); + u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter"); ++vdso_data->tb_seq_count; smp_wmb(); @@ -213,11 +213,11 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { - vdso_data->cs_cycle_last = tk->tkr.cycle_last; + vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; - vdso_data->cs_mult = tk->tkr.mult; - vdso_data->cs_shift = tk->tkr.shift; + vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; + vdso_data->cs_mult = tk->tkr_mono.mult; + vdso_data->cs_shift = tk->tkr_mono.shift; } smp_wmb(); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 6c273cd..170ddd2 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -215,20 +215,20 @@ void update_vsyscall(struct timekeeper *tk) { u64 nsecps; - if (tk->tkr.clock != &clocksource_tod) + if (tk->tkr_mono.clock != &clocksource_tod) return; /* Make userspace gettimeofday spin until we're done. 
*/ ++vdso_data->tb_update_count; smp_wmb(); - vdso_data->xtime_tod_stamp = tk->tkr.cycle_last; + vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; + vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; vdso_data->wtom_clock_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec + - + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift); - nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift; + vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec + + + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift); + nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift; while (vdso_data->wtom_clock_nsec >= nsecps) { vdso_data->wtom_clock_nsec -= nsecps; vdso_data->wtom_clock_sec++; @@ -236,7 +236,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->xtime_coarse_sec = tk->xtime_sec; vdso_data->xtime_coarse_nsec = - (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); + (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); vdso_data->wtom_coarse_sec = vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec; vdso_data->wtom_coarse_nsec = @@ -246,8 +246,8 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtom_coarse_sec++; } - vdso_data->tk_mult = tk->tkr.mult; - vdso_data->tk_shift = tk->tkr.shift; + vdso_data->tk_mult = tk->tkr_mono.mult; + vdso_data->tk_shift = tk->tkr_mono.shift; smp_wmb(); ++vdso_data->tb_update_count; } diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index d412b08..00178ec 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -257,34 +257,34 @@ void update_vsyscall_tz(void) void update_vsyscall(struct timekeeper *tk) { - if (tk->tkr.clock != &cycle_counter_cs) + if (tk->tkr_mono.clock != &cycle_counter_cs) return; write_seqcount_begin(&vdso_data->tb_seq); - vdso_data->cycle_last = tk->tkr.cycle_last; - vdso_data->mask = tk->tkr.mask; - vdso_data->mult = tk->tkr.mult; - vdso_data->shift = tk->tkr.shift; + vdso_data->cycle_last = tk->tkr_mono.cycle_last; + vdso_data->mask = tk->tkr_mono.mask; + vdso_data->mult = tk->tkr_mono.mult; + vdso_data->shift = tk->tkr_mono.shift; vdso_data->wall_time_sec = tk->xtime_sec; - vdso_data->wall_time_snsec = tk->tkr.xtime_nsec; + vdso_data->wall_time_snsec = tk->tkr_mono.xtime_nsec; vdso_data->monotonic_time_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec + vdso_data->monotonic_time_snsec = tk->tkr_mono.xtime_nsec + ((u64)tk->wall_to_monotonic.tv_nsec - << tk->tkr.shift); + << tk->tkr_mono.shift); while (vdso_data->monotonic_time_snsec >= - (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { + (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { vdso_data->monotonic_time_snsec -= - ((u64)NSEC_PER_SEC) << tk->tkr.shift; + ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; vdso_data->monotonic_time_sec++; } vdso_data->wall_time_coarse_sec = tk->xtime_sec; - vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> - tk->tkr.shift); + vdso_data->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >> + tk->tkr_mono.shift); vdso_data->monotonic_time_coarse_sec = vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c index c7d791f..51e3304 100644 --- a/arch/x86/kernel/vsyscall_gtod.c +++ b/arch/x86/kernel/vsyscall_gtod.c @@ -31,30 +31,30 @@ void update_vsyscall(struct timekeeper *tk) gtod_write_begin(vdata); /* copy vsyscall data */ - 
vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode; - vdata->cycle_last = tk->tkr.cycle_last; - vdata->mask = tk->tkr.mask; - vdata->mult = tk->tkr.mult; - vdata->shift = tk->tkr.shift; + vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; + vdata->cycle_last = tk->tkr_mono.cycle_last; + vdata->mask = tk->tkr_mono.mask; + vdata->mult = tk->tkr_mono.mult; + vdata->shift = tk->tkr_mono.shift; vdata->wall_time_sec = tk->xtime_sec; - vdata->wall_time_snsec = tk->tkr.xtime_nsec; + vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec; vdata->monotonic_time_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdata->monotonic_time_snsec = tk->tkr.xtime_nsec + vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec + ((u64)tk->wall_to_monotonic.tv_nsec - << tk->tkr.shift); + << tk->tkr_mono.shift); while (vdata->monotonic_time_snsec >= - (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { + (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { vdata->monotonic_time_snsec -= - ((u64)NSEC_PER_SEC) << tk->tkr.shift; + ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; vdata->monotonic_time_sec++; } vdata->wall_time_coarse_sec = tk->xtime_sec; - vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> - tk->tkr.shift); + vdata->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >> + tk->tkr_mono.shift); vdata->monotonic_time_coarse_sec = vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bd7a70b..d7a300e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1070,19 +1070,19 @@ static void update_pvclock_gtod(struct timekeeper *tk) struct pvclock_gtod_data *vdata = &pvclock_gtod_data; u64 boot_ns; - boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot)); + boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot)); write_seqcount_begin(&vdata->seq); /* copy pvclock gtod data */ - vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode; - vdata->clock.cycle_last = tk->tkr.cycle_last; - vdata->clock.mask = tk->tkr.mask; - vdata->clock.mult = tk->tkr.mult; - vdata->clock.shift = tk->tkr.shift; + vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; + vdata->clock.cycle_last = tk->tkr_mono.cycle_last; + vdata->clock.mask = tk->tkr_mono.mask; + vdata->clock.mult = tk->tkr_mono.mult; + vdata->clock.shift = tk->tkr_mono.shift; vdata->boot_ns = boot_ns; - vdata->nsec_base = tk->tkr.xtime_nsec; + vdata->nsec_base = tk->tkr_mono.xtime_nsec; write_seqcount_end(&vdata->seq); } diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 05af9a3..73df17f 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -16,16 +16,16 @@ * @read: Read function of @clock * @mask: Bitmask for two's complement subtraction of non 64bit clocks * @cycle_last: @clock cycle value at last update - * @mult: NTP adjusted multiplier for scaled math conversion + * @mult: (NTP adjusted) multiplier for scaled math conversion * @shift: Shift value for scaled math conversion * @xtime_nsec: Shifted (fractional) nano seconds offset for readout - * @base_mono: ktime_t (nanoseconds) base time for readout + * @base: ktime_t (nanoseconds) base time for readout * * This struct has size 56 byte on 64 bit. Together with a seqcount it * occupies a single 64byte cache line. * * The struct is separate from struct timekeeper as it is also used - * for a fast NMI safe accessor to clock monotonic. + * for a fast NMI safe accessors. 
*/ struct tk_read_base { struct clocksource *clock; @@ -35,12 +35,12 @@ struct tk_read_base { u32 mult; u32 shift; u64 xtime_nsec; - ktime_t base_mono; + ktime_t base; }; /** * struct timekeeper - Structure holding internal timekeeping values. - * @tkr: The readout base structure + * @tkr_mono: The readout base structure for CLOCK_MONOTONIC * @xtime_sec: Current CLOCK_REALTIME time in seconds * @ktime_sec: Current CLOCK_MONOTONIC time in seconds * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset @@ -76,7 +76,7 @@ struct tk_read_base { * used instead. */ struct timekeeper { - struct tk_read_base tkr; + struct tk_read_base tkr_mono; u64 xtime_sec; unsigned long ktime_sec; struct timespec64 wall_to_monotonic; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 892f6cb..1405091 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -68,8 +68,8 @@ bool __read_mostly persistent_clock_exist = false; static inline void tk_normalize_xtime(struct timekeeper *tk) { - while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) { - tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift; + while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) { + tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift; tk->xtime_sec++; } } @@ -79,20 +79,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk) struct timespec64 ts; ts.tv_sec = tk->xtime_sec; - ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); + ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); return ts; } static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec = ts->tv_sec; - tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift; + tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift; } static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec += ts->tv_sec; - tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift; + tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift; tk_normalize_xtime(tk); } @@ -136,8 +136,8 @@ static long timekeeping_last_warning; static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) { - cycle_t max_cycles = tk->tkr.clock->max_cycles; - const char *name = tk->tkr.clock->name; + cycle_t max_cycles = tk->tkr_mono.clock->max_cycles; + const char *name = tk->tkr_mono.clock->name; if (offset > max_cycles) { printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n", @@ -246,11 +246,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) u64 tmp, ntpinterval; struct clocksource *old_clock; - old_clock = tk->tkr.clock; - tk->tkr.clock = clock; - tk->tkr.read = clock->read; - tk->tkr.mask = clock->mask; - tk->tkr.cycle_last = tk->tkr.read(clock); + old_clock = tk->tkr_mono.clock; + tk->tkr_mono.clock = clock; + tk->tkr_mono.read = clock->read; + tk->tkr_mono.mask = clock->mask; + tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock); /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; @@ -274,11 +274,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) if (old_clock) { int shift_change = clock->shift - old_clock->shift; if (shift_change < 0) - tk->tkr.xtime_nsec >>= -shift_change; + tk->tkr_mono.xtime_nsec >>= -shift_change; else - tk->tkr.xtime_nsec <<= shift_change; + tk->tkr_mono.xtime_nsec <<= 
shift_change; } - tk->tkr.shift = clock->shift; + tk->tkr_mono.shift = clock->shift; tk->ntp_error = 0; tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; @@ -289,7 +289,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) * active clocksource. These value will be adjusted via NTP * to counteract clock drifting. */ - tk->tkr.mult = clock->mult; + tk->tkr_mono.mult = clock->mult; tk->ntp_err_mult = 0; } @@ -318,11 +318,11 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) { - struct clocksource *clock = tk->tkr.clock; + struct clocksource *clock = tk->tkr_mono.clock; cycle_t delta; s64 nsec; - delta = timekeeping_get_delta(&tk->tkr); + delta = timekeeping_get_delta(&tk->tkr_mono); /* convert delta to nanoseconds. */ nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); @@ -428,7 +428,7 @@ u64 notrace ktime_get_mono_fast_ns(void) do { seq = raw_read_seqcount(&tk_fast_mono.seq); tkr = tk_fast_mono.base + (seq & 0x01); - now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr); + now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr); } while (read_seqcount_retry(&tk_fast_mono.seq, seq)); return now; @@ -456,7 +456,7 @@ static cycle_t dummy_clock_read(struct clocksource *cs) static void halt_fast_timekeeper(struct timekeeper *tk) { static struct tk_read_base tkr_dummy; - struct tk_read_base *tkr = &tk->tkr; + struct tk_read_base *tkr = &tk->tkr_mono; memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); cycles_at_suspend = tkr->read(tkr->clock); @@ -472,8 +472,8 @@ static inline void update_vsyscall(struct timekeeper *tk) xt = timespec64_to_timespec(tk_xtime(tk)); wm = timespec64_to_timespec(tk->wall_to_monotonic); - update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult, - tk->tkr.cycle_last); + update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult, + tk->tkr_mono.cycle_last); } static inline void old_vsyscall_fixup(struct timekeeper *tk) @@ -490,11 +490,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk) * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD * users are removed, this can be killed. */ - remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1); - tk->tkr.xtime_nsec -= remainder; - tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift; + remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1); + tk->tkr_mono.xtime_nsec -= remainder; + tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift; tk->ntp_error += remainder << tk->ntp_error_shift; - tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift; + tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift; } #else #define old_vsyscall_fixup(tk) @@ -559,7 +559,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) */ seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); nsec = (u32) tk->wall_to_monotonic.tv_nsec; - tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); + tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); /* Update the monotonic raw base */ tk->base_raw = timespec64_to_ktime(tk->raw_time); @@ -569,7 +569,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) * wall_to_monotonic can be greater/equal one second. Take * this into account before updating tk->ktime_sec. 
*/ - nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift); + nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); if (nsec >= NSEC_PER_SEC) seconds++; tk->ktime_sec = seconds; @@ -592,7 +592,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) memcpy(&shadow_timekeeper, &tk_core.timekeeper, sizeof(tk_core.timekeeper)); - update_fast_timekeeper(&tk->tkr); + update_fast_timekeeper(&tk->tkr_mono); } /** @@ -604,18 +604,18 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) */ static void timekeeping_forward_now(struct timekeeper *tk) { - struct clocksource *clock = tk->tkr.clock; + struct clocksource *clock = tk->tkr_mono.clock; cycle_t cycle_now, delta; s64 nsec; - cycle_now = tk->tkr.read(clock); - delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); - tk->tkr.cycle_last = cycle_now; + cycle_now = tk->tkr_mono.read(clock); + delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); + tk->tkr_mono.cycle_last = cycle_now; - tk->tkr.xtime_nsec += delta * tk->tkr.mult; + tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult; /* If arch requires, add in get_arch_timeoffset() */ - tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift; + tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift; tk_normalize_xtime(tk); @@ -640,7 +640,7 @@ int __getnstimeofday64(struct timespec64 *ts) seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; - nsecs = timekeeping_get_ns(&tk->tkr); + nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -680,8 +680,8 @@ ktime_t ktime_get(void) do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->tkr.base_mono; - nsecs = timekeeping_get_ns(&tk->tkr); + base = tk->tkr_mono.base; + nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -706,8 +706,8 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs) do { seq = read_seqcount_begin(&tk_core.seq); - base = ktime_add(tk->tkr.base_mono, *offset); - nsecs = timekeeping_get_ns(&tk->tkr); + base = ktime_add(tk->tkr_mono.base, *offset); + nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -777,7 +777,7 @@ void ktime_get_ts64(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; - nsec = timekeeping_get_ns(&tk->tkr); + nsec = timekeeping_get_ns(&tk->tkr_mono); tomono = tk->wall_to_monotonic; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -863,7 +863,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) ts_real->tv_nsec = 0; nsecs_raw = timekeeping_get_ns_raw(tk); - nsecs_real = timekeeping_get_ns(&tk->tkr); + nsecs_real = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1046,7 +1046,7 @@ static int change_clocksource(void *data) */ if (try_module_get(new->owner)) { if (!new->enable || new->enable(new) == 0) { - old = tk->tkr.clock; + old = tk->tkr_mono.clock; tk_setup_internals(tk, new); if (old->disable) old->disable(old); @@ -1074,11 +1074,11 @@ int timekeeping_notify(struct clocksource *clock) { struct timekeeper *tk = &tk_core.timekeeper; - if (tk->tkr.clock == clock) + if (tk->tkr_mono.clock == clock) return 0; stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); - return tk->tkr.clock == clock ? 0 : -1; + return tk->tkr_mono.clock == clock ? 
0 : -1; } /** @@ -1119,7 +1119,7 @@ int timekeeping_valid_for_hres(void) do { seq = read_seqcount_begin(&tk_core.seq); - ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; + ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1138,7 +1138,7 @@ u64 timekeeping_max_deferment(void) do { seq = read_seqcount_begin(&tk_core.seq); - ret = tk->tkr.clock->max_idle_ns; + ret = tk->tkr_mono.clock->max_idle_ns; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1303,7 +1303,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) void timekeeping_resume(void) { struct timekeeper *tk = &tk_core.timekeeper; - struct clocksource *clock = tk->tkr.clock; + struct clocksource *clock = tk->tkr_mono.clock; unsigned long flags; struct timespec64 ts_new, ts_delta; struct timespec tmp; @@ -1331,16 +1331,16 @@ void timekeeping_resume(void) * The less preferred source will only be tried if there is no better * usable source. The rtc part is handled separately in rtc core code. */ - cycle_now = tk->tkr.read(clock); + cycle_now = tk->tkr_mono.read(clock); if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && - cycle_now > tk->tkr.cycle_last) { + cycle_now > tk->tkr_mono.cycle_last) { u64 num, max = ULLONG_MAX; u32 mult = clock->mult; u32 shift = clock->shift; s64 nsec = 0; - cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, - tk->tkr.mask); + cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, + tk->tkr_mono.mask); /* * "cycle_delta * mutl" may cause 64 bits overflow, if the @@ -1366,7 +1366,7 @@ void timekeeping_resume(void) __timekeeping_inject_sleeptime(tk, &ts_delta); /* Re-base the last cycle value */ - tk->tkr.cycle_last = cycle_now; + tk->tkr_mono.cycle_last = cycle_now; tk->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); @@ -1519,15 +1519,15 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, * * XXX - TODO: Doc ntp_error calculation. */ - if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) { + if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) { /* NTP adjustment caused clocksource mult overflow */ WARN_ON_ONCE(1); return; } - tk->tkr.mult += mult_adj; + tk->tkr_mono.mult += mult_adj; tk->xtime_interval += interval; - tk->tkr.xtime_nsec -= offset; + tk->tkr_mono.xtime_nsec -= offset; tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; } @@ -1589,13 +1589,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) tk->ntp_err_mult = 0; } - if (unlikely(tk->tkr.clock->maxadj && - (abs(tk->tkr.mult - tk->tkr.clock->mult) - > tk->tkr.clock->maxadj))) { + if (unlikely(tk->tkr_mono.clock->maxadj && + (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult) + > tk->tkr_mono.clock->maxadj))) { printk_once(KERN_WARNING "Adjusting %s more than 11%% (%ld vs %ld)\n", - tk->tkr.clock->name, (long)tk->tkr.mult, - (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj); + tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult, + (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj); } /* @@ -1612,9 +1612,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) * We'll correct this error next time through this function, when * xtime_nsec is not as small. 
*/ - if (unlikely((s64)tk->tkr.xtime_nsec < 0)) { - s64 neg = -(s64)tk->tkr.xtime_nsec; - tk->tkr.xtime_nsec = 0; + if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) { + s64 neg = -(s64)tk->tkr_mono.xtime_nsec; + tk->tkr_mono.xtime_nsec = 0; tk->ntp_error += neg << tk->ntp_error_shift; } } @@ -1629,13 +1629,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) */ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) { - u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift; + u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift; unsigned int clock_set = 0; - while (tk->tkr.xtime_nsec >= nsecps) { + while (tk->tkr_mono.xtime_nsec >= nsecps) { int leap; - tk->tkr.xtime_nsec -= nsecps; + tk->tkr_mono.xtime_nsec -= nsecps; tk->xtime_sec++; /* Figure out if its a leap sec and apply if needed */ @@ -1680,9 +1680,9 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, /* Accumulate one shifted interval */ offset -= interval; - tk->tkr.cycle_last += interval; + tk->tkr_mono.cycle_last += interval; - tk->tkr.xtime_nsec += tk->xtime_interval << shift; + tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift; *clock_set |= accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ @@ -1725,8 +1725,8 @@ void update_wall_time(void) #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET offset = real_tk->cycle_interval; #else - offset = clocksource_delta(tk->tkr.read(tk->tkr.clock), - tk->tkr.cycle_last, tk->tkr.mask); + offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock), + tk->tkr_mono.cycle_last, tk->tkr_mono.mask); #endif /* Check if there's really nothing to do */ @@ -1890,8 +1890,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot, do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->tkr.base_mono; - nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift; + base = tk->tkr_mono.base; + nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; @@ -1922,8 +1922,8 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->tkr.base_mono; - nsecs = timekeeping_get_ns(&tk->tkr); + base = tk->tkr_mono.base; + nsecs = timekeeping_get_ns(&tk->tkr_mono); *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; -- cgit v1.1 From 4a4ad80d32cea69ee93bd4589f24dc478804cd80 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2015 09:28:44 +0100 Subject: time: Add timerkeeper::tkr_raw Introduce tkr_raw and make use of it. base_raw -> tkr_raw.base clock->{mult,shift} -> tkr_raw.{mult.shift} Kill timekeeping_get_ns_raw() in favour of timekeeping_get_ns(&tkr_raw), this removes all mono_raw special casing. Duplicate the updates to tkr_mono.cycle_last into tkr_raw.cycle_last, both need the same value. Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Stultz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150319093400.422589590@infradead.org Signed-off-by: Ingo Molnar --- include/linux/timekeeper_internal.h | 4 ++-- kernel/time/timekeeping.c | 41 +++++++++++++++++-------------------- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 73df17f..fb86963 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -41,6 +41,7 @@ struct tk_read_base { /** * struct timekeeper - Structure holding internal timekeeping values. 
* @tkr_mono: The readout base structure for CLOCK_MONOTONIC + * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW * @xtime_sec: Current CLOCK_REALTIME time in seconds * @ktime_sec: Current CLOCK_MONOTONIC time in seconds * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset @@ -48,7 +49,6 @@ struct tk_read_base { * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai * @tai_offset: The current UTC to TAI offset in seconds - * @base_raw: Monotonic raw base time in ktime_t format * @raw_time: Monotonic raw base time in timespec64 format * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP @@ -77,6 +77,7 @@ struct tk_read_base { */ struct timekeeper { struct tk_read_base tkr_mono; + struct tk_read_base tkr_raw; u64 xtime_sec; unsigned long ktime_sec; struct timespec64 wall_to_monotonic; @@ -84,7 +85,6 @@ struct timekeeper { ktime_t offs_boot; ktime_t offs_tai; s32 tai_offset; - ktime_t base_raw; struct timespec64 raw_time; /* The following members are for timekeeping internal use */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 1405091..cbb612e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -252,6 +252,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) tk->tkr_mono.mask = clock->mask; tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock); + tk->tkr_raw.clock = clock; + tk->tkr_raw.read = clock->read; + tk->tkr_raw.mask = clock->mask; + tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last; + /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; tmp <<= clock->shift; @@ -278,7 +283,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) else tk->tkr_mono.xtime_nsec <<= shift_change; } + tk->tkr_raw.xtime_nsec = 0; + tk->tkr_mono.shift = clock->shift; + tk->tkr_raw.shift = clock->shift; tk->ntp_error = 0; tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; @@ -290,6 +298,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) * to counteract clock drifting. */ tk->tkr_mono.mult = clock->mult; + tk->tkr_raw.mult = clock->mult; tk->ntp_err_mult = 0; } @@ -316,21 +325,6 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) return nsec + arch_gettimeoffset(); } -static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) -{ - struct clocksource *clock = tk->tkr_mono.clock; - cycle_t delta; - s64 nsec; - - delta = timekeeping_get_delta(&tk->tkr_mono); - - /* convert delta to nanoseconds. */ - nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); - - /* If arch requires, add in get_arch_timeoffset() */ - return nsec + arch_gettimeoffset(); -} - /** * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. 
* @tkr: Timekeeping readout base from which we take the update @@ -562,7 +556,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); /* Update the monotonic raw base */ - tk->base_raw = timespec64_to_ktime(tk->raw_time); + tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time); /* * The sum of the nanoseconds portions of xtime and @@ -611,6 +605,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) cycle_now = tk->tkr_mono.read(clock); delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); tk->tkr_mono.cycle_last = cycle_now; + tk->tkr_raw.cycle_last = cycle_now; tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult; @@ -619,7 +614,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) tk_normalize_xtime(tk); - nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); + nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift); timespec64_add_ns(&tk->raw_time, nsec); } @@ -748,8 +743,8 @@ ktime_t ktime_get_raw(void) do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->base_raw; - nsecs = timekeeping_get_ns_raw(tk); + base = tk->tkr_raw.base; + nsecs = timekeeping_get_ns(&tk->tkr_raw); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -862,7 +857,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) ts_real->tv_sec = tk->xtime_sec; ts_real->tv_nsec = 0; - nsecs_raw = timekeeping_get_ns_raw(tk); + nsecs_raw = timekeeping_get_ns(&tk->tkr_raw); nsecs_real = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1096,7 +1091,7 @@ void getrawmonotonic64(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); - nsecs = timekeeping_get_ns_raw(tk); + nsecs = timekeeping_get_ns(&tk->tkr_raw); ts64 = tk->raw_time; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1217,7 +1212,6 @@ void __init timekeeping_init(void) tk_set_xtime(tk, &now); tk->raw_time.tv_sec = 0; tk->raw_time.tv_nsec = 0; - tk->base_raw.tv64 = 0; if (boot.tv_sec == 0 && boot.tv_nsec == 0) boot = tk_xtime(tk); @@ -1367,6 +1361,8 @@ void timekeeping_resume(void) /* Re-base the last cycle value */ tk->tkr_mono.cycle_last = cycle_now; + tk->tkr_raw.cycle_last = cycle_now; + tk->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); @@ -1681,6 +1677,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, /* Accumulate one shifted interval */ offset -= interval; tk->tkr_mono.cycle_last += interval; + tk->tkr_raw.cycle_last += interval; tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift; *clock_set |= accumulate_nsecs_to_secs(tk); -- cgit v1.1 From 4498e7467e9e441c18ca12f1ca08460356e0508a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2015 09:36:19 +0100 Subject: time: Parametrize all tk_fast_mono users In preparation for more tk_fast instances, remove all hard-coded tk_fast_mono references. 
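To make the mechanism concrete, here is a minimal, self-contained sketch of the seqcount-latch scheme that every tk_fast instance relies on. The names and the C11 atomics are illustrative stand-ins for the kernel's raw_write_seqcount_latch()/raw_read_seqcount() machinery, not the kernel code itself:

#include <stdatomic.h>
#include <stdint.h>

/* Hypothetical payload standing in for struct tk_read_base. */
struct snapshot {
	uint64_t base_ns;	/* last published base time */
	uint64_t delta_ns;	/* nanoseconds accumulated since then */
};

/* Two copies plus a sequence counter; the low bit selects the copy. */
struct latch {
	atomic_uint seq;
	struct snapshot copy[2];
};

/* Writer: move readers off to copy[1], update copy[0], move them back, sync copy[1]. */
static void latch_update(struct latch *l, const struct snapshot *s)
{
	atomic_fetch_add_explicit(&l->seq, 1, memory_order_release);
	l->copy[0] = *s;
	atomic_fetch_add_explicit(&l->seq, 1, memory_order_release);
	l->copy[1] = l->copy[0];
}

/* Reader: lock-free, and safe even if it interrupts the writer mid-update. */
static uint64_t latch_read(struct latch *l)
{
	unsigned int seq;
	uint64_t ns;

	do {
		seq = atomic_load_explicit(&l->seq, memory_order_acquire);
		ns = l->copy[seq & 1].base_ns + l->copy[seq & 1].delta_ns;
	} while (atomic_load_explicit(&l->seq, memory_order_acquire) != seq);

	return ns;
}

Passing the latch (the kernel's struct tk_fast) as a parameter, as this patch does for update_fast_timekeeper() and __ktime_get_fast_ns(), is what lets a second instance be added without duplicating the reader.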
Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Stultz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150319093400.484279927@infradead.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cbb612e..278373e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -364,18 +364,18 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) * slightly wrong timestamp (a few nanoseconds). See * @ktime_get_mono_fast_ns. */ -static void update_fast_timekeeper(struct tk_read_base *tkr) +static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf) { - struct tk_read_base *base = tk_fast_mono.base; + struct tk_read_base *base = tkf->base; /* Force readers off to base[1] */ - raw_write_seqcount_latch(&tk_fast_mono.seq); + raw_write_seqcount_latch(&tkf->seq); /* Update base[0] */ memcpy(base, tkr, sizeof(*base)); /* Force readers back to base[0] */ - raw_write_seqcount_latch(&tk_fast_mono.seq); + raw_write_seqcount_latch(&tkf->seq); /* Update base[1] */ memcpy(base + 1, base, sizeof(*base)); @@ -413,20 +413,25 @@ static void update_fast_timekeeper(struct tk_read_base *tkr) * of the following timestamps. Callers need to be aware of that and * deal with it. */ -u64 notrace ktime_get_mono_fast_ns(void) +static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) { struct tk_read_base *tkr; unsigned int seq; u64 now; do { - seq = raw_read_seqcount(&tk_fast_mono.seq); - tkr = tk_fast_mono.base + (seq & 0x01); + seq = raw_read_seqcount(&tkf->seq); + tkr = tkf->base + (seq & 0x01); now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr); + } while (read_seqcount_retry(&tkf->seq, seq)); - } while (read_seqcount_retry(&tk_fast_mono.seq, seq)); return now; } + +u64 ktime_get_mono_fast_ns(void) +{ + return __ktime_get_fast_ns(&tk_fast_mono); +} EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); /* Suspend-time cycles value for halted fast timekeeper. */ @@ -455,7 +460,7 @@ static void halt_fast_timekeeper(struct timekeeper *tk) memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); cycles_at_suspend = tkr->read(tkr->clock); tkr_dummy.read = dummy_clock_read; - update_fast_timekeeper(&tkr_dummy); + update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); } #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD @@ -586,7 +591,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) memcpy(&shadow_timekeeper, &tk_core.timekeeper, sizeof(tk_core.timekeeper)); - update_fast_timekeeper(&tk->tkr_mono); + update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); } /** -- cgit v1.1 From f09cb9a1808e35ad7502ea39b6bfb443c7fa0f19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2015 09:39:08 +0100 Subject: time: Introduce tk_fast_raw Add the NMI safe CLOCK_MONOTONIC_RAW accessor. 
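A hedged usage sketch (the helper below is hypothetical): the point of the fast accessors is that they may be called from any context, including NMI. The regular ktime_get_raw() can live-lock if an NMI lands on the same CPU in the middle of a timekeeper update, because its seqcount read loop would spin forever; the latched variant cannot:

#include <linux/timekeeping.h>

/* Hypothetical instrumentation helper: time a callback, safely from any context. */
static u64 probe_duration_ns(void (*fn)(void *), void *arg)
{
	u64 t0 = ktime_get_raw_fast_ns();	/* CLOCK_MONOTONIC_RAW, no NTP slewing */

	fn(arg);
	return ktime_get_raw_fast_ns() - t0;
}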
Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Stultz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150319093400.562746929@infradead.org Signed-off-by: Ingo Molnar --- include/linux/timekeeping.h | 1 + kernel/time/timekeeping.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3eaae47..f36b1ed 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -220,6 +220,7 @@ static inline u64 ktime_get_raw_ns(void) } extern u64 ktime_get_mono_fast_ns(void); +extern u64 ktime_get_raw_fast_ns(void); /* * Timespec interfaces utilizing the ktime based ones diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 278373e..c3fcff0 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -59,6 +59,7 @@ struct tk_fast { }; static struct tk_fast tk_fast_mono ____cacheline_aligned; +static struct tk_fast tk_fast_raw ____cacheline_aligned; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -434,6 +435,12 @@ u64 ktime_get_mono_fast_ns(void) } EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); +u64 ktime_get_raw_fast_ns(void) +{ + return __ktime_get_fast_ns(&tk_fast_raw); +} +EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); + /* Suspend-time cycles value for halted fast timekeeper. */ static cycle_t cycles_at_suspend; @@ -461,6 +468,11 @@ static void halt_fast_timekeeper(struct timekeeper *tk) cycles_at_suspend = tkr->read(tkr->clock); tkr_dummy.read = dummy_clock_read; update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); + + tkr = &tk->tkr_raw; + memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); + tkr_dummy.read = dummy_clock_read; + update_fast_timekeeper(&tkr_dummy, &tk_fast_raw); } #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD @@ -592,6 +604,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) sizeof(tk_core.timekeeper)); update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); + update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); } /** -- cgit v1.1 From fe5fba05b46c791c95a9f34228ac495f81f72fc0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Mar 2015 12:39:10 +0100 Subject: time: Add ktime_get_tai_ns() Because it was the only clock for which we didn't have a _ns() accessor yet. Signed-off-by: Peter Zijlstra (Intel) Cc: John Stultz Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/timekeeping.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index f36b1ed..5047b83 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -214,6 +214,11 @@ static inline u64 ktime_get_boot_ns(void) return ktime_to_ns(ktime_get_boottime()); } +static inline u64 ktime_get_tai_ns(void) +{ + return ktime_to_ns(ktime_get_clocktai()); +} + static inline u64 ktime_get_raw_ns(void) { return ktime_to_ns(ktime_get_raw()); -- cgit v1.1 From 554ef3876c6acdff1331feab10275e9e9e0adb84 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 27 Feb 2015 17:21:32 +0530 Subject: clockevents: Handle tick device's resume separately An upcoming patch will redefine the possible states of a clockevent device. The RESUME mode is a special case only for the tick's clockevent devices. In the future it can be replaced by the ->resume() callback already available for clockevent devices. Let's handle it separately so that clockevents_set_mode() only handles states valid across all devices. This also renames set_mode_resume() to tick_resume() to make it more explicit. 
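As a sketch of the driver side (all foo_* names are hypothetical, assuming the callback split introduced here): a tick device that needs resume handling fills in ->tick_resume() instead of decoding CLOCK_EVT_MODE_RESUME in a mode switch:

/* Hypothetical driver callback: power the timer block back up on resume. */
static int foo_timer_tick_resume(struct clock_event_device *evt)
{
	struct foo_timer *t = to_foo_timer(evt);	/* hypothetical container helper */

	writel_relaxed(FOO_CTRL_ENABLE, t->base + FOO_CTRL);	/* hypothetical register */
	return 0;
}

Drivers with nothing to do can leave the pointer NULL; clockevents_tick_resume() below treats a missing callback as success.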
Signed-off-by: Viresh Kumar Acked-by: Peter Zijlstra Cc: Daniel Lezcano Cc: Frederic Weisbecker Cc: Kevin Hilman Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: linaro-kernel@lists.linaro.org Cc: linaro-networking@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/c1b0112410870f49e7bf06958e1483eac6c15e20.1425037853.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 4 ++-- kernel/time/clockevents.c | 30 +++++++++++++++++++++--------- kernel/time/tick-broadcast.c | 2 +- kernel/time/tick-common.c | 2 +- kernel/time/tick-internal.h | 1 + kernel/time/timer_list.c | 4 ++-- 6 files changed, 28 insertions(+), 15 deletions(-) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 59af26b..a417495 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -87,7 +87,7 @@ enum clock_event_mode { * @set_mode_periodic: switch mode to periodic, if !set_mode * @set_mode_oneshot: switch mode to oneshot, if !set_mode * @set_mode_shutdown: switch mode to shutdown, if !set_mode - * @set_mode_resume: resume clkevt device, if !set_mode + * @tick_resume: resume clkevt device, if !set_mode * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration @@ -125,7 +125,7 @@ struct clock_event_device { int (*set_mode_periodic)(struct clock_event_device *); int (*set_mode_oneshot)(struct clock_event_device *); int (*set_mode_shutdown)(struct clock_event_device *); - int (*set_mode_resume)(struct clock_event_device *); + int (*tick_resume)(struct clock_event_device *); void (*broadcast)(const struct cpumask *mask); void (*suspend)(struct clock_event_device *); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 489642b..1b0ea63 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -100,7 +100,7 @@ static int __clockevents_set_mode(struct clock_event_device *dev, /* Transition with legacy set_mode() callback */ if (dev->set_mode) { /* Legacy callback doesn't support new modes */ - if (mode > CLOCK_EVT_MODE_RESUME) + if (mode > CLOCK_EVT_MODE_ONESHOT) return -ENOSYS; dev->set_mode(mode, dev); return 0; @@ -133,13 +133,6 @@ static int __clockevents_set_mode(struct clock_event_device *dev, return -ENOSYS; return dev->set_mode_oneshot(dev); - case CLOCK_EVT_MODE_RESUME: - /* Optional callback */ - if (dev->set_mode_resume) - return dev->set_mode_resume(dev); - else - return 0; - default: return -ENOSYS; } @@ -184,6 +177,25 @@ void clockevents_shutdown(struct clock_event_device *dev) dev->next_event.tv64 = KTIME_MAX; } +/** + * clockevents_tick_resume - Resume the tick device before using it again + * @dev: device to resume + */ +int clockevents_tick_resume(struct clock_event_device *dev) +{ + int ret = 0; + + if (dev->set_mode) + dev->set_mode(CLOCK_EVT_MODE_RESUME, dev); + else if (dev->tick_resume) + ret = dev->tick_resume(dev); + + if (likely(!ret)) + dev->mode = CLOCK_EVT_MODE_RESUME; + + return ret; +} + #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST /* Limit min_delta to a jiffie */ @@ -433,7 +445,7 @@ static int clockevents_sanity_check(struct clock_event_device *dev) if (dev->set_mode) { /* We shouldn't be supporting new modes now */ WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot || - dev->set_mode_shutdown || dev->set_mode_resume); + dev->set_mode_shutdown || dev->tick_resume); return 0; } diff --git a/kernel/time/tick-broadcast.c 
b/kernel/time/tick-broadcast.c index 066f0ec..542d5bb 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -464,7 +464,7 @@ int tick_resume_broadcast(void) bc = tick_broadcast_device.evtdev; if (bc) { - clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME); + clockevents_tick_resume(bc); switch (tick_broadcast_device.mode) { case TICKDEV_MODE_PERIODIC: diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index f7c5155..5c50664 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -384,7 +384,7 @@ void tick_resume(void) struct tick_device *td = this_cpu_ptr(&tick_cpu_device); int broadcast = tick_resume_broadcast(); - clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); + clockevents_tick_resume(td->evtdev); if (!broadcast) { if (td->mode == TICKDEV_MODE_PERIODIC) diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 366aeb4..98700e4 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -32,6 +32,7 @@ extern bool tick_check_replacement(struct clock_event_device *curdev, extern void tick_install_replacement(struct clock_event_device *dev); extern void clockevents_shutdown(struct clock_event_device *dev); +extern int clockevents_tick_resume(struct clock_event_device *dev); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 2cfd194..2b3e939 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -251,9 +251,9 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) SEQ_printf(m, "\n"); } - if (dev->set_mode_resume) { + if (dev->tick_resume) { SEQ_printf(m, " resume: "); - print_name_offset(m, dev->set_mode_resume); + print_name_offset(m, dev->tick_resume); SEQ_printf(m, "\n"); } } -- cgit v1.1 From 77e32c89a7117614ab3d66d20c1088de721abfaa Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 27 Feb 2015 17:21:33 +0530 Subject: clockevents: Manage device's state separately for the core 'enum clock_event_mode' is used for two purposes today: - to pass the mode to the driver via clock_event_device::set_mode(). - to manage the state of the device within the clockevents core. To support new modes/states, we have moved away from the legacy set_mode() callback to new per-mode/state callbacks. New modes/states shouldn't be exposed to the legacy (now OBSOLETE) callbacks, and so we shouldn't add new states to 'enum clock_event_mode'. Let's have separate enums for the two use cases mentioned above. Keep using the earlier enum for the legacy set_mode() callback and mark it OBSOLETE. And add another enum to clearly specify the possible states of a clockevent device. This also renames the newly added per-mode callbacks to reflect state changes. We haven't got rid of the 'mode' member of 'struct clock_event_device' as it is used by some of the clockevent drivers; it will go away automatically once we migrate those drivers to the new interface. It ('mode') is now only updated for the drivers using the legacy interface. 
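For a driver, the migration looks roughly like this (foo_* names hypothetical): the per-state callbacks replace the old mode switch. Per the sanity check in this patch, ->set_state_shutdown() is always required, while ->set_state_periodic() and ->set_state_oneshot() are required only when the matching feature flag is advertised:

static int foo_set_state_shutdown(struct clock_event_device *evt)
{
	foo_timer_stop(evt);			/* hypothetical helper */
	return 0;
}

static int foo_set_state_periodic(struct clock_event_device *evt)
{
	foo_timer_stop(evt);
	foo_timer_start(evt, true);		/* hypothetical: enable periodic reload */
	return 0;
}

static int foo_set_state_oneshot(struct clock_event_device *evt)
{
	foo_timer_stop(evt);			/* armed later via ->set_next_event() */
	return 0;
}

static struct clock_event_device foo_clockevent = {
	.name			= "foo-timer",
	.features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.set_state_shutdown	= foo_set_state_shutdown,
	.set_state_periodic	= foo_set_state_periodic,
	.set_state_oneshot	= foo_set_state_oneshot,
	.set_next_event		= foo_set_next_event,	/* hypothetical */
};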
Suggested-by: Peter Zijlstra Suggested-by: Ingo Molnar Signed-off-by: Viresh Kumar Acked-by: Peter Zijlstra Cc: Daniel Lezcano Cc: Frederic Weisbecker Cc: Kevin Hilman Cc: Preeti U Murthy Cc: linaro-kernel@lists.linaro.org Cc: linaro-networking@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/b6b0143a8a57bd58352ad35e08c25424c879c0cb.1425037853.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- arch/arm/common/bL_switcher.c | 8 ++-- include/linux/clockchips.h | 44 +++++++++++++------ kernel/time/clockevents.c | 99 +++++++++++++++++++++++-------------------- kernel/time/tick-broadcast.c | 20 ++++----- kernel/time/tick-common.c | 7 +-- kernel/time/tick-oneshot.c | 6 +-- kernel/time/timer_list.c | 12 +++--- 7 files changed, 111 insertions(+), 85 deletions(-) diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 6eaddc4..d4f970a 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -152,7 +152,7 @@ static int bL_switch_to(unsigned int new_cluster_id) unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster; struct completion inbound_alive; struct tick_device *tdev; - enum clock_event_mode tdev_mode; + enum clock_event_state tdev_state; long volatile *handshake_ptr; int ipi_nr, ret; @@ -223,8 +223,8 @@ static int bL_switch_to(unsigned int new_cluster_id) if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu))) tdev = NULL; if (tdev) { - tdev_mode = tdev->evtdev->mode; - clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN); + tdev_state = tdev->evtdev->state; + clockevents_set_state(tdev->evtdev, CLOCK_EVT_STATE_SHUTDOWN); } ret = cpu_pm_enter(); @@ -252,7 +252,7 @@ static int bL_switch_to(unsigned int new_cluster_id) ret = cpu_pm_exit(); if (tdev) { - clockevents_set_mode(tdev->evtdev, tdev_mode); + clockevents_set_state(tdev->evtdev, tdev_state); clockevents_program_event(tdev->evtdev, tdev->evtdev->next_event, 1); } diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index a417495..e20232c 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -32,15 +32,31 @@ enum clock_event_nofitiers { struct clock_event_device; struct module; -/* Clock event mode commands */ +/* Clock event mode commands for legacy ->set_mode(): OBSOLETE */ enum clock_event_mode { CLOCK_EVT_MODE_UNUSED = 0, CLOCK_EVT_MODE_SHUTDOWN, CLOCK_EVT_MODE_PERIODIC, CLOCK_EVT_MODE_ONESHOT, CLOCK_EVT_MODE_RESUME, +}; - /* Legacy ->set_mode() callback doesn't support below modes */ +/* + * Possible states of a clock event device. + * + * DETACHED: Device is not used by clockevents core. Initial state or can be + * reached from SHUTDOWN. + * SHUTDOWN: Device is powered-off. Can be reached from PERIODIC or ONESHOT. + * PERIODIC: Device is programmed to generate events periodically. Can be + * reached from DETACHED or SHUTDOWN. + * ONESHOT: Device is programmed to generate event only once. Can be reached + * from DETACHED or SHUTDOWN. 
+ */ +enum clock_event_state { + CLOCK_EVT_STATE_DETACHED = 0, + CLOCK_EVT_STATE_SHUTDOWN, + CLOCK_EVT_STATE_PERIODIC, + CLOCK_EVT_STATE_ONESHOT, }; /* @@ -80,13 +96,14 @@ enum clock_event_mode { * @min_delta_ns: minimum delta value in ns * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) - * @mode: operating mode assigned by the management code + * @mode: operating mode, relevant only to ->set_mode(), OBSOLETE + * @state: current state of the device, assigned by the core code * @features: features * @retries: number of forced programming retries * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. - * @set_mode_periodic: switch mode to periodic, if !set_mode - * @set_mode_oneshot: switch mode to oneshot, if !set_mode - * @set_mode_shutdown: switch mode to shutdown, if !set_mode + * @set_state_periodic: switch state to periodic, if !set_mode + * @set_state_oneshot: switch state to oneshot, if !set_mode + * @set_state_shutdown: switch state to shutdown, if !set_mode * @tick_resume: resume clkevt device, if !set_mode * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration @@ -111,20 +128,21 @@ struct clock_event_device { u32 mult; u32 shift; enum clock_event_mode mode; + enum clock_event_state state; unsigned int features; unsigned long retries; /* - * Mode transition callback(s): Only one of the two groups should be + * State transition callback(s): Only one of the two groups should be * defined: * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. - * - set_mode_{shutdown|periodic|oneshot|resume}(). + * - set_state_{shutdown|periodic|oneshot}(), tick_resume(). */ void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); - int (*set_mode_periodic)(struct clock_event_device *); - int (*set_mode_oneshot)(struct clock_event_device *); - int (*set_mode_shutdown)(struct clock_event_device *); + int (*set_state_periodic)(struct clock_event_device *); + int (*set_state_oneshot)(struct clock_event_device *); + int (*set_state_shutdown)(struct clock_event_device *); int (*tick_resume)(struct clock_event_device *); void (*broadcast)(const struct cpumask *mask); @@ -177,8 +195,8 @@ extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); -extern void clockevents_set_mode(struct clock_event_device *dev, - enum clock_event_mode mode); +extern void clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state); extern int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, bool force); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 1b0ea63..6e53e9a 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -94,44 +94,49 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) } EXPORT_SYMBOL_GPL(clockevent_delta2ns); -static int __clockevents_set_mode(struct clock_event_device *dev, - enum clock_event_mode mode) +static int __clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state) { /* Transition with legacy set_mode() callback */ if (dev->set_mode) { /* Legacy callback doesn't support new modes */ - if (mode > CLOCK_EVT_MODE_ONESHOT) + if (state > CLOCK_EVT_STATE_ONESHOT) return -ENOSYS; - dev->set_mode(mode, dev); + /* + * 'clock_event_state' and 'clock_event_mode' have 1-to-1 + * mapping 
until *_ONESHOT, and so a simple cast will work. + */ + dev->set_mode((enum clock_event_mode)state, dev); + dev->mode = (enum clock_event_mode)state; return 0; } if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; - /* Transition with new mode-specific callbacks */ - switch (mode) { - case CLOCK_EVT_MODE_UNUSED: + /* Transition with new state-specific callbacks */ + switch (state) { + case CLOCK_EVT_STATE_DETACHED: /* * This is an internal state, which is guaranteed to go from - * SHUTDOWN to UNUSED. No driver interaction required. + * SHUTDOWN to DETACHED. No driver interaction required. */ return 0; - case CLOCK_EVT_MODE_SHUTDOWN: - return dev->set_mode_shutdown(dev); + case CLOCK_EVT_STATE_SHUTDOWN: + return dev->set_state_shutdown(dev); - case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_STATE_PERIODIC: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) return -ENOSYS; - return dev->set_mode_periodic(dev); + return dev->set_state_periodic(dev); - case CLOCK_EVT_MODE_ONESHOT: + case CLOCK_EVT_STATE_ONESHOT: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) return -ENOSYS; - return dev->set_mode_oneshot(dev); + return dev->set_state_oneshot(dev); default: return -ENOSYS; @@ -139,26 +144,26 @@ static int __clockevents_set_mode(struct clock_event_device *dev, } /** - * clockevents_set_mode - set the operating mode of a clock event device + * clockevents_set_state - set the operating state of a clock event device * @dev: device to modify - * @mode: new mode + * @state: new state * * Must be called with interrupts disabled ! */ -void clockevents_set_mode(struct clock_event_device *dev, - enum clock_event_mode mode) +void clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state) { - if (dev->mode != mode) { - if (__clockevents_set_mode(dev, mode)) + if (dev->state != state) { + if (__clockevents_set_state(dev, state)) return; - dev->mode = mode; + dev->state = state; /* * A nsec2cyc multiplicator of 0 is invalid and we'd crash * on it, so fix it up and emit a warning: */ - if (mode == CLOCK_EVT_MODE_ONESHOT) { + if (state == CLOCK_EVT_STATE_ONESHOT) { if (unlikely(!dev->mult)) { dev->mult = 1; WARN_ON(1); @@ -173,7 +178,7 @@ void clockevents_set_mode(struct clock_event_device *dev, */ void clockevents_shutdown(struct clock_event_device *dev) { - clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); + clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); dev->next_event.tv64 = KTIME_MAX; } @@ -185,13 +190,12 @@ int clockevents_tick_resume(struct clock_event_device *dev) { int ret = 0; - if (dev->set_mode) + if (dev->set_mode) { dev->set_mode(CLOCK_EVT_MODE_RESUME, dev); - else if (dev->tick_resume) - ret = dev->tick_resume(dev); - - if (likely(!ret)) dev->mode = CLOCK_EVT_MODE_RESUME; + } else if (dev->tick_resume) { + ret = dev->tick_resume(dev); + } return ret; } @@ -248,7 +252,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev) delta = dev->min_delta_ns; dev->next_event = ktime_add_ns(ktime_get(), delta); - if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) return 0; dev->retries++; @@ -285,7 +289,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev) delta = dev->min_delta_ns; dev->next_event = ktime_add_ns(ktime_get(), delta); - if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) return 0; dev->retries++; @@ -317,7 +321,7 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, 
dev->next_event = expires; - if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) return 0; /* Shortcut for clockevent devices that can deal with ktime. */ @@ -362,7 +366,7 @@ static int clockevents_replace(struct clock_event_device *ced) struct clock_event_device *dev, *newdev = NULL; list_for_each_entry(dev, &clockevent_devices, list) { - if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED) + if (dev == ced || dev->state != CLOCK_EVT_STATE_DETACHED) continue; if (!tick_check_replacement(newdev, dev)) @@ -388,7 +392,7 @@ static int clockevents_replace(struct clock_event_device *ced) static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu) { /* Fast track. Device is unused */ - if (ced->mode == CLOCK_EVT_MODE_UNUSED) { + if (ced->state == CLOCK_EVT_STATE_DETACHED) { list_del_init(&ced->list); return 0; } @@ -438,30 +442,30 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) } EXPORT_SYMBOL_GPL(clockevents_unbind); -/* Sanity check of mode transition callbacks */ +/* Sanity check of state transition callbacks */ static int clockevents_sanity_check(struct clock_event_device *dev) { /* Legacy set_mode() callback */ if (dev->set_mode) { /* We shouldn't be supporting new modes now */ - WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot || - dev->set_mode_shutdown || dev->tick_resume); + WARN_ON(dev->set_state_periodic || dev->set_state_oneshot || + dev->set_state_shutdown || dev->tick_resume); return 0; } if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; - /* New mode-specific callbacks */ - if (!dev->set_mode_shutdown) + /* New state-specific callbacks */ + if (!dev->set_state_shutdown) return -EINVAL; if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && - !dev->set_mode_periodic) + !dev->set_state_periodic) return -EINVAL; if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) && - !dev->set_mode_oneshot) + !dev->set_state_oneshot) return -EINVAL; return 0; @@ -478,6 +482,9 @@ void clockevents_register_device(struct clock_event_device *dev) BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); BUG_ON(clockevents_sanity_check(dev)); + /* Initialize state to DETACHED */ + dev->state = CLOCK_EVT_STATE_DETACHED; + if (!dev->cpumask) { WARN_ON(num_possible_cpus() > 1); dev->cpumask = cpumask_of(smp_processor_id()); @@ -541,11 +548,11 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq) { clockevents_config(dev, freq); - if (dev->mode == CLOCK_EVT_MODE_ONESHOT) + if (dev->state == CLOCK_EVT_STATE_ONESHOT) return clockevents_program_event(dev, dev->next_event, false); - if (dev->mode == CLOCK_EVT_MODE_PERIODIC) - return __clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); + if (dev->state == CLOCK_EVT_STATE_PERIODIC) + return __clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC); return 0; } @@ -601,13 +608,13 @@ void clockevents_exchange_device(struct clock_event_device *old, */ if (old) { module_put(old->owner); - clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); + clockevents_set_state(old, CLOCK_EVT_STATE_DETACHED); list_del(&old->list); list_add(&old->list, &clockevents_released); } if (new) { - BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED); + BUG_ON(new->state != CLOCK_EVT_STATE_DETACHED); clockevents_shutdown(new); } local_irq_restore(flags); @@ -693,7 +700,7 @@ int clockevents_notify(unsigned long reason, void *arg) if (cpumask_test_cpu(cpu, dev->cpumask) && cpumask_weight(dev->cpumask) == 1 && !tick_is_broadcast_device(dev)) { - BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + BUG_ON(dev->state != 
CLOCK_EVT_STATE_DETACHED); list_del(&dev->list); } } diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 542d5bb..f0f8ee9 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -303,7 +303,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) /* * The device is in periodic mode. No reprogramming necessary: */ - if (dev->mode == CLOCK_EVT_MODE_PERIODIC) + if (dev->state == CLOCK_EVT_STATE_PERIODIC) goto unlock; /* @@ -532,8 +532,8 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu, { int ret; - if (bc->mode != CLOCK_EVT_MODE_ONESHOT) - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + if (bc->state != CLOCK_EVT_STATE_ONESHOT) + clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); ret = clockevents_program_event(bc, expires, force); if (!ret) @@ -543,7 +543,7 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu, int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); return 0; } @@ -562,8 +562,8 @@ void tick_check_oneshot_broadcast_this_cpu(void) * switched over, leave the device alone. */ if (td->mode == TICKDEV_MODE_ONESHOT) { - clockevents_set_mode(td->evtdev, - CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(td->evtdev, + CLOCK_EVT_STATE_ONESHOT); } } } @@ -666,7 +666,7 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, if (dev->next_event.tv64 < bc->next_event.tv64) return; } - clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); + clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } static void broadcast_move_bc(int deadcpu) @@ -741,7 +741,7 @@ int tick_broadcast_oneshot_control(unsigned long reason) cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); } else { if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); /* * The cpu which was handling the broadcast * timer marked this cpu in the broadcast @@ -842,7 +842,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) /* Set it up only once ! 
*/ if (bc->event_handler != tick_handle_oneshot_broadcast) { - int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; + int was_periodic = bc->state == CLOCK_EVT_STATE_PERIODIC; bc->event_handler = tick_handle_oneshot_broadcast; @@ -858,7 +858,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) tick_broadcast_oneshot_mask, tmpmask); if (was_periodic && !cpumask_empty(tmpmask)) { - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); tick_broadcast_init_next_event(tmpmask, tick_next_period); tick_broadcast_set_event(bc, cpu, tick_next_period, 1); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 5c50664..a5b8771 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -102,7 +102,7 @@ void tick_handle_periodic(struct clock_event_device *dev) tick_periodic(cpu); - if (dev->mode != CLOCK_EVT_MODE_ONESHOT) + if (dev->state != CLOCK_EVT_STATE_ONESHOT) return; for (;;) { /* @@ -140,7 +140,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && !tick_broadcast_oneshot_active()) { - clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); + clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC); } else { unsigned long seq; ktime_t next; @@ -150,7 +150,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) next = tick_next_period; } while (read_seqretry(&jiffies_lock, seq)); - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); for (;;) { if (!clockevents_program_event(dev, next, false)) @@ -365,6 +365,7 @@ void tick_shutdown(unsigned int *cpup) * Prevent that the clock events layer tries to call * the set mode function! 
*/ + dev->state = CLOCK_EVT_STATE_DETACHED; dev->mode = CLOCK_EVT_MODE_UNUSED; clockevents_exchange_device(dev, NULL); dev->event_handler = clockevents_handle_noop; diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 7ce740e..67a64b1 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -38,7 +38,7 @@ void tick_resume_oneshot(void) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); clockevents_program_event(dev, ktime_get(), true); } @@ -50,7 +50,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev, ktime_t next_event) { newdev->event_handler = handler; - clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(newdev, CLOCK_EVT_STATE_ONESHOT); clockevents_program_event(newdev, next_event, true); } @@ -81,7 +81,7 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) td->mode = TICKDEV_MODE_ONESHOT; dev->event_handler = handler; - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); tick_broadcast_switch_to_oneshot(); return 0; } diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 2b3e939..05aa559 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -233,21 +233,21 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) print_name_offset(m, dev->set_mode); SEQ_printf(m, "\n"); } else { - if (dev->set_mode_shutdown) { + if (dev->set_state_shutdown) { SEQ_printf(m, " shutdown: "); - print_name_offset(m, dev->set_mode_shutdown); + print_name_offset(m, dev->set_state_shutdown); SEQ_printf(m, "\n"); } - if (dev->set_mode_periodic) { + if (dev->set_state_periodic) { SEQ_printf(m, " periodic: "); - print_name_offset(m, dev->set_mode_periodic); + print_name_offset(m, dev->set_state_periodic); SEQ_printf(m, "\n"); } - if (dev->set_mode_oneshot) { + if (dev->set_state_oneshot) { SEQ_printf(m, " oneshot: "); - print_name_offset(m, dev->set_mode_oneshot); + print_name_offset(m, dev->set_state_oneshot); SEQ_printf(m, "\n"); } -- cgit v1.1 From de81e64b250d3865a75d221a80b4311e3273670a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 27 Feb 2015 17:21:34 +0530 Subject: clockevents: Don't validate dev->mode against CLOCK_EVT_MODE_UNUSED for new interface It was a requirement in the legacy interface that drivers must initialize the ->mode field to 'CLOCK_EVT_MODE_UNUSED'. This field isn't used anymore by the new interface and so should only be checked for the legacy interface. It could probably be dropped as well, as the core doesn't rely on it anymore, but let's keep it to support the legacy interface. 
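Concretely (both devices and their callbacks hypothetical): a legacy driver must still start with ->mode zeroed, while a converted driver never touches it. Note that CLOCK_EVT_MODE_UNUSED is 0, so a static definition satisfies the check implicitly; the explicit initializer below just documents the requirement:

/* Legacy interface: ->mode must start out as CLOCK_EVT_MODE_UNUSED (0). */
static struct clock_event_device legacy_dev = {
	.set_mode	= legacy_set_mode,		/* hypothetical */
	.mode		= CLOCK_EVT_MODE_UNUSED,
};

/* New interface: ->mode is ignored; the core tracks ->state instead. */
static struct clock_event_device new_dev = {
	.set_state_shutdown	= foo_set_state_shutdown,	/* hypothetical */
};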
Signed-off-by: Viresh Kumar Acked-by: Peter Zijlstra Cc: Daniel Lezcano Cc: Frederic Weisbecker Cc: Kevin Hilman Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: linaro-kernel@lists.linaro.org Cc: linaro-networking@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/c6604fa1a77fe1fc8dcab87769857228fb1dadd5.1425037853.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 6e53e9a..73689df 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -450,6 +450,8 @@ static int clockevents_sanity_check(struct clock_event_device *dev) /* We shouldn't be supporting new modes now */ WARN_ON(dev->set_state_periodic || dev->set_state_oneshot || dev->set_state_shutdown || dev->tick_resume); + + BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); return 0; } @@ -479,7 +481,6 @@ void clockevents_register_device(struct clock_event_device *dev) { unsigned long flags; - BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); BUG_ON(clockevents_sanity_check(dev)); /* Initialize state to DETACHED */ -- cgit v1.1 From ad834a346853b7ee7c684efa56b1d415a850a710 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 30 Mar 2015 22:17:13 +0200 Subject: clocksource/drivers/efm32: Use CLOCK_EVT_FEAT_PERIODIC for defining features We have used CLOCK_EVT_MODE_PERIODIC instead of CLOCK_EVT_FEAT_PERIODIC while defining features. Fix it. Signed-off-by: Viresh Kumar Signed-off-by: Daniel Lezcano Acked-by: Uwe Kleine-Koenig Cc: ben.dooks@codethink.co.uk Cc: digetx@gmail.com Cc: hdegoede@redhat.com Cc: laurent.pinchart+renesas@ideasonboard.com Cc: linux-arm-kernel@lists.infradead.org Cc: maxime.ripard@free-electrons.com Link: http://lkml.kernel.org/r/1427746633-9137-11-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/time-efm32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/time-efm32.c b/drivers/clocksource/time-efm32.c index ec57ba2..5b6e3d5 100644 --- a/drivers/clocksource/time-efm32.c +++ b/drivers/clocksource/time-efm32.c @@ -111,7 +111,7 @@ static irqreturn_t efm32_clock_event_handler(int irq, void *dev_id) static struct efm32_clock_event_ddata clock_event_ddata = { .evtdev = { .name = "efm32 clockevent", - .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_MODE_PERIODIC, + .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, .set_mode = efm32_clock_event_set_mode, .set_next_event = efm32_clock_event_set_next_event, .rating = 200, -- cgit v1.1 From 3a10013b6a5975346fe5a8db59500a8d176d64e0 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 30 Mar 2015 22:17:12 +0200 Subject: clocksource/drivers/dw_apb_timers_of: Fix IO endianness causing time jumps The dw_apb_timer_of timer is using __raw_readl() to access the timer register, which is causing issues when the system is running in big endian mode. Fix this by using readl_relaxed(), which should account for the endian settings. This fixes issues where the time jumps around in the dmesg output due to returning __le32 values. 
As an example, these two console lines show time running backwards: [ 49.882572] CPU1: failed to come online [ 43.282457] Brought up 1 CPUs Signed-off-by: Ben Dooks Signed-off-by: Daniel Lezcano Acked-by: Nicolas Ferre Cc: Dinh Nguyen Cc: Linux ARM Kernel Cc: Thomas Gleixner Cc: digetx@gmail.com Cc: hdegoede@redhat.com Cc: laurent.pinchart+renesas@ideasonboard.com Cc: maxime.ripard@free-electrons.com Cc: viresh.kumar@linaro.org Link: http://lkml.kernel.org/r/1427746633-9137-10-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/dw_apb_timer_of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c index d305fb0..a19a3f6 100644 --- a/drivers/clocksource/dw_apb_timer_of.c +++ b/drivers/clocksource/dw_apb_timer_of.c @@ -108,7 +108,7 @@ static void __init add_clocksource(struct device_node *source_timer) static u64 notrace read_sched_clock(void) { - return ~__raw_readl(sched_io_base); + return ~readl_relaxed(sched_io_base); } static const struct of_device_id sptimer_ids[] __initconst = { -- cgit v1.1 From 59196bcef5c1034f020fe5bf3579f89767537246 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 30 Mar 2015 22:17:11 +0200 Subject: clocksource/drivers/tegra: Fix IO endianness Support big-endian kernel by using endian-aware register access functions. Signed-off-by: Dmitry Osipenko Signed-off-by: Daniel Lezcano Acked-by: Thierry Reding Cc: ben.dooks@codethink.co.uk Cc: hdegoede@redhat.com Cc: laurent.pinchart+renesas@ideasonboard.com Cc: linux-arm-kernel@lists.infradead.org Cc: maxime.ripard@free-electrons.com Cc: viresh.kumar@linaro.org Link: http://lkml.kernel.org/r/1427746633-9137-9-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/tegra20_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c index d2616ef..d8a3a4e 100644 --- a/drivers/clocksource/tegra20_timer.c +++ b/drivers/clocksource/tegra20_timer.c @@ -57,9 +57,9 @@ static u64 persistent_ms, last_persistent_ms; static struct delay_timer tegra_delay_timer; #define timer_writel(value, reg) \ - __raw_writel(value, timer_reg_base + (reg)) + writel_relaxed(value, timer_reg_base + (reg)) #define timer_readl(reg) \ - __raw_readl(timer_reg_base + (reg)) + readl_relaxed(timer_reg_base + (reg)) static int tegra_timer_set_next_event(unsigned long cycles, struct clock_event_device *evt) -- cgit v1.1 From 37b8b003c483191a327a740fc32c1993959e7ae9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 30 Mar 2015 22:17:10 +0200 Subject: clocksource/drivers/sun4i-timer: Only register a sched_clock on sun4i and sun5i sun6i and newer have an arm arch timer which is a better sched_clock source than the sun4i-timer, and sched_clock does not have priorities, so do not register the sun4i-timer sched_clock at all on sun6i and newer. 
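For reference, a hedged sketch of the kind of sched_clock hook being gated here (foo_* names and register layout hypothetical): the hardware counter counts down, so the read hook inverts it to present sched_clock with an increasing value:

static void __iomem *foo_timer_base;	/* hypothetical MMIO base */

static u64 notrace foo_sched_read(void)
{
	/* Down-counter: invert so the returned value increases monotonically. */
	return ~readl_relaxed(foo_timer_base + FOO_CNTVAL);	/* hypothetical register */
}

static void __init foo_sched_clock_init(unsigned long rate)
{
	/* 32 valid bits, clocked at the timer's input rate. */
	sched_clock_register(foo_sched_read, 32, rate);
}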
Signed-off-by: Hans de Goede Signed-off-by: Daniel Lezcano Acked-by: Maxime Ripard Cc: ben.dooks@codethink.co.uk Cc: digetx@gmail.com Cc: laurent.pinchart+renesas@ideasonboard.com Cc: linux-arm-kernel@lists.infradead.org Cc: viresh.kumar@linaro.org Link: http://lkml.kernel.org/r/1427746633-9137-8-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/sun4i_timer.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index f4a9c00..1928a89 100644 --- a/drivers/clocksource/sun4i_timer.c +++ b/drivers/clocksource/sun4i_timer.c @@ -170,7 +170,15 @@ static void __init sun4i_timer_init(struct device_node *node) TIMER_CTL_CLK_SRC(TIMER_CTL_CLK_SRC_OSC24M), timer_base + TIMER_CTL_REG(1)); - sched_clock_register(sun4i_timer_sched_read, 32, rate); + /* + * sched_clock_register does not have priorities, and on sun6i and + * later there is a better sched_clock registered by arm_arch_timer.c + */ + if (of_machine_is_compatible("allwinner,sun4i-a10") || + of_machine_is_compatible("allwinner,sun5i-a13") || + of_machine_is_compatible("allwinner,sun5i-a10s")) + sched_clock_register(sun4i_timer_sched_read, 32, rate); + clocksource_mmio_init(timer_base + TIMER_CNTVAL_REG(1), node->name, rate, 350, 32, clocksource_mmio_readl_down); -- cgit v1.1 From 4806c87f017d8a7003ad34886f58c3b9e023df6a Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 30 Mar 2015 22:17:09 +0200 Subject: clocksource/drivers/at91: Fix IO endianness Replace the __raw IO accessors with the readl/writel_relaxed() versions to allow the code to be used on a system running in big-endian mode. Signed-off-by: Ben Dooks Signed-off-by: Daniel Lezcano Acked-by: Nicolas Ferre Cc: Andrew Victor Cc: Jean-Christophe Plagniol-Villard Cc: Linux ARM Kernel Cc: Thomas Gleixner Cc: digetx@gmail.com Cc: hdegoede@redhat.com Cc: laurent.pinchart+renesas@ideasonboard.com Cc: maxime.ripard@free-electrons.com Cc: viresh.kumar@linaro.org Link: http://lkml.kernel.org/r/1427746633-9137-7-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-atmel-pit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c index b5b4d45..c0304ff 100644 --- a/drivers/clocksource/timer-atmel-pit.c +++ b/drivers/clocksource/timer-atmel-pit.c @@ -61,12 +61,12 @@ static inline struct pit_data *clkevt_to_pit_data(struct clock_event_device *clk static inline unsigned int pit_read(void __iomem *base, unsigned int reg_offset) { - return __raw_readl(base + reg_offset); + return readl_relaxed(base + reg_offset); } static inline void pit_write(void __iomem *base, unsigned int reg_offset, unsigned long value) { - __raw_writel(value, base + reg_offset); + writel_relaxed(value, base + reg_offset); } /* -- cgit v1.1 From 566e6dfad580a55084c29fe3e887c7273b16fc6a Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 31 Mar 2015 12:12:22 +0200 Subject: clocksource/drivers/arm_arch_timer: Rename 'arch_timer_probed()' to 'arch_timer_needs_probing()' to reflect behaviour The arch_timer_probed() function returns whether the given timer doesn't need to be probed. This can be the case when the timer has been probed already, but also when it has no corresponding enabled node in DT. Rename the function to arch_timer_needs_probing() and invert its return value to better reflect the function's purpose and behaviour. 
Signed-off-by: Laurent Pinchart Signed-off-by: Daniel Lezcano Acked-by: Sudeep Holla Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1427796746-373-1-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/arm_arch_timer.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a3025e7..2664696 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -661,17 +661,17 @@ static const struct of_device_id arch_timer_mem_of_match[] __initconst = { }; static bool __init -arch_timer_probed(int type, const struct of_device_id *matches) +arch_timer_needs_probing(int type, const struct of_device_id *matches) { struct device_node *dn; - bool probed = true; + bool needs_probing = false; dn = of_find_matching_node(NULL, matches); if (dn && of_device_is_available(dn) && !(arch_timers_present & type)) - probed = false; + needs_probing = true; of_node_put(dn); - return probed; + return needs_probing; } static void __init arch_timer_common_init(void) @@ -680,9 +680,9 @@ static void __init arch_timer_common_init(void) /* Wait until both nodes are probed if we have two timers */ if ((arch_timers_present & mask) != mask) { - if (!arch_timer_probed(ARCH_MEM_TIMER, arch_timer_mem_of_match)) + if (arch_timer_needs_probing(ARCH_MEM_TIMER, arch_timer_mem_of_match)) return; - if (!arch_timer_probed(ARCH_CP15_TIMER, arch_timer_of_match)) + if (arch_timer_needs_probing(ARCH_CP15_TIMER, arch_timer_of_match)) return; } -- cgit v1.1 From 5673bc5a863bd4391eab5bb85277f0f1dd1dca50 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 31 Mar 2015 12:12:23 +0200 Subject: clocksource/drivers/sun5i: Switch to request_irq() The current code uses setup_irq(), while it could perfectly use the much simpler request_irq(). Switch to that. Signed-off-by: Maxime Ripard Signed-off-by: Daniel Lezcano Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1427796746-373-2-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-sun5i.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index 58597fb..03f04d8 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -129,13 +129,6 @@ static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static struct irqaction sun5i_timer_irq = { - .name = "sun5i_timer0", - .flags = IRQF_TIMER | IRQF_IRQPOLL, - .handler = sun5i_timer_interrupt, - .dev_id = &sun5i_clockevent, -}; - static void __init sun5i_timer_init(struct device_node *node) { struct reset_control *rstc; @@ -181,7 +174,8 @@ static void __init sun5i_timer_init(struct device_node *node) clockevents_config_and_register(&sun5i_clockevent, rate, TIMER_SYNC_TICKS, 0xffffffff); - ret = setup_irq(irq, &sun5i_timer_irq); + ret = request_irq(irq, sun5i_timer_interrupt, IRQF_TIMER | IRQF_IRQPOLL, + "sun5i_timer0", &sun5i_clockevent); if (ret) pr_warn("failed to setup irq %d\n", irq); } -- cgit v1.1 From a45860d0ba433c217d359fa2cc2a4984d18ce263 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 31 Mar 2015 12:12:24 +0200 Subject: clocksource/drivers/sun5i: Use of_io_request_and_map() of_iomap doesn't do a request_mem_region() on the memory area defined in the DT it maps. 
Switch to of_io_request_and_map() to make sure we're the only users. Signed-off-by: Maxime Ripard Signed-off-by: Daniel Lezcano Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1427796746-373-3-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-sun5i.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index 03f04d8..87f7b81 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -137,7 +137,7 @@ static void __init sun5i_timer_init(struct device_node *node) int ret, irq; u32 val; - timer_base = of_iomap(node, 0); + timer_base = of_io_request_and_map(node, 0, of_node_full_name(node)); if (!timer_base) panic("Can't map registers"); -- cgit v1.1 From 4a59058f0b09682200c04b1db236b4a3b92128d7 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 31 Mar 2015 12:12:25 +0200 Subject: clocksource/drivers/sun5i: Refactor the current code Refactor the code in order to remove the global variables and split the clock source and clock events registration in order to ease the addition of the clock notifiers needed to handle the parent clock rate changes. Signed-off-by: Maxime Ripard Signed-off-by: Daniel Lezcano Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1427796746-373-4-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-sun5i.c | 231 +++++++++++++++++++++++++++----------- 1 file changed, 166 insertions(+), 65 deletions(-) diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index 87f7b81..2330090 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -36,8 +37,27 @@ #define TIMER_SYNC_TICKS 3 -static void __iomem *timer_base; -static u32 ticks_per_jiffy; +struct sun5i_timer { + void __iomem *base; + struct clk *clk; + u32 ticks_per_jiffy; +}; + +struct sun5i_timer_clksrc { + struct sun5i_timer timer; + struct clocksource clksrc; +}; + +#define to_sun5i_timer_clksrc(x) \ + container_of(x, struct sun5i_timer_clksrc, clksrc) + +struct sun5i_timer_clkevt { + struct sun5i_timer timer; + struct clock_event_device clkevt; +}; + +#define to_sun5i_timer_clkevt(x) \ + container_of(x, struct sun5i_timer_clkevt, clkevt) /* * When we disable a timer, we need to wait at least for 2 cycles of @@ -45,30 +65,30 @@ static u32 ticks_per_jiffy; * that is already setup and runs at the same frequency than the other * timers, and we never will be disabled. 
*/ -static void sun5i_clkevt_sync(void) +static void sun5i_clkevt_sync(struct sun5i_timer_clkevt *ce) { - u32 old = readl(timer_base + TIMER_CNTVAL_LO_REG(1)); + u32 old = readl(ce->timer.base + TIMER_CNTVAL_LO_REG(1)); - while ((old - readl(timer_base + TIMER_CNTVAL_LO_REG(1))) < TIMER_SYNC_TICKS) + while ((old - readl(ce->timer.base + TIMER_CNTVAL_LO_REG(1))) < TIMER_SYNC_TICKS) cpu_relax(); } -static void sun5i_clkevt_time_stop(u8 timer) +static void sun5i_clkevt_time_stop(struct sun5i_timer_clkevt *ce, u8 timer) { - u32 val = readl(timer_base + TIMER_CTL_REG(timer)); - writel(val & ~TIMER_CTL_ENABLE, timer_base + TIMER_CTL_REG(timer)); + u32 val = readl(ce->timer.base + TIMER_CTL_REG(timer)); + writel(val & ~TIMER_CTL_ENABLE, ce->timer.base + TIMER_CTL_REG(timer)); - sun5i_clkevt_sync(); + sun5i_clkevt_sync(ce); } -static void sun5i_clkevt_time_setup(u8 timer, u32 delay) +static void sun5i_clkevt_time_setup(struct sun5i_timer_clkevt *ce, u8 timer, u32 delay) { - writel(delay, timer_base + TIMER_INTVAL_LO_REG(timer)); + writel(delay, ce->timer.base + TIMER_INTVAL_LO_REG(timer)); } -static void sun5i_clkevt_time_start(u8 timer, bool periodic) +static void sun5i_clkevt_time_start(struct sun5i_timer_clkevt *ce, u8 timer, bool periodic) { - u32 val = readl(timer_base + TIMER_CTL_REG(timer)); + u32 val = readl(ce->timer.base + TIMER_CTL_REG(timer)); if (periodic) val &= ~TIMER_CTL_ONESHOT; @@ -76,66 +96,170 @@ static void sun5i_clkevt_time_start(u8 timer, bool periodic) val |= TIMER_CTL_ONESHOT; writel(val | TIMER_CTL_ENABLE | TIMER_CTL_RELOAD, - timer_base + TIMER_CTL_REG(timer)); + ce->timer.base + TIMER_CTL_REG(timer)); } static void sun5i_clkevt_mode(enum clock_event_mode mode, - struct clock_event_device *clk) + struct clock_event_device *clkevt) { + struct sun5i_timer_clkevt *ce = to_sun5i_timer_clkevt(clkevt); + switch (mode) { case CLOCK_EVT_MODE_PERIODIC: - sun5i_clkevt_time_stop(0); - sun5i_clkevt_time_setup(0, ticks_per_jiffy); - sun5i_clkevt_time_start(0, true); + sun5i_clkevt_time_stop(ce, 0); + sun5i_clkevt_time_setup(ce, 0, ce->timer.ticks_per_jiffy); + sun5i_clkevt_time_start(ce, 0, true); break; case CLOCK_EVT_MODE_ONESHOT: - sun5i_clkevt_time_stop(0); - sun5i_clkevt_time_start(0, false); + sun5i_clkevt_time_stop(ce, 0); + sun5i_clkevt_time_start(ce, 0, false); break; case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_SHUTDOWN: default: - sun5i_clkevt_time_stop(0); + sun5i_clkevt_time_stop(ce, 0); break; } } static int sun5i_clkevt_next_event(unsigned long evt, - struct clock_event_device *unused) + struct clock_event_device *clkevt) { - sun5i_clkevt_time_stop(0); - sun5i_clkevt_time_setup(0, evt - TIMER_SYNC_TICKS); - sun5i_clkevt_time_start(0, false); + struct sun5i_timer_clkevt *ce = to_sun5i_timer_clkevt(clkevt); + + sun5i_clkevt_time_stop(ce, 0); + sun5i_clkevt_time_setup(ce, 0, evt - TIMER_SYNC_TICKS); + sun5i_clkevt_time_start(ce, 0, false); return 0; } -static struct clock_event_device sun5i_clockevent = { - .name = "sun5i_tick", - .rating = 340, - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_mode = sun5i_clkevt_mode, - .set_next_event = sun5i_clkevt_next_event, -}; - - static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id) { - struct clock_event_device *evt = (struct clock_event_device *)dev_id; + struct sun5i_timer_clkevt *ce = (struct sun5i_timer_clkevt *)dev_id; - writel(0x1, timer_base + TIMER_IRQ_ST_REG); - evt->event_handler(evt); + writel(0x1, ce->timer.base + TIMER_IRQ_ST_REG); + ce->clkevt.event_handler(&ce->clkevt); return 
IRQ_HANDLED; } +static cycle_t sun5i_clksrc_read(struct clocksource *clksrc) +{ + struct sun5i_timer_clksrc *cs = to_sun5i_timer_clksrc(clksrc); + + return ~readl(cs->timer.base + TIMER_CNTVAL_LO_REG(1)); +} + +static int __init sun5i_setup_clocksource(struct device_node *node, + void __iomem *base, + struct clk *clk, int irq) +{ + struct sun5i_timer_clksrc *cs; + unsigned long rate; + int ret; + + cs = kzalloc(sizeof(*cs), GFP_KERNEL); + if (!cs) + return -ENOMEM; + + ret = clk_prepare_enable(clk); + if (ret) { + pr_err("Couldn't enable parent clock\n"); + goto err_free; + } + + rate = clk_get_rate(clk); + + cs->timer.base = base; + cs->timer.clk = clk; + + writel(~0, base + TIMER_INTVAL_LO_REG(1)); + writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD, + base + TIMER_CTL_REG(1)); + + cs->clksrc.name = node->name; + cs->clksrc.rating = 340; + cs->clksrc.read = sun5i_clksrc_read; + cs->clksrc.mask = CLOCKSOURCE_MASK(32); + cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS; + + ret = clocksource_register_hz(&cs->clksrc, rate); + if (ret) { + pr_err("Couldn't register clock source.\n"); + goto err_disable_clk; + } + + return 0; + +err_disable_clk: + clk_disable_unprepare(clk); +err_free: + kfree(cs); + return ret; +} + +static int __init sun5i_setup_clockevent(struct device_node *node, void __iomem *base, + struct clk *clk, int irq) +{ + struct sun5i_timer_clkevt *ce; + unsigned long rate; + int ret; + u32 val; + + ce = kzalloc(sizeof(*ce), GFP_KERNEL); + if (!ce) + return -ENOMEM; + + ret = clk_prepare_enable(clk); + if (ret) { + pr_err("Couldn't enable parent clock\n"); + goto err_free; + } + + rate = clk_get_rate(clk); + + ce->timer.base = base; + ce->timer.ticks_per_jiffy = DIV_ROUND_UP(rate, HZ); + ce->timer.clk = clk; + + ce->clkevt.name = node->name; + ce->clkevt.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; + ce->clkevt.set_next_event = sun5i_clkevt_next_event; + ce->clkevt.set_mode = sun5i_clkevt_mode; + ce->clkevt.rating = 340; + ce->clkevt.irq = irq; + ce->clkevt.cpumask = cpu_possible_mask; + + /* Enable timer0 interrupt */ + val = readl(base + TIMER_IRQ_EN_REG); + writel(val | TIMER_IRQ_EN(0), base + TIMER_IRQ_EN_REG); + + clockevents_config_and_register(&ce->clkevt, rate, + TIMER_SYNC_TICKS, 0xffffffff); + + ret = request_irq(irq, sun5i_timer_interrupt, IRQF_TIMER | IRQF_IRQPOLL, + "sun5i_timer0", ce); + if (ret) { + pr_err("Unable to register interrupt\n"); + goto err_disable_clk; + } + + return 0; + +err_disable_clk: + clk_disable_unprepare(clk); +err_free: + kfree(ce); + return ret; +} + static void __init sun5i_timer_init(struct device_node *node) { struct reset_control *rstc; - unsigned long rate; + void __iomem *timer_base; struct clk *clk; - int ret, irq; - u32 val; + int irq; timer_base = of_io_request_and_map(node, 0, of_node_full_name(node)); if (!timer_base) @@ -148,36 +272,13 @@ static void __init sun5i_timer_init(struct device_node *node) clk = of_clk_get(node, 0); if (IS_ERR(clk)) panic("Can't get timer clock"); - clk_prepare_enable(clk); - rate = clk_get_rate(clk); rstc = of_reset_control_get(node, NULL); if (!IS_ERR(rstc)) reset_control_deassert(rstc); - writel(~0, timer_base + TIMER_INTVAL_LO_REG(1)); - writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD, - timer_base + TIMER_CTL_REG(1)); - - clocksource_mmio_init(timer_base + TIMER_CNTVAL_LO_REG(1), node->name, - rate, 340, 32, clocksource_mmio_readl_down); - - ticks_per_jiffy = DIV_ROUND_UP(rate, HZ); - - /* Enable timer0 interrupt */ - val = readl(timer_base + TIMER_IRQ_EN_REG); - writel(val | TIMER_IRQ_EN(0), 
timer_base + TIMER_IRQ_EN_REG); - - sun5i_clockevent.cpumask = cpu_possible_mask; - sun5i_clockevent.irq = irq; - - clockevents_config_and_register(&sun5i_clockevent, rate, - TIMER_SYNC_TICKS, 0xffffffff); - - ret = request_irq(irq, sun5i_timer_interrupt, IRQF_TIMER | IRQF_IRQPOLL, - "sun5i_timer0", &sun5i_clockevent); - if (ret) - pr_warn("failed to setup irq %d\n", irq); + sun5i_setup_clocksource(node, timer_base, clk, irq); + sun5i_setup_clockevent(node, timer_base, clk, irq); } CLOCKSOURCE_OF_DECLARE(sun5i_a13, "allwinner,sun5i-a13-hstimer", sun5i_timer_init); -- cgit v1.1 From 3071efa466b30636bf958f3d13bc808e03105cd8 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 31 Mar 2015 12:12:26 +0200 Subject: clocksource/drivers/sun5i: Add clock notifiers The parent clock of the sun5i timer is the AHB clock, whose rate might change because of other devices' requirements. This is for example the case on the Allwinner A31, where the DMA controller needs a minimum rate higher than the default, which is only enforced after the timer driver has probed. Add clock notifiers to make sure the timer rates reflect the clock rate changes. Signed-off-by: Maxime Ripard Signed-off-by: Daniel Lezcano Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1427796746-373-5-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-sun5i.c | 66 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index 2330090..28aa4b7 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -40,9 +40,13 @@ struct sun5i_timer { void __iomem *base; struct clk *clk; + struct notifier_block clk_rate_cb; u32 ticks_per_jiffy; }; +#define to_sun5i_timer(x) \ + container_of(x, struct sun5i_timer, clk_rate_cb) + struct sun5i_timer_clksrc { struct sun5i_timer timer; struct clocksource clksrc; @@ -151,6 +155,29 @@ static cycle_t sun5i_clksrc_read(struct clocksource *clksrc) return ~readl(cs->timer.base + TIMER_CNTVAL_LO_REG(1)); } +static int sun5i_rate_cb_clksrc(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct clk_notifier_data *ndata = data; + struct sun5i_timer *timer = to_sun5i_timer(nb); + struct sun5i_timer_clksrc *cs = container_of(timer, struct sun5i_timer_clksrc, timer); + + switch (event) { + case PRE_RATE_CHANGE: + clocksource_unregister(&cs->clksrc); + break; + + case POST_RATE_CHANGE: + clocksource_register_hz(&cs->clksrc, ndata->new_rate); + break; + + default: + break; + } + + return NOTIFY_DONE; +} + static int __init sun5i_setup_clocksource(struct device_node *node, void __iomem *base, struct clk *clk, int irq) @@ -173,6 +200,14 @@ static int __init sun5i_setup_clocksource(struct device_node *node, cs->timer.base = base; cs->timer.clk = clk; + cs->timer.clk_rate_cb.notifier_call = sun5i_rate_cb_clksrc; + cs->timer.clk_rate_cb.next = NULL; + + ret = clk_notifier_register(clk, &cs->timer.clk_rate_cb); + if (ret) { + pr_err("Unable to register clock notifier.\n"); + goto err_disable_clk; + } writel(~0, base + TIMER_INTVAL_LO_REG(1)); writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD, @@ -187,11 +222,13 @@ static int __init sun5i_setup_clocksource(struct device_node *node, ret = clocksource_register_hz(&cs->clksrc, rate); if (ret) { pr_err("Couldn't register clock source.\n"); - goto err_disable_clk; + goto err_remove_notifier; } return 0; +err_remove_notifier: + clk_notifier_unregister(clk,
&cs->timer.clk_rate_cb); err_disable_clk: clk_disable_unprepare(clk); err_free: @@ -199,6 +236,21 @@ err_free: return ret; } +static int sun5i_rate_cb_clkevt(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct clk_notifier_data *ndata = data; + struct sun5i_timer *timer = to_sun5i_timer(nb); + struct sun5i_timer_clkevt *ce = container_of(timer, struct sun5i_timer_clkevt, timer); + + if (event == POST_RATE_CHANGE) { + clockevents_update_freq(&ce->clkevt, ndata->new_rate); + ce->timer.ticks_per_jiffy = DIV_ROUND_UP(ndata->new_rate, HZ); + } + + return NOTIFY_DONE; +} + static int __init sun5i_setup_clockevent(struct device_node *node, void __iomem *base, struct clk *clk, int irq) { @@ -222,6 +274,14 @@ static int __init sun5i_setup_clockevent(struct device_node *node, void __iomem ce->timer.base = base; ce->timer.ticks_per_jiffy = DIV_ROUND_UP(rate, HZ); ce->timer.clk = clk; + ce->timer.clk_rate_cb.notifier_call = sun5i_rate_cb_clkevt; + ce->timer.clk_rate_cb.next = NULL; + + ret = clk_notifier_register(clk, &ce->timer.clk_rate_cb); + if (ret) { + pr_err("Unable to register clock notifier.\n"); + goto err_disable_clk; + } ce->clkevt.name = node->name; ce->clkevt.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; @@ -242,11 +302,13 @@ static int __init sun5i_setup_clockevent(struct device_node *node, void __iomem "sun5i_timer0", ce); if (ret) { pr_err("Unable to register interrupt\n"); - goto err_disable_clk; + goto err_remove_notifier; } return 0; +err_remove_notifier: + clk_notifier_unregister(clk, &ce->timer.clk_rate_cb); err_disable_clk: clk_disable_unprepare(clk); err_free: -- cgit v1.1 From 9f083b74df3a7eaa100b456f2dc195512daf728e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:05:19 +0100 Subject: clockevents: Remove CONFIG_GENERIC_CLOCKEVENTS_BUILD This option was for simpler migration to the clock events code. Most architectures have been converted and the option has been dysfunctional as a standalone option for quite some time. Remove it. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J.
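For illustration: the sun5i rework above hinges on one pattern, embedding the generic structures in a driver-private struct and recovering that struct with container_of(), both in the clockevent callbacks and in the clk rate notifier. A minimal sketch of the pattern follows; the demo_* names are hypothetical, not part of the patches.

#include <linux/clk.h>
#include <linux/clockchips.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/notifier.h>

struct demo_timer {
	void __iomem *base;
	struct notifier_block clk_rate_cb;
	struct clock_event_device clkevt;	/* embedded, not a pointer */
};

/* Recover the private struct from the member pointer the core hands back */
#define to_demo_timer(evt) container_of(evt, struct demo_timer, clkevt)

static int demo_set_next_event(unsigned long delta,
			       struct clock_event_device *clkevt)
{
	struct demo_timer *t = to_demo_timer(clkevt);

	writel(delta, t->base);		/* per-instance state, no globals */
	return 0;
}

static int demo_rate_cb(struct notifier_block *nb, unsigned long event,
			void *data)
{
	struct clk_notifier_data *ndata = data;
	struct demo_timer *t = container_of(nb, struct demo_timer, clk_rate_cb);

	/* Once the parent clock rate has changed, reprogram the clockevent */
	if (event == POST_RATE_CHANGE)
		clockevents_update_freq(&t->clkevt, ndata->new_rate);

	return NOTIFY_DONE;
}

Because each struct instance carries its own notifier_block and clockevent, the same code serves any number of timer instances, which is what the refactoring above buys.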
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5021859.jl9OC1medj@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 9 +++------ kernel/time/Kconfig | 6 ------ kernel/time/Makefile | 6 ++---- kernel/time/clockevents.c | 3 --- kernel/time/tick-internal.h | 4 ++-- 5 files changed, 7 insertions(+), 21 deletions(-) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index e20232c..5797513 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -22,7 +22,7 @@ enum clock_event_nofitiers { CLOCK_EVT_NOTIFY_CPU_DEAD, }; -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD +#ifdef CONFIG_GENERIC_CLOCKEVENTS #include #include @@ -229,13 +229,9 @@ static inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) {}; #endif -#ifdef CONFIG_GENERIC_CLOCKEVENTS extern int clockevents_notify(unsigned long reason, void *arg); -#else -static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } -#endif -#else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */ +#else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void clockevents_suspend(void) {} static inline void clockevents_resume(void) {} @@ -243,6 +239,7 @@ static inline void clockevents_resume(void) {} static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } static inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) {}; +static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } #endif diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index d626dc9..579ce1b 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -33,12 +33,6 @@ config ARCH_USES_GETTIMEOFFSET config GENERIC_CLOCKEVENTS bool -# Migration helper. 
Builds, but does not invoke -config GENERIC_CLOCKEVENTS_BUILD - bool - default y - depends on GENERIC_CLOCKEVENTS - # Architecture can handle broadcast in a driver-agnostic way config ARCH_HAS_TICK_BROADCAST bool diff --git a/kernel/time/Makefile b/kernel/time/Makefile index c09c078..01f0312 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -2,15 +2,13 @@ obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o obj-y += timeconv.o timecounter.o posix-clock.o alarmtimer.o -obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o -obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o +obj-$(CONFIG_GENERIC_CLOCKEVENTS) += clockevents.o tick-common.o ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y) obj-y += tick-broadcast.o obj-$(CONFIG_TICK_ONESHOT) += tick-broadcast-hrtimer.o endif obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o -obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o -obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o +obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o obj-$(CONFIG_TIMER_STATS) += timer_stats.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 73689df..3531bee 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -645,7 +645,6 @@ void clockevents_resume(void) dev->resume(dev); } -#ifdef CONFIG_GENERIC_CLOCKEVENTS /** * clockevents_notify - notification about relevant events * Returns 0 on success, any other value on error @@ -831,5 +830,3 @@ static int __init clockevents_init_sysfs(void) } device_initcall(clockevents_init_sysfs); #endif /* SYSFS */ - -#endif /* GENERIC_CLOCK_EVENTS */ diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 98700e4..c7b75be 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -10,7 +10,7 @@ extern seqlock_t jiffies_lock; #define CS_NAME_LEN 32 -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD +#ifdef CONFIG_GENERIC_CLOCKEVENTS #define TICK_DO_TIMER_NONE -1 #define TICK_DO_TIMER_BOOT -2 @@ -167,7 +167,7 @@ static inline int tick_device_is_functional(struct clock_event_device *dev) int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); -#endif +#endif /* GENERIC_CLOCKEVENTS */ extern void do_timer(unsigned long ticks); extern void update_wall_time(void); -- cgit v1.1 From bfb83b27519aa7ed9510f601a8f825a2c1484bc2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:06:04 +0100 Subject: tick: Move clocksource related stuff to timekeeping.h Move clocksource related stuff to timekeeping.h and remove the pointless include from ntp.c Signed-off-by: Thomas Gleixner [ rjw: Subject ] Signed-off-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/2714218.nM5AEfAHj0@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 2 +- kernel/time/jiffies.c | 2 +- kernel/time/ntp.c | 1 - kernel/time/tick-internal.h | 6 ------ kernel/time/timekeeping.h | 7 +++++++ 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c3be3c7..8b4010f 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -31,7 +31,7 @@ #include #include -#include "tick-internal.h" +#include "timekeeping.h" #include "timekeeping_internal.h" /** diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index c4bb518..347fecf 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -25,7 +25,7 @@ #include #include -#include "tick-internal.h" +#include "timekeeping.h" /* The Jiffies based clocksource is the lowest common * denominator clock source which should function on diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 0f60b08..9ad60d0 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -17,7 +17,6 @@ #include #include -#include "tick-internal.h" #include "ntp_internal.h" /* diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index c7b75be..cba5214 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -6,10 +6,6 @@ #include "timekeeping.h" -extern seqlock_t jiffies_lock; - -#define CS_NAME_LEN 32 - #ifdef CONFIG_GENERIC_CLOCKEVENTS #define TICK_DO_TIMER_NONE -1 @@ -169,5 +165,3 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); #endif /* GENERIC_CLOCKEVENTS */ -extern void do_timer(unsigned long ticks); -extern void update_wall_time(void); diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 1d91416..ead8794 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -19,4 +19,11 @@ extern void timekeeping_clocktai(struct timespec *ts); extern int timekeeping_suspend(void); extern void timekeeping_resume(void); +extern void do_timer(unsigned long ticks); +extern void update_wall_time(void); + +extern seqlock_t jiffies_lock; + +#define CS_NAME_LEN 32 + #endif -- cgit v1.1 From b7475eb599ddb2e8cab2dc86ff38a9507463ad6b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:06:47 +0100 Subject: tick: Simplify tick-internal.h tick-internal.h is pretty confusing as a lot of the stub inlines are there several times. Disentangle the maze and make clear functional sections. Signed-off-by: Thomas Gleixner [ rjw: Subject ] Signed-off-by: Rafael J.
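For illustration: the header moves above and the consolidation carried out in the diff below all revolve around one C idiom, shown here as a sketch with a hypothetical CONFIG_DEMO_FEATURE. A real declaration is exposed when the feature is built in, and an empty static inline stub otherwise, so call sites stay free of #ifdefs and the stub compiles away entirely.

#ifdef CONFIG_DEMO_FEATURE
extern void demo_feature_suspend(void);
extern void demo_feature_resume(void);
#else
static inline void demo_feature_suspend(void) { }
static inline void demo_feature_resume(void) { }
#endif

/* Callers read straight-line code whether or not the feature is on */
static inline void demo_suspend_path(void)
{
	demo_feature_suspend();		/* no-op if the feature is off */
}

Keeping each feature's externs and its stubs in one clearly delimited section, as the patch does, is precisely what prevents the duplicated stubs it removes.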
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/16068264.vcNp79HLaT@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/tick-internal.h | 145 +++++++++++++++----------------------------- 1 file changed, 49 insertions(+), 96 deletions(-) diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index cba5214..d86eb8d 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -27,14 +27,19 @@ extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); -extern void clockevents_shutdown(struct clock_event_device *dev); extern int clockevents_tick_resume(struct clock_event_device *dev); +/* Check, if the device is functional or a dummy for broadcast */ +static inline int tick_device_is_functional(struct clock_event_device *dev) +{ + return !(dev->features & CLOCK_EVT_FEAT_DUMMY); +} +extern void clockevents_shutdown(struct clock_event_device *dev); +extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); +#endif /* GENERIC_CLOCKEVENTS */ -/* - * NO_HZ / high resolution timer shared code - */ +/* Oneshot related functions */ #ifdef CONFIG_TICK_ONESHOT extern void tick_setup_oneshot(struct clock_event_device *newdev, void (*handler)(struct clock_event_device *), @@ -43,69 +48,19 @@ extern int tick_program_event(ktime_t expires, int force); extern void tick_oneshot_notify(void); extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); extern void tick_resume_oneshot(void); -# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); -extern int tick_broadcast_oneshot_control(unsigned long reason); -extern void tick_broadcast_switch_to_oneshot(void); -extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); -extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); -extern int tick_broadcast_oneshot_active(void); -extern void tick_check_oneshot_broadcast_this_cpu(void); -bool tick_broadcast_oneshot_available(void); -# else /* BROADCAST */ -static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) -{ - BUG(); -} -static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } -static inline void tick_broadcast_switch_to_oneshot(void) { } -static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } -static inline int tick_broadcast_oneshot_active(void) { return 0; } -static inline void tick_check_oneshot_broadcast_this_cpu(void) { } -static inline bool tick_broadcast_oneshot_available(void) { return true; } -# endif /* !BROADCAST */ - +static inline bool tick_oneshot_possible(void) { return true; } #else /* !ONESHOT */ static inline void tick_setup_oneshot(struct clock_event_device *newdev, void (*handler)(struct clock_event_device *), - ktime_t nextevt) -{ - BUG(); -} -static inline void tick_resume_oneshot(void) -{ - BUG(); -} -static inline int tick_program_event(ktime_t expires, int force) -{ - return 0; -} + ktime_t nextevt) { BUG(); } +static inline void tick_resume_oneshot(void) { BUG(); } +static inline int tick_program_event(ktime_t expires, int force) { return 0; } static inline void tick_oneshot_notify(void) { } -static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) -{ - BUG(); -} -static inline int tick_broadcast_oneshot_control(unsigned long 
reason) { return 0; } -static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } -static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) -{ - return 0; -} -static inline int tick_broadcast_oneshot_active(void) { return 0; } -static inline bool tick_broadcast_oneshot_available(void) { return false; } +static inline bool tick_oneshot_possible(void) { return false; } #endif /* !TICK_ONESHOT */ -/* NO_HZ_FULL internal */ -#ifdef CONFIG_NO_HZ_FULL -extern void tick_nohz_init(void); -# else -static inline void tick_nohz_init(void) { } -#endif - -/* - * Broadcasting support - */ +/* Broadcasting support */ #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); extern void tick_install_broadcast_device(struct clock_event_device *dev); @@ -115,53 +70,51 @@ extern void tick_shutdown_broadcast(unsigned int *cpup); extern void tick_suspend_broadcast(void); extern int tick_resume_broadcast(void); extern void tick_broadcast_init(void); -extern void -tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); -int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); - +extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); +extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); #else /* !BROADCAST */ - -static inline void tick_install_broadcast_device(struct clock_event_device *dev) -{ -} - -static inline int tick_is_broadcast_device(struct clock_event_device *dev) -{ - return 0; -} -static inline int tick_device_uses_broadcast(struct clock_event_device *dev, - int cpu) -{ - return 0; -} +static inline void tick_install_broadcast_device(struct clock_event_device *dev) { } +static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; } +static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; } static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { } static inline void tick_shutdown_broadcast(unsigned int *cpup) { } static inline void tick_suspend_broadcast(void) { } static inline int tick_resume_broadcast(void) { return 0; } static inline void tick_broadcast_init(void) { } -static inline int tick_broadcast_update_freq(struct clock_event_device *dev, - u32 freq) { return -ENODEV; } +static inline int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) { return -ENODEV; } -/* - * Set the periodic handler in non broadcast mode - */ -static inline void tick_set_periodic_handler(struct clock_event_device *dev, - int broadcast) +/* Set the periodic handler in non broadcast mode */ +static inline void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) { dev->event_handler = tick_handle_periodic; } #endif /* !BROADCAST */ -/* - * Check, if the device is functional or a dummy for broadcast - */ -static inline int tick_device_is_functional(struct clock_event_device *dev) -{ - return !(dev->features & CLOCK_EVT_FEAT_DUMMY); -} - -int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); - -#endif /* GENERIC_CLOCKEVENTS */ +/* Functions related to oneshot broadcasting */ +#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); +extern int tick_broadcast_oneshot_control(unsigned long reason); +extern void 
tick_broadcast_switch_to_oneshot(void); +extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); +extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); +extern int tick_broadcast_oneshot_active(void); +extern void tick_check_oneshot_broadcast_this_cpu(void); +bool tick_broadcast_oneshot_available(void); +#else /* BROADCAST && ONESHOT */ +static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } +static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } +static inline void tick_broadcast_switch_to_oneshot(void) { } +static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } +static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { return 0; } +static inline int tick_broadcast_oneshot_active(void) { return 0; } +static inline void tick_check_oneshot_broadcast_this_cpu(void) { } +static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); } +#endif /* !BROADCAST && ONESHOT */ +/* NO_HZ_FULL internal */ +#ifdef CONFIG_NO_HZ_FULL +extern void tick_nohz_init(void); +# else +static inline void tick_nohz_init(void) { } +#endif -- cgit v1.1 From c1797baf6880174f899ce3960d0598f5bbeeb7ff Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:07:37 +0100 Subject: tick: Move core only declarations and functions to core No point to expose everything to the world. People just believe such functions can be abused for whatever purposes. Sigh. Signed-off-by: Thomas Gleixner [ Rebased on top of 4.0-rc5 ] Signed-off-by: Rafael J. Wysocki Cc: Nicolas Pitre Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/28017337.VbCUc39Gme@vostro.rjw.lan [ Merged to latest timers/core ] Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 15 ++--- include/linux/tick.h | 134 ++++++-------------------------------------- kernel/time/clocksource.c | 2 +- kernel/time/hrtimer.c | 2 +- kernel/time/tick-internal.h | 15 +++++ kernel/time/tick-sched.c | 7 ++- kernel/time/tick-sched.h | 64 +++++++++++++++++++++ kernel/time/timer_list.c | 2 +- 8 files changed, 112 insertions(+), 129 deletions(-) create mode 100644 kernel/time/tick-sched.h diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 5797513..bc3af82 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -193,15 +193,6 @@ extern void clockevents_config_and_register(struct clock_event_device *dev, extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); -extern void clockevents_exchange_device(struct clock_event_device *old, - struct clock_event_device *new); -extern void clockevents_set_state(struct clock_event_device *dev, - enum clock_event_state state); -extern int clockevents_program_event(struct clock_event_device *dev, - ktime_t expires, bool force); - -extern void clockevents_handle_noop(struct clock_event_device *dev); - static inline void clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) { @@ -209,6 +200,12 @@ clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) freq, minsec); } +/* Should be core only, but is abused by arm bl_switcher */ +extern void clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state); +extern int clockevents_program_event(struct clock_event_device *dev, + ktime_t expires, bool force); + extern void clockevents_suspend(void); extern void clockevents_resume(void); diff --git a/include/linux/tick.h b/include/linux/tick.h 
index 9c085dc..f9a2d26 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -1,7 +1,5 @@ -/* linux/include/linux/tick.h - * - * This file contains the structure definitions for tick related functions - * +/* + * Tick related global functions */ #ifndef _LINUX_TICK_H #define _LINUX_TICK_H @@ -9,13 +7,12 @@ #include #include #include -#include #include #include #include +/* ARM BL switcher abuse support */ #ifdef CONFIG_GENERIC_CLOCKEVENTS - enum tick_device_mode { TICKDEV_MODE_PERIODIC, TICKDEV_MODE_ONESHOT, @@ -25,133 +22,38 @@ struct tick_device { struct clock_event_device *evtdev; enum tick_device_mode mode; }; - -enum tick_nohz_mode { - NOHZ_MODE_INACTIVE, - NOHZ_MODE_LOWRES, - NOHZ_MODE_HIGHRES, -}; - -/** - * struct tick_sched - sched tick emulation and no idle tick control/stats - * @sched_timer: hrtimer to schedule the periodic tick in high - * resolution mode - * @last_tick: Store the last tick expiry time when the tick - * timer is modified for nohz sleeps. This is necessary - * to resume the tick timer operation in the timeline - * when the CPU returns from nohz sleep. - * @tick_stopped: Indicator that the idle tick has been stopped - * @idle_jiffies: jiffies at the entry to idle for idle time accounting - * @idle_calls: Total number of idle calls - * @idle_sleeps: Number of idle calls, where the sched tick was stopped - * @idle_entrytime: Time when the idle call was entered - * @idle_waketime: Time when the idle was interrupted - * @idle_exittime: Time when the idle state was left - * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped - * @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding - * @sleep_length: Duration of the current idle sleep - * @do_timer_lst: CPU was the last one doing do_timer before going idle - */ -struct tick_sched { - struct hrtimer sched_timer; - unsigned long check_clocks; - enum tick_nohz_mode nohz_mode; - ktime_t last_tick; - int inidle; - int tick_stopped; - unsigned long idle_jiffies; - unsigned long idle_calls; - unsigned long idle_sleeps; - int idle_active; - ktime_t idle_entrytime; - ktime_t idle_waketime; - ktime_t idle_exittime; - ktime_t idle_sleeptime; - ktime_t iowait_sleeptime; - ktime_t sleep_length; - unsigned long last_jiffies; - unsigned long next_jiffies; - ktime_t idle_expires; - int do_timer_last; -}; - -extern void __init tick_init(void); -extern int tick_is_oneshot_available(void); extern struct tick_device *tick_get_device(int cpu); +#endif +#ifdef CONFIG_GENERIC_CLOCKEVENTS +extern void __init tick_init(void); extern void tick_freeze(void); extern void tick_unfreeze(void); +#else /* CONFIG_GENERIC_CLOCKEVENTS */ +static inline void tick_init(void) { } +static inline void tick_freeze(void) { } +static inline void tick_unfreeze(void) { } +#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ -# ifdef CONFIG_HIGH_RES_TIMERS -extern int tick_init_highres(void); -extern int tick_program_event(ktime_t expires, int force); -extern void tick_setup_sched_timer(void); -# endif - -# if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS -extern void tick_cancel_sched_timer(int cpu); -# else -static inline void tick_cancel_sched_timer(int cpu) { } -# endif - -# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -extern struct tick_device *tick_get_broadcast_device(void); -extern struct cpumask *tick_get_broadcast_mask(void); - -# ifdef CONFIG_TICK_ONESHOT -extern struct cpumask *tick_get_broadcast_oneshot_mask(void); -# endif - -# endif /* BROADCAST */ - -# ifdef 
CONFIG_TICK_ONESHOT -extern void tick_clock_notify(void); -extern int tick_check_oneshot_change(int allow_nohz); -extern struct tick_sched *tick_get_tick_sched(int cpu); +#ifdef CONFIG_TICK_ONESHOT extern void tick_irq_enter(void); -extern int tick_oneshot_mode_active(void); # ifndef arch_needs_cpu # define arch_needs_cpu() (0) # endif # else -static inline void tick_clock_notify(void) { } -static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } -static inline void tick_irq_enter(void) { } -static inline int tick_oneshot_mode_active(void) { return 0; } -# endif - -#else /* CONFIG_GENERIC_CLOCKEVENTS */ -static inline void tick_init(void) { } -static inline void tick_freeze(void) { } -static inline void tick_unfreeze(void) { } -static inline void tick_cancel_sched_timer(int cpu) { } -static inline void tick_clock_notify(void) { } -static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } static inline void tick_irq_enter(void) { } -static inline int tick_oneshot_mode_active(void) { return 0; } -#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ - -# ifdef CONFIG_NO_HZ_COMMON -DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched); - -static inline int tick_nohz_tick_stopped(void) -{ - return __this_cpu_read(tick_cpu_sched.tick_stopped); -} +#endif +#ifdef CONFIG_NO_HZ_COMMON +extern int tick_nohz_tick_stopped(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); - -# else /* !CONFIG_NO_HZ_COMMON */ -static inline int tick_nohz_tick_stopped(void) -{ - return 0; -} - +#else /* !CONFIG_NO_HZ_COMMON */ +static inline int tick_nohz_tick_stopped(void) { return 0; } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } @@ -163,7 +65,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } -# endif /* !CONFIG_NO_HZ_COMMON */ +#endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL extern bool tick_nohz_full_running; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 8b4010f..c3be3c7 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -31,7 +31,7 @@ #include #include -#include "timekeeping.h" +#include "tick-internal.h" #include "timekeeping_internal.h" /** diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index bee0c1f..721d29b 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -54,7 +54,7 @@ #include -#include "timekeeping.h" +#include "tick-internal.h" /* * The timer bases: diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index d86eb8d..dd2c45d 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -5,6 +5,7 @@ #include #include "timekeeping.h" +#include "tick-sched.h" #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -26,6 +27,7 @@ extern void tick_resume(void); extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); +extern int tick_is_oneshot_available(void); extern int clockevents_tick_resume(struct clock_event_device *dev); /* Check, if the device is functional or a dummy for broadcast */ @@ -35,6 +37,9 @@ 
static inline int tick_device_is_functional(struct clock_event_device *dev) } extern void clockevents_shutdown(struct clock_event_device *dev); +extern void clockevents_exchange_device(struct clock_event_device *old, + struct clock_event_device *new); +extern void clockevents_handle_noop(struct clock_event_device *dev); extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); #endif /* GENERIC_CLOCKEVENTS */ @@ -49,6 +54,10 @@ extern void tick_oneshot_notify(void); extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); extern void tick_resume_oneshot(void); static inline bool tick_oneshot_possible(void) { return true; } +extern int tick_oneshot_mode_active(void); +extern void tick_clock_notify(void); +extern int tick_check_oneshot_change(int allow_nohz); +extern int tick_init_highres(void); #else /* !ONESHOT */ static inline void tick_setup_oneshot(struct clock_event_device *newdev, @@ -58,6 +67,9 @@ static inline void tick_resume_oneshot(void) { BUG(); } static inline int tick_program_event(ktime_t expires, int force) { return 0; } static inline void tick_oneshot_notify(void) { } static inline bool tick_oneshot_possible(void) { return false; } +static inline int tick_oneshot_mode_active(void) { return 0; } +static inline void tick_clock_notify(void) { } +static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } #endif /* !TICK_ONESHOT */ /* Broadcasting support */ @@ -72,6 +84,8 @@ extern int tick_resume_broadcast(void); extern void tick_broadcast_init(void); extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); +extern struct tick_device *tick_get_broadcast_device(void); +extern struct cpumask *tick_get_broadcast_mask(void); #else /* !BROADCAST */ static inline void tick_install_broadcast_device(struct clock_event_device *dev) { } static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; } @@ -101,6 +115,7 @@ extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); extern int tick_broadcast_oneshot_active(void); extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); +extern struct cpumask *tick_get_broadcast_oneshot_mask(void); #else /* BROADCAST && ONESHOT */ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a4c4eda..9142591 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -34,7 +34,7 @@ /* * Per cpu nohz control structure */ -DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); +static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); /* * The time, when the last jiffy update happened. Protected by jiffies_lock. 
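For illustration: making the per-cpu control structure static, as the tick-sched.c hunk above does for tick_cpu_sched, keeps its layout private to the file and funnels outside readers through one accessor (added in the hunk that follows). A sketch of the same encapsulation with hypothetical demo_* names:

#include <linux/percpu.h>

struct demo_state {
	int stopped;
};

static DEFINE_PER_CPU(struct demo_state, demo_state);	/* file-local now */

/* The only interface the rest of the kernel sees */
int demo_state_stopped(void)
{
	return __this_cpu_read(demo_state.stopped);
}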
@@ -416,6 +416,11 @@ static int __init setup_tick_nohz(char *str) __setup("nohz=", setup_tick_nohz); +int tick_nohz_tick_stopped(void) +{ + return __this_cpu_read(tick_cpu_sched.tick_stopped); +} + /** * tick_nohz_update_jiffies - update jiffies when idle was interrupted * diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h new file mode 100644 index 0000000..9307432 --- /dev/null +++ b/kernel/time/tick-sched.h @@ -0,0 +1,64 @@ +#ifndef _TICK_SCHED_H +#define _TICK_SCHED_H + +#include + +enum tick_nohz_mode { + NOHZ_MODE_INACTIVE, + NOHZ_MODE_LOWRES, + NOHZ_MODE_HIGHRES, +}; + +/** + * struct tick_sched - sched tick emulation and no idle tick control/stats + * @sched_timer: hrtimer to schedule the periodic tick in high + * resolution mode + * @last_tick: Store the last tick expiry time when the tick + * timer is modified for nohz sleeps. This is necessary + * to resume the tick timer operation in the timeline + * when the CPU returns from nohz sleep. + * @tick_stopped: Indicator that the idle tick has been stopped + * @idle_jiffies: jiffies at the entry to idle for idle time accounting + * @idle_calls: Total number of idle calls + * @idle_sleeps: Number of idle calls, where the sched tick was stopped + * @idle_entrytime: Time when the idle call was entered + * @idle_waketime: Time when the idle was interrupted + * @idle_exittime: Time when the idle state was left + * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped + * @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding + * @sleep_length: Duration of the current idle sleep + * @do_timer_lst: CPU was the last one doing do_timer before going idle + */ +struct tick_sched { + struct hrtimer sched_timer; + unsigned long check_clocks; + enum tick_nohz_mode nohz_mode; + ktime_t last_tick; + int inidle; + int tick_stopped; + unsigned long idle_jiffies; + unsigned long idle_calls; + unsigned long idle_sleeps; + int idle_active; + ktime_t idle_entrytime; + ktime_t idle_waketime; + ktime_t idle_exittime; + ktime_t idle_sleeptime; + ktime_t iowait_sleeptime; + ktime_t sleep_length; + unsigned long last_jiffies; + unsigned long next_jiffies; + ktime_t idle_expires; + int do_timer_last; +}; + +extern struct tick_sched *tick_get_tick_sched(int cpu); + +extern void tick_setup_sched_timer(void); +#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS +extern void tick_cancel_sched_timer(int cpu); +#else +static inline void tick_cancel_sched_timer(int cpu) { } +#endif + +#endif diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 05aa559..e878c2e 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -16,10 +16,10 @@ #include #include #include -#include #include +#include "tick-internal.h" struct timer_list_iter { int cpu; -- cgit v1.1 From db6f672ef11d7a3c5aa128a3c3e57c92580a25f7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:08:27 +0100 Subject: clockevents: Remove extra local_irq_save() in clockevents_exchange_device() Called with 'clockevents_lock' held and interrupts disabled already. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. 
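For illustration: the local_irq_save() removal described above relies on a documented calling convention instead of redundant protection. Where a function is only ever entered with a lock held and interrupts off, a cheap assertion records and enforces that contract. A sketch with hypothetical names:

#include <linux/bug.h>
#include <linux/irqflags.h>

static int demo_resource;

/* Caller must hold demo_lock with interrupts disabled */
static void demo_exchange(int new)
{
	WARN_ON_ONCE(!irqs_disabled());	/* cheap check, catches misuse */
	demo_resource = new;
}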
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/51005827.yXt5tjZMBs@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 3531bee..b730027 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -595,14 +595,12 @@ void clockevents_handle_noop(struct clock_event_device *dev) * @old: device to release (can be NULL) * @new: device to request (can be NULL) * - * Called from the notifier chain. clockevents_lock is held already + * Called from various tick functions with clockevents_lock held and + * interrupts disabled. */ void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new) { - unsigned long flags; - - local_irq_save(flags); /* * Caller releases a clock event device. We queue it into the * released list and do a notify add later. @@ -618,7 +616,6 @@ void clockevents_exchange_device(struct clock_event_device *old, BUG_ON(new->state != CLOCK_EVT_STATE_DETACHED); clockevents_shutdown(new); } - local_irq_restore(flags); } /** -- cgit v1.1 From 4ffee521f36390c7720d493591b764ca35c8030b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:09:16 +0100 Subject: clockevents: Make suspend/resume calls explicit clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the suspend/resume() calls and invoke them directly from the call sites. No locking required at this point because these calls happen with interrupts disabled and a single cpu online. Signed-off-by: Thomas Gleixner [ Rebased on top of 4.0-rc5. ] Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/713674030.jVm1qaHuPf@vostro.rjw.lan [ Rebased on top of latest timers/core. 
] Signed-off-by: Ingo Molnar --- arch/x86/xen/suspend.c | 11 ++++------- include/linux/clockchips.h | 2 -- include/linux/tick.h | 3 +++ kernel/time/clockevents.c | 9 --------- kernel/time/tick-common.c | 26 +++++++++++++++++++++++--- kernel/time/tick-internal.h | 3 ++- kernel/time/timekeeping.c | 6 ++---- 7 files changed, 34 insertions(+), 26 deletions(-) diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index c4df9db..033e428 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include @@ -81,17 +81,14 @@ void xen_arch_post_suspend(int cancelled) static void xen_vcpu_notify_restore(void *data) { - unsigned long reason = (unsigned long)data; - /* Boot processor notified via generic timekeeping_resume() */ - if ( smp_processor_id() == 0) + if (smp_processor_id() == 0) return; - clockevents_notify(reason, NULL); + tick_resume(); } void xen_arch_resume(void) { - on_each_cpu(xen_vcpu_notify_restore, - (void *)CLOCK_EVT_NOTIFY_RESUME, 1); + on_each_cpu(xen_vcpu_notify_restore, NULL, 1); } diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index bc3af82..50ce975 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -16,8 +16,6 @@ enum clock_event_nofitiers { CLOCK_EVT_NOTIFY_BROADCAST_FORCE, CLOCK_EVT_NOTIFY_BROADCAST_ENTER, CLOCK_EVT_NOTIFY_BROADCAST_EXIT, - CLOCK_EVT_NOTIFY_SUSPEND, - CLOCK_EVT_NOTIFY_RESUME, CLOCK_EVT_NOTIFY_CPU_DYING, CLOCK_EVT_NOTIFY_CPU_DEAD, }; diff --git a/include/linux/tick.h b/include/linux/tick.h index f9a2d26..7e07e0e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -29,10 +29,13 @@ extern struct tick_device *tick_get_device(int cpu); extern void __init tick_init(void); extern void tick_freeze(void); extern void tick_unfreeze(void); +/* Should be core only, but XEN resume magic abuses this interface */ +extern void tick_resume(void); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_freeze(void) { } static inline void tick_unfreeze(void) { } +static inline void tick_resume(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #ifdef CONFIG_TICK_ONESHOT diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index b730027..7af6148 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -670,15 +670,6 @@ int clockevents_notify(unsigned long reason, void *arg) tick_handover_do_timer(arg); break; - case CLOCK_EVT_NOTIFY_SUSPEND: - tick_suspend(); - tick_suspend_broadcast(); - break; - - case CLOCK_EVT_NOTIFY_RESUME: - tick_resume(); - break; - case CLOCK_EVT_NOTIFY_CPU_DEAD: tick_shutdown_broadcast_oneshot(arg); tick_shutdown_broadcast(arg); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index a5b8771..1a60c2a 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -373,18 +373,39 @@ void tick_shutdown(unsigned int *cpup) } } +/** + * tick_suspend - Suspend the tick and the broadcast device + * + * Called from syscore_suspend() via timekeeping_suspend with only one + * CPU online and interrupts disabled or from tick_unfreeze() under + * tick_freeze_lock. + * + * No locks required. Nothing can change the per cpu device. 
+ */ void tick_suspend(void) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); clockevents_shutdown(td->evtdev); + tick_suspend_broadcast(); } +/** + * tick_resume - Resume the tick and the broadcast device + * + * Called from syscore_resume() via timekeeping_resume with only one + * CPU online and interrupts disabled or from tick_unfreeze() under + * tick_freeze_lock. + * + * No locks required. Nothing can change the per cpu device. + */ void tick_resume(void) { - struct tick_device *td = this_cpu_ptr(&tick_cpu_device); - int broadcast = tick_resume_broadcast(); + struct tick_device *td; + int broadcast; + broadcast = tick_resume_broadcast(); + td = this_cpu_ptr(&tick_cpu_device); clockevents_tick_resume(td->evtdev); if (!broadcast) { @@ -416,7 +437,6 @@ void tick_freeze(void) timekeeping_suspend(); } else { tick_suspend(); - tick_suspend_broadcast(); } raw_spin_unlock(&tick_freeze_lock); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index dd2c45d..85a9571 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -23,7 +23,6 @@ extern void tick_check_new_device(struct clock_event_device *dev); extern void tick_handover_do_timer(int *cpup); extern void tick_shutdown(unsigned int *cpup); extern void tick_suspend(void); -extern void tick_resume(void); extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); @@ -42,6 +41,8 @@ extern void clockevents_exchange_device(struct clock_event_device *old, extern void clockevents_handle_noop(struct clock_event_device *dev); extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); +#else +static inline void tick_suspend(void) { } #endif /* GENERIC_CLOCKEVENTS */ /* Oneshot related functions */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c3fcff0..5b12292 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1389,9 +1389,7 @@ void timekeeping_resume(void) touch_softlockup_watchdog(); - clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); - - /* Resume hrtimers */ + tick_resume(); hrtimers_resume(); } @@ -1444,7 +1442,7 @@ int timekeeping_suspend(void) write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); - clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); + tick_suspend(); clocksource_suspend(); clockevents_suspend(); -- cgit v1.1 From 080873ce2d1abd8c0a2b8c87bfa0762546a6b713 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:09:55 +0100 Subject: tick: Make tick_resume_broadcast_oneshot() static Solely used in tick-broadcast.c and the return value is hardcoded 0. Make it static and void. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. 
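For illustration: the suspend/resume split in the hunks above trades a reason-code multiplexer for named entry points. A sketch of the two styles, with hypothetical demo_* names:

/* Old style: one entry point, behaviour selected by a reason code */
enum demo_reason { DEMO_SUSPEND, DEMO_RESUME };

static void demo_notify(enum demo_reason reason)
{
	switch (reason) {
	case DEMO_SUSPEND:
		/* ... suspend work ... */
		break;
	case DEMO_RESUME:
		/* ... resume work ... */
		break;
	}
}

/* New style: explicit, type-safe, greppable entry points */
static void demo_suspend(void)
{
	/* ... suspend work ... */
}

static void demo_resume(void)
{
	/* ... resume work ... */
}

The explicit form lets each call site state its intent directly, as the timekeeping_suspend()/timekeeping_resume() hunks above now do with tick_suspend() and tick_resume().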
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1689058.QkHYDJSRKu@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/tick-broadcast.c | 7 ++++--- kernel/time/tick-internal.h | 2 -- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f0f8ee9..60e6c23 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -37,8 +37,10 @@ static int tick_broadcast_force; #ifdef CONFIG_TICK_ONESHOT static void tick_broadcast_clear_oneshot(int cpu); +static void tick_resume_broadcast_oneshot(struct clock_event_device *bc); #else static inline void tick_broadcast_clear_oneshot(int cpu) { } +static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { } #endif /* @@ -475,7 +477,7 @@ int tick_resume_broadcast(void) break; case TICKDEV_MODE_ONESHOT: if (!cpumask_empty(tick_broadcast_mask)) - broadcast = tick_resume_broadcast_oneshot(bc); + tick_resume_broadcast_oneshot(bc); break; } } @@ -541,10 +543,9 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu, return ret; } -int tick_resume_broadcast_oneshot(struct clock_event_device *bc) +static void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); - return 0; } /* diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 85a9571..5c9f0ee 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -112,7 +112,6 @@ extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); extern int tick_broadcast_oneshot_control(unsigned long reason); extern void tick_broadcast_switch_to_oneshot(void); extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); -extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); extern int tick_broadcast_oneshot_active(void); extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); @@ -122,7 +121,6 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } static inline void tick_broadcast_switch_to_oneshot(void) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } -static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { return 0; } static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline void tick_check_oneshot_broadcast_this_cpu(void) { } static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); } -- cgit v1.1 From f46481d0a7cb942b84145acb80ad43bdb1ff8eb4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:11:04 +0100 Subject: tick/xen: Provide and use tick_suspend_local() and tick_resume_local() Xen calls on every cpu into tick_resume() which is just wrong. tick_resume() is for the syscore global suspend/resume invocation. What XEN really wants is a per cpu local resume function. Provide a tick_resume_local() function and use it in XEN. Also provide a complementary tick_suspend_local() and modify tick_unfreeze() and tick_freeze(), respectively, to use the new local tick resume/suspend functions. Signed-off-by: Thomas Gleixner [ Combined two patches, rebased, modified subject/changelog. ] Signed-off-by: Rafael J. 
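For illustration: the per-cpu split described above gives Xen exactly what it needs, a resume path it can run on every CPU from its own restore handler. A sketch of that usage, mirroring the xen_vcpu_notify_restore() change in the diff that follows; the demo_* names are hypothetical:

#include <linux/smp.h>
#include <linux/tick.h>

static void demo_vcpu_restore(void *unused)
{
	/* The boot CPU is resumed by the generic timekeeping path */
	if (smp_processor_id() == 0)
		return;

	tick_resume_local();	/* this CPU's tick device only */
}

static void demo_arch_resume(void)
{
	/* Run the restore handler on every online CPU and wait */
	on_each_cpu(demo_vcpu_restore, NULL, 1);
}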
Wysocki Cc: Boris Ostrovsky Cc: David Vrabel Cc: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1698741.eezk9tnXtG@vostro.rjw.lan [ Merged to latest timers/core. ] Signed-off-by: Ingo Molnar --- arch/x86/xen/suspend.c | 2 +- include/linux/tick.h | 6 ++--- kernel/time/tick-broadcast.c | 24 +++++++++++++------ kernel/time/tick-common.c | 55 ++++++++++++++++++++++++++++++-------------- kernel/time/tick-internal.h | 8 +++++-- 5 files changed, 65 insertions(+), 30 deletions(-) diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 033e428..d949769 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -85,7 +85,7 @@ static void xen_vcpu_notify_restore(void *data) if (smp_processor_id() == 0) return; - tick_resume(); + tick_resume_local(); } void xen_arch_resume(void) diff --git a/include/linux/tick.h b/include/linux/tick.h index 7e07e0e..a3d4d28 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -29,13 +29,13 @@ extern struct tick_device *tick_get_device(int cpu); extern void __init tick_init(void); extern void tick_freeze(void); extern void tick_unfreeze(void); -/* Should be core only, but XEN resume magic abuses this interface */ -extern void tick_resume(void); +/* Should be core only, but XEN resume magic requires this */ +extern void tick_resume_local(void); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_freeze(void) { } static inline void tick_unfreeze(void) { } -static inline void tick_resume(void) { } +static inline void tick_resume_local(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #ifdef CONFIG_TICK_ONESHOT diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 60e6c23..19cfb38 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -455,11 +455,26 @@ void tick_suspend_broadcast(void) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -int tick_resume_broadcast(void) +/* + * This is called from tick_resume_local() on a resuming CPU. That's + * called from the core resume function, tick_unfreeze() and the magic XEN + * resume hackery. + * + * In none of these cases the broadcast device mode can change and the + * bit of the resuming CPU in the broadcast mask is safe as well. 
+ */ +bool tick_resume_check_broadcast(void) +{ + if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) + return false; + else + return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask); +} + +void tick_resume_broadcast(void) { struct clock_event_device *bc; unsigned long flags; - int broadcast = 0; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -472,8 +487,6 @@ int tick_resume_broadcast(void) case TICKDEV_MODE_PERIODIC: if (!cpumask_empty(tick_broadcast_mask)) tick_broadcast_start_periodic(bc); - broadcast = cpumask_test_cpu(smp_processor_id(), - tick_broadcast_mask); break; case TICKDEV_MODE_ONESHOT: if (!cpumask_empty(tick_broadcast_mask)) @@ -482,11 +495,8 @@ int tick_resume_broadcast(void) } } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); - - return broadcast; } - #ifdef CONFIG_TICK_ONESHOT static cpumask_var_t tick_broadcast_oneshot_mask; diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 1a60c2a..da796d6 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -374,40 +374,32 @@ void tick_shutdown(unsigned int *cpup) } /** - * tick_suspend - Suspend the tick and the broadcast device + * tick_suspend_local - Suspend the local tick device * - * Called from syscore_suspend() via timekeeping_suspend with only one - * CPU online and interrupts disabled or from tick_unfreeze() under - * tick_freeze_lock. + * Called from the local cpu for freeze with interrupts disabled. * * No locks required. Nothing can change the per cpu device. */ -void tick_suspend(void) +static void tick_suspend_local(void) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); clockevents_shutdown(td->evtdev); - tick_suspend_broadcast(); } /** - * tick_resume - Resume the tick and the broadcast device + * tick_resume_local - Resume the local tick device * - * Called from syscore_resume() via timekeeping_resume with only one - * CPU online and interrupts disabled or from tick_unfreeze() under - * tick_freeze_lock. + * Called from the local CPU for unfreeze or XEN resume magic. * * No locks required. Nothing can change the per cpu device. */ -void tick_resume(void) +void tick_resume_local(void) { - struct tick_device *td; - int broadcast; + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); + bool broadcast = tick_resume_check_broadcast(); - broadcast = tick_resume_broadcast(); - td = this_cpu_ptr(&tick_cpu_device); clockevents_tick_resume(td->evtdev); - if (!broadcast) { if (td->mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(td->evtdev, 0); @@ -416,6 +408,35 @@ void tick_resume(void) } } +/** + * tick_suspend - Suspend the tick and the broadcast device + * + * Called from syscore_suspend() via timekeeping_suspend with only one + * CPU online and interrupts disabled or from tick_unfreeze() under + * tick_freeze_lock. + * + * No locks required. Nothing can change the per cpu device. + */ +void tick_suspend(void) +{ + tick_suspend_local(); + tick_suspend_broadcast(); +} + +/** + * tick_resume - Resume the tick and the broadcast device + * + * Called from syscore_resume() via timekeeping_resume with only one + * CPU online and interrupts disabled. + * + * No locks required. Nothing can change the per cpu device. 
+ */ +void tick_resume(void) +{ + tick_resume_broadcast(); + tick_resume_local(); +} + static DEFINE_RAW_SPINLOCK(tick_freeze_lock); static unsigned int tick_freeze_depth; @@ -436,7 +457,7 @@ void tick_freeze(void) if (tick_freeze_depth == num_online_cpus()) { timekeeping_suspend(); } else { - tick_suspend(); + tick_suspend_local(); } raw_spin_unlock(&tick_freeze_lock); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 5c9f0ee..6ba7bce 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -23,6 +23,7 @@ extern void tick_check_new_device(struct clock_event_device *dev); extern void tick_handover_do_timer(int *cpup); extern void tick_shutdown(unsigned int *cpup); extern void tick_suspend(void); +extern void tick_resume(void); extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); @@ -43,6 +44,7 @@ extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); #else static inline void tick_suspend(void) { } +static inline void tick_resume(void) { } #endif /* GENERIC_CLOCKEVENTS */ /* Oneshot related functions */ @@ -81,7 +83,8 @@ extern int tick_is_broadcast_device(struct clock_event_device *dev); extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); extern void tick_shutdown_broadcast(unsigned int *cpup); extern void tick_suspend_broadcast(void); -extern int tick_resume_broadcast(void); +extern void tick_resume_broadcast(void); +extern bool tick_resume_check_broadcast(void); extern void tick_broadcast_init(void); extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); @@ -95,7 +98,8 @@ static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { } static inline void tick_shutdown_broadcast(unsigned int *cpup) { } static inline void tick_suspend_broadcast(void) { } -static inline int tick_resume_broadcast(void) { return 0; } +static inline void tick_resume_broadcast(void) { } +static inline bool tick_resume_check_broadcast(void) { return false; } static inline void tick_broadcast_init(void) { } static inline int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) { return -ENODEV; } -- cgit v1.1 From 7270d11c56f594af4d166b2988421cd8ed933dc1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:11:52 +0100 Subject: arm/bL_switcher: Kill tick suspend hackery Use the new tick_suspend/resume_local() and get rid of the home-brewed implementation of these in the ARM bL switcher. The check for the cpumask is completely pointless. There is no harm in suspending a per cpu tick device unconditionally. If that's a real issue then we fix it properly at the core level and not with some completely undocumented hacks in some random core code. Move the tick internals to the core code, now that this nuisance is gone. Signed-off-by: Thomas Gleixner [ rjw: Rebase, changelog ] Signed-off-by: Rafael J.
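For illustration: with tick_suspend_local() and tick_resume_local() available, the switcher's migration path reduces to bracketing the handover, as the bL_switcher.c hunks below show. A condensed sketch; demo_switch_cluster() is a hypothetical stand-in for the switcher's migration function:

#include <linux/tick.h>

static int demo_switch_cluster(void)
{
	tick_suspend_local();	/* park this CPU's tick device */

	/* ... migrate to the inbound CPU, power down the outbound one ... */

	tick_resume_local();	/* reprogram the tick device afterwards */
	return 0;
}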
Wysocki Cc: Nicolas Pitre Cc: Peter Zijlstra Cc: Russell King Link: http://lkml.kernel.org/r/1655112.Ws17YsMfN7@vostro.rjw.lan Signed-off-by: Ingo Molnar --- arch/arm/common/bL_switcher.c | 16 ++-------------- include/linux/clockchips.h | 6 ------ include/linux/tick.h | 19 ++++--------------- kernel/time/tick-common.c | 2 +- kernel/time/tick-internal.h | 5 +++++ kernel/time/tick-sched.h | 10 ++++++++++ 6 files changed, 22 insertions(+), 36 deletions(-) diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index d4f970a..37dc0fe 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -151,8 +151,6 @@ static int bL_switch_to(unsigned int new_cluster_id) unsigned int mpidr, this_cpu, that_cpu; unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster; struct completion inbound_alive; - struct tick_device *tdev; - enum clock_event_state tdev_state; long volatile *handshake_ptr; int ipi_nr, ret; @@ -219,13 +217,7 @@ static int bL_switch_to(unsigned int new_cluster_id) /* redirect GIC's SGIs to our counterpart */ gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]); - tdev = tick_get_device(this_cpu); - if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu))) - tdev = NULL; - if (tdev) { - tdev_state = tdev->evtdev->state; - clockevents_set_state(tdev->evtdev, CLOCK_EVT_STATE_SHUTDOWN); - } + tick_suspend_local(); ret = cpu_pm_enter(); @@ -251,11 +243,7 @@ static int bL_switch_to(unsigned int new_cluster_id) ret = cpu_pm_exit(); - if (tdev) { - clockevents_set_state(tdev->evtdev, tdev_state); - clockevents_program_event(tdev->evtdev, - tdev->evtdev->next_event, 1); - } + tick_resume_local(); trace_cpu_migrate_finish(ktime_get_real_ns(), ib_mpidr); local_fiq_enable(); diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 50ce975..3ac7e2d 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -198,12 +198,6 @@ clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) freq, minsec); } -/* Should be core only, but is abused by arm bl_switcher */ -extern void clockevents_set_state(struct clock_event_device *dev, - enum clock_event_state state); -extern int clockevents_program_event(struct clock_event_device *dev, - ktime_t expires, bool force); - extern void clockevents_suspend(void); extern void clockevents_resume(void); diff --git a/include/linux/tick.h b/include/linux/tick.h index a3d4d28..589868b 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -11,30 +11,19 @@ #include #include -/* ARM BL switcher abuse support */ -#ifdef CONFIG_GENERIC_CLOCKEVENTS -enum tick_device_mode { - TICKDEV_MODE_PERIODIC, - TICKDEV_MODE_ONESHOT, -}; - -struct tick_device { - struct clock_event_device *evtdev; - enum tick_device_mode mode; -}; -extern struct tick_device *tick_get_device(int cpu); -#endif - #ifdef CONFIG_GENERIC_CLOCKEVENTS extern void __init tick_init(void); extern void tick_freeze(void); extern void tick_unfreeze(void); -/* Should be core only, but XEN resume magic requires this */ +/* Should be core only, but ARM BL switcher requires it */ +extern void tick_suspend_local(void); +/* Should be core only, but XEN resume magic and ARM BL switcher require it */ extern void tick_resume_local(void); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_freeze(void) { } static inline void tick_unfreeze(void) { } +static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } 
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index da796d6..e28ba5c 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -380,7 +380,7 @@ void tick_shutdown(unsigned int *cpup) * * No locks required. Nothing can change the per cpu device. */ -static void tick_suspend_local(void) +void tick_suspend_local(void) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 6ba7bce..5fc2daf 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -28,6 +28,7 @@ extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); extern int tick_is_oneshot_available(void); +extern struct tick_device *tick_get_device(int cpu); extern int clockevents_tick_resume(struct clock_event_device *dev); /* Check, if the device is functional or a dummy for broadcast */ @@ -39,6 +40,10 @@ static inline int tick_device_is_functional(struct clock_event_device *dev) extern void clockevents_shutdown(struct clock_event_device *dev); extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); +extern void clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state); +extern int clockevents_program_event(struct clock_event_device *dev, + ktime_t expires, bool force); extern void clockevents_handle_noop(struct clock_event_device *dev); extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 9307432..28b5da3 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -3,6 +3,16 @@ #include +enum tick_device_mode { + TICKDEV_MODE_PERIODIC, + TICKDEV_MODE_ONESHOT, +}; + +struct tick_device { + struct clock_event_device *evtdev; + enum tick_device_mode mode; +}; + enum tick_nohz_mode { NOHZ_MODE_INACTIVE, NOHZ_MODE_LOWRES, -- cgit v1.1 From 3ae7a939165c6159afb3c09e1d7405b6d1807f2b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Apr 2015 11:26:06 +0200 Subject: tick: Further simplify tick-internal.h Move the broadcasting related section to the GENERIC_CLOCKEVENTS=y section - this also solves build failures on architectures that don't use generic clockevents yet. Also standardize include file style to make it easier to read, and use nesting depth aware preprocessor directives to make future merges easier. Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/time/tick-internal.h | 79 +++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 5fc2daf..b6ba0a4 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -9,8 +9,8 @@ #ifdef CONFIG_GENERIC_CLOCKEVENTS -#define TICK_DO_TIMER_NONE -1 -#define TICK_DO_TIMER_BOOT -2 +# define TICK_DO_TIMER_NONE -1 +# define TICK_DO_TIMER_BOOT -2 DECLARE_PER_CPU(struct tick_device, tick_cpu_device); extern ktime_t tick_next_period; @@ -47,41 +47,9 @@ extern int clockevents_program_event(struct clock_event_device *dev, extern void clockevents_handle_noop(struct clock_event_device *dev); extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); -#else -static inline void tick_suspend(void) { } -static inline void tick_resume(void) { } -#endif /* GENERIC_CLOCKEVENTS */ - -/* Oneshot related functions */ -#ifdef CONFIG_TICK_ONESHOT -extern void tick_setup_oneshot(struct clock_event_device *newdev, - void (*handler)(struct clock_event_device *), - ktime_t nextevt); -extern int tick_program_event(ktime_t expires, int force); -extern void tick_oneshot_notify(void); -extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); -extern void tick_resume_oneshot(void); -static inline bool tick_oneshot_possible(void) { return true; } -extern int tick_oneshot_mode_active(void); -extern void tick_clock_notify(void); -extern int tick_check_oneshot_change(int allow_nohz); -extern int tick_init_highres(void); -#else /* !ONESHOT */ -static inline -void tick_setup_oneshot(struct clock_event_device *newdev, - void (*handler)(struct clock_event_device *), - ktime_t nextevt) { BUG(); } -static inline void tick_resume_oneshot(void) { BUG(); } -static inline int tick_program_event(ktime_t expires, int force) { return 0; } -static inline void tick_oneshot_notify(void) { } -static inline bool tick_oneshot_possible(void) { return false; } -static inline int tick_oneshot_mode_active(void) { return 0; } -static inline void tick_clock_notify(void) { } -static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } -#endif /* !TICK_ONESHOT */ /* Broadcasting support */ -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); extern void tick_install_broadcast_device(struct clock_event_device *dev); extern int tick_is_broadcast_device(struct clock_event_device *dev); @@ -95,7 +63,7 @@ extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadc extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); extern struct tick_device *tick_get_broadcast_device(void); extern struct cpumask *tick_get_broadcast_mask(void); -#else /* !BROADCAST */ +# else /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST: */ static inline void tick_install_broadcast_device(struct clock_event_device *dev) { } static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; } static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; } @@ -113,7 +81,40 @@ static inline void tick_set_periodic_handler(struct clock_event_device *dev, int { dev->event_handler = tick_handle_periodic; } -#endif /* !BROADCAST */ +# endif /* 
!CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */ + +#else /* !GENERIC_CLOCKEVENTS: */ +static inline void tick_suspend(void) { } +static inline void tick_resume(void) { } +#endif /* !GENERIC_CLOCKEVENTS */ + +/* Oneshot related functions */ +#ifdef CONFIG_TICK_ONESHOT +extern void tick_setup_oneshot(struct clock_event_device *newdev, + void (*handler)(struct clock_event_device *), + ktime_t nextevt); +extern int tick_program_event(ktime_t expires, int force); +extern void tick_oneshot_notify(void); +extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); +extern void tick_resume_oneshot(void); +static inline bool tick_oneshot_possible(void) { return true; } +extern int tick_oneshot_mode_active(void); +extern void tick_clock_notify(void); +extern int tick_check_oneshot_change(int allow_nohz); +extern int tick_init_highres(void); +#else /* !CONFIG_TICK_ONESHOT: */ +static inline +void tick_setup_oneshot(struct clock_event_device *newdev, + void (*handler)(struct clock_event_device *), + ktime_t nextevt) { BUG(); } +static inline void tick_resume_oneshot(void) { BUG(); } +static inline int tick_program_event(ktime_t expires, int force) { return 0; } +static inline void tick_oneshot_notify(void) { } +static inline bool tick_oneshot_possible(void) { return false; } +static inline int tick_oneshot_mode_active(void) { return 0; } +static inline void tick_clock_notify(void) { } +static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } +#endif /* !CONFIG_TICK_ONESHOT */ /* Functions related to oneshot broadcasting */ #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) @@ -125,7 +126,7 @@ extern int tick_broadcast_oneshot_active(void); extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); extern struct cpumask *tick_get_broadcast_oneshot_mask(void); -#else /* BROADCAST && ONESHOT */ +#else /* !(BROADCAST && ONESHOT): */ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } static inline void tick_broadcast_switch_to_oneshot(void) { } @@ -133,7 +134,7 @@ static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline void tick_check_oneshot_broadcast_this_cpu(void) { } static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); } -#endif /* !BROADCAST && ONESHOT */ +#endif /* !(BROADCAST && ONESHOT) */ /* NO_HZ_FULL internal */ #ifdef CONFIG_NO_HZ_FULL -- cgit v1.1 From 9eed56e889d8a0bb7870e1216d8d4326dd63ec50 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Apr 2015 11:26:23 +0200 Subject: clockevents: Clean up clockchips.h Do various cleanups on the clockchips.h file: - indent preprocessor blocks to make it more clear which block we are in, this also makes merge resolution easier - comment larger preprocessor blocks consistently, using the: #if FOO ... #else /* !FOO: */ ... #endif /* !FOO */ notation. - unbreak lines - etc. No change in functionality. Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 87 ++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 46 deletions(-) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 3ac7e2d..caeca76 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -22,17 +22,17 @@ enum clock_event_nofitiers { #ifdef CONFIG_GENERIC_CLOCKEVENTS -#include -#include -#include -#include +# include +# include +# include +# include struct clock_event_device; struct module; /* Clock event mode commands for legacy ->set_mode(): OBSOLETE */ enum clock_event_mode { - CLOCK_EVT_MODE_UNUSED = 0, + CLOCK_EVT_MODE_UNUSED, CLOCK_EVT_MODE_SHUTDOWN, CLOCK_EVT_MODE_PERIODIC, CLOCK_EVT_MODE_ONESHOT, @@ -51,7 +51,7 @@ enum clock_event_mode { * from DETACHED or SHUTDOWN. */ enum clock_event_state { - CLOCK_EVT_STATE_DETACHED = 0, + CLOCK_EVT_STATE_DETACHED, CLOCK_EVT_STATE_SHUTDOWN, CLOCK_EVT_STATE_PERIODIC, CLOCK_EVT_STATE_ONESHOT, @@ -60,28 +60,29 @@ enum clock_event_state { /* * Clock event features */ -#define CLOCK_EVT_FEAT_PERIODIC 0x000001 -#define CLOCK_EVT_FEAT_ONESHOT 0x000002 -#define CLOCK_EVT_FEAT_KTIME 0x000004 +# define CLOCK_EVT_FEAT_PERIODIC 0x000001 +# define CLOCK_EVT_FEAT_ONESHOT 0x000002 +# define CLOCK_EVT_FEAT_KTIME 0x000004 + /* - * x86(64) specific misfeatures: + * x86(64) specific (mis)features: * * - Clockevent source stops in C3 State and needs broadcast support. * - Local APIC timer is used as a dummy device. */ -#define CLOCK_EVT_FEAT_C3STOP 0x000008 -#define CLOCK_EVT_FEAT_DUMMY 0x000010 +# define CLOCK_EVT_FEAT_C3STOP 0x000008 +# define CLOCK_EVT_FEAT_DUMMY 0x000010 /* * Core shall set the interrupt affinity dynamically in broadcast mode */ -#define CLOCK_EVT_FEAT_DYNIRQ 0x000020 -#define CLOCK_EVT_FEAT_PERCPU 0x000040 +# define CLOCK_EVT_FEAT_DYNIRQ 0x000020 +# define CLOCK_EVT_FEAT_PERCPU 0x000040 /* * Clockevent device is based on a hrtimer for broadcast */ -#define CLOCK_EVT_FEAT_HRTIMER 0x000080 +# define CLOCK_EVT_FEAT_HRTIMER 0x000080 /** * struct clock_event_device - clock event device descriptor @@ -116,10 +117,8 @@ enum clock_event_state { */ struct clock_event_device { void (*event_handler)(struct clock_event_device *); - int (*set_next_event)(unsigned long evt, - struct clock_event_device *); - int (*set_next_ktime)(ktime_t expires, - struct clock_event_device *); + int (*set_next_event)(unsigned long evt, struct clock_event_device *); + int (*set_next_ktime)(ktime_t expires, struct clock_event_device *); ktime_t next_event; u64 max_delta_ns; u64 min_delta_ns; @@ -136,8 +135,7 @@ struct clock_event_device { * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. * - set_state_{shutdown|periodic|oneshot}(), tick_resume(). 
*/ - void (*set_mode)(enum clock_event_mode mode, - struct clock_event_device *); + void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); int (*set_state_periodic)(struct clock_event_device *); int (*set_state_oneshot)(struct clock_event_device *); int (*set_state_shutdown)(struct clock_event_device *); @@ -169,18 +167,18 @@ struct clock_event_device { * * factor = (clock_ticks << shift) / nanoseconds */ -static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec, - int shift) +static inline unsigned long +div_sc(unsigned long ticks, unsigned long nsec, int shift) { - uint64_t tmp = ((uint64_t)ticks) << shift; + u64 tmp = ((u64)ticks) << shift; do_div(tmp, nsec); + return (unsigned long) tmp; } /* Clock event layer functions */ -extern u64 clockevent_delta2ns(unsigned long latch, - struct clock_event_device *evt); +extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); extern int clockevents_unbind_device(struct clock_event_device *ced, int cpu); @@ -194,42 +192,39 @@ extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); static inline void clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) { - return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, - freq, minsec); + return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, minsec); } extern void clockevents_suspend(void); extern void clockevents_resume(void); -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -#ifdef CONFIG_ARCH_HAS_TICK_BROADCAST +# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +# ifdef CONFIG_ARCH_HAS_TICK_BROADCAST extern void tick_broadcast(const struct cpumask *mask); -#else -#define tick_broadcast NULL -#endif +# else +# define tick_broadcast NULL +# endif extern int tick_receive_broadcast(void); -#endif +# endif -#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +# if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern void tick_setup_hrtimer_broadcast(void); extern int tick_check_broadcast_expired(void); -#else +# else static inline int tick_check_broadcast_expired(void) { return 0; } -static inline void tick_setup_hrtimer_broadcast(void) {}; -#endif +static inline void tick_setup_hrtimer_broadcast(void) { } +# endif extern int clockevents_notify(unsigned long reason, void *arg); -#else /* CONFIG_GENERIC_CLOCKEVENTS */ - -static inline void clockevents_suspend(void) {} -static inline void clockevents_resume(void) {} +#else /* !CONFIG_GENERIC_CLOCKEVENTS: */ +static inline void clockevents_suspend(void) { } +static inline void clockevents_resume(void) { } static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } static inline int tick_check_broadcast_expired(void) { return 0; } -static inline void tick_setup_hrtimer_broadcast(void) {}; -static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } +static inline void tick_setup_hrtimer_broadcast(void) { } -#endif +#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ -#endif +#endif /* _LINUX_CLOCKCHIPS_H */ -- cgit v1.1 From 345527b1edce8df719e0884500c76832a18211c3 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Mon, 30 Mar 2015 14:59:19 +0530 Subject: clockevents: Fix cpu_down() race for hrtimer based broadcasting It was found when doing a hotplug stress test on POWER, that the machine either hit softlockups or rcu_sched stall warnings. 
The issue was traced to commit: 7cba160ad789 ("powernv/cpuidle: Redesign idle states management") which exposed the cpu_down() race with hrtimer based broadcast mode: 5d1638acb9f6 ("tick: Introduce hrtimer based broadcast") The race is the following: Assume CPU1 is the CPU which holds the hrtimer broadcasting duty before it is taken down.

	CPU0					CPU1
	cpu_down()				take_cpu_down()
						disable_interrupts()
	cpu_die()
	  while (CPU1 != CPU_DEAD) {
		msleep(100);
		switch_to_idle();
		stop_cpu_timer();
		schedule_broadcast();
	  }
	tick_cleanup_cpu_dead()
	  take_over_broadcast()

So after CPU1 disabled interrupts it cannot handle the broadcast hrtimer anymore, so CPU0 will be stuck forever. Fix this by explicitly taking over broadcast duty before cpu_die(). This is a temporary workaround. What we really want is a callback in the clockevent device which allows us to do that from the dying CPU by pushing the hrtimer onto a different cpu. That might involve an IPI and is definitely more complex than this immediate fix. Changelog was picked up from: https://lkml.org/lkml/2015/2/16/213 Suggested-by: Thomas Gleixner Tested-by: Nicolas Pitre Signed-off-by: Preeti U. Murthy Cc: linuxppc-dev@lists.ozlabs.org Cc: mpe@ellerman.id.au Cc: nicolas.pitre@linaro.org Cc: peterz@infradead.org Cc: rjw@rjwysocki.net Fixes: http://linuxppc.10917.n7.nabble.com/offlining-cpus-breakage-td88619.html Link: http://lkml.kernel.org/r/20150330092410.24979.59887.stgit@preeti.in.ibm.com [ Merged it to the latest timer tree, renamed the callback, tidied up the changelog. ] Signed-off-by: Ingo Molnar --- include/linux/tick.h | 6 ++++++ kernel/cpu.c | 2 ++ kernel/time/tick-broadcast.c | 19 +++++++++++-------- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index 589868b..f9ff225 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -36,6 +36,12 @@ extern void tick_irq_enter(void); static inline void tick_irq_enter(void) { } #endif +#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +extern void hotplug_cpu__broadcast_tick_pull(int dead_cpu); +#else +static inline void hotplug_cpu__broadcast_tick_pull(int dead_cpu) { } +#endif + #ifdef CONFIG_NO_HZ_COMMON extern int tick_nohz_tick_stopped(void); extern void tick_nohz_idle_enter(void); diff --git a/kernel/cpu.c b/kernel/cpu.c index 1972b16..af5db20 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "smpboot.h" @@ -411,6 +412,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) while (!idle_cpu(cpu)) cpu_relax(); + hotplug_cpu__broadcast_tick_pull(cpu); /* This actually kills the CPU. 
*/ __cpu_die(cpu); diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 19cfb38..f5e0fd5 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -680,14 +680,19 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } -static void broadcast_move_bc(int deadcpu) +void hotplug_cpu__broadcast_tick_pull(int deadcpu) { - struct clock_event_device *bc = tick_broadcast_device.evtdev; + struct clock_event_device *bc; + unsigned long flags; - if (!bc || !broadcast_needs_cpu(bc, deadcpu)) - return; - /* This moves the broadcast assignment to this cpu */ - clockevents_program_event(bc, bc->next_event, 1); + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); + bc = tick_broadcast_device.evtdev; + + if (bc && broadcast_needs_cpu(bc, deadcpu)) { + /* This moves the broadcast assignment to this CPU: */ + clockevents_program_event(bc, bc->next_event, 1); + } + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } /* @@ -924,8 +929,6 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); cpumask_clear_cpu(cpu, tick_broadcast_force_mask); - broadcast_move_bc(cpu); - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -- cgit v1.1 From b337a9380f7effd60d082569dd7e0b97a7549730 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 31 Mar 2015 20:49:00 +0530 Subject: timer: Allocate per-cpu tvec_base's statically Memory for the 'tvec_base' array is allocated separately for the boot CPU (statically) and non-boot CPUs (dynamically). The reason is because __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've made NULL special, hint: lock_timer_base()) and we cannot get a compile time pointer to per-cpu entries because we don't know where we'll map the section, even for the boot cpu. This can be simplified a bit by statically allocating per-cpu memory. The only disadvantage is that memory for one of the structures will stay unused, i.e. for the boot CPU, which uses boot_tvec_bases. This will also guarantee that tvec_base is cacheline aligned. Even though tvec_base has ____cacheline_aligned stuck on, kzalloc_node() does not actually respect that (but guarantees a minimum u64 alignment). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/17cdf560f2727f687ab159707d0aa591f8a2f82d.1427814611.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timer.c | 48 +++++++++++++++++++----------------------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 2d3f5c5..f3cc653 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -90,8 +90,19 @@ struct tvec_base { struct tvec tv5; } ____cacheline_aligned; +/* + * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've + * made NULL special, hint: lock_timer_base()) and we cannot get a compile time + * pointer to per-cpu entries because we don't know where we'll map the section, + * even for the boot cpu. + * + * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the + * rest of them. 
+ */ struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); +static DEFINE_PER_CPU(struct tvec_base, __tvec_bases); + static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; /* Functions below help us manage 'deferrable' flag */ @@ -1534,46 +1545,25 @@ EXPORT_SYMBOL(schedule_timeout_uninterruptible); static int init_timers_cpu(int cpu) { - int j; - struct tvec_base *base; + struct tvec_base *base = per_cpu(tvec_bases, cpu); static char tvec_base_done[NR_CPUS]; + int j; if (!tvec_base_done[cpu]) { - static char boot_done; + static char boot_cpu_skipped; - if (boot_done) { - /* - * The APs use this path later in boot - */ - base = kzalloc_node(sizeof(*base), GFP_KERNEL, - cpu_to_node(cpu)); - if (!base) - return -ENOMEM; - - /* Make sure tvec_base has TIMER_FLAG_MASK bits free */ - if (WARN_ON(base != tbase_get_base(base))) { - kfree(base); - return -ENOMEM; - } - per_cpu(tvec_bases, cpu) = base; + if (!boot_cpu_skipped) { + boot_cpu_skipped = 1; /* skip the boot cpu */ } else { - /* - * This is for the boot CPU - we use compile-time - * static initialisation because per-cpu memory isn't - * ready yet and because the memory allocators are not - * initialised either. - */ - boot_done = 1; - base = &boot_tvec_bases; + base = per_cpu_ptr(&__tvec_bases, cpu); + per_cpu(tvec_bases, cpu) = base; } + spin_lock_init(&base->lock); tvec_base_done[cpu] = 1; base->cpu = cpu; - } else { - base = per_cpu(tvec_bases, cpu); } - for (j = 0; j < TVN_SIZE; j++) { INIT_LIST_HEAD(base->tv5.vec + j); INIT_LIST_HEAD(base->tv4.vec + j); -- cgit v1.1 From 8def906044c02edcedac79aa3d6310ab4d90c4d8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 31 Mar 2015 20:49:01 +0530 Subject: timer: Don't initialize 'tvec_base' on hotplug There is no need to call init_timers_cpu() on every CPU hotplug event, there is not much we need to reset. - Timer-lists are already empty at the end of migrate_timers(). - timer_jiffies will be refreshed while adding a new timer, after the CPU is online again. - active_timers and all_timers can be reset from migrate_timers(). 
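To make the last bullet concrete, this is the shape of the reset at the end of migrate_timers() (a condensed sketch of the hunk in the diff below, not new code):

	/* By this point every tv1..tv5 list has been drained into the new
	 * base, so the dead CPU's base holds no timers and its bookkeeping
	 * can simply be cleared in place: */
	old_base->active_timers = 0;
	old_base->all_timers = 0;
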
Signed-off-by: Viresh Kumar Signed-off-by: Peter Zijlstra (Intel) Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/54a1c30ea7b805af55beb220cadf5a07a21b0a4d.1427814611.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timer.c | 98 +++++++++++++++++++++++------------------------------ 1 file changed, 43 insertions(+), 55 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index f3cc653..1feb9c7 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1543,43 +1543,6 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) } EXPORT_SYMBOL(schedule_timeout_uninterruptible); -static int init_timers_cpu(int cpu) -{ - struct tvec_base *base = per_cpu(tvec_bases, cpu); - static char tvec_base_done[NR_CPUS]; - int j; - - if (!tvec_base_done[cpu]) { - static char boot_cpu_skipped; - - if (!boot_cpu_skipped) { - boot_cpu_skipped = 1; /* skip the boot cpu */ - } else { - base = per_cpu_ptr(&__tvec_bases, cpu); - per_cpu(tvec_bases, cpu) = base; - } - - spin_lock_init(&base->lock); - tvec_base_done[cpu] = 1; - base->cpu = cpu; - } - - for (j = 0; j < TVN_SIZE; j++) { - INIT_LIST_HEAD(base->tv5.vec + j); - INIT_LIST_HEAD(base->tv4.vec + j); - INIT_LIST_HEAD(base->tv3.vec + j); - INIT_LIST_HEAD(base->tv2.vec + j); - } - for (j = 0; j < TVR_SIZE; j++) - INIT_LIST_HEAD(base->tv1.vec + j); - - base->timer_jiffies = jiffies; - base->next_timer = base->timer_jiffies; - base->active_timers = 0; - base->all_timers = 0; - return 0; -} - #ifdef CONFIG_HOTPLUG_CPU static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head) { @@ -1621,6 +1584,9 @@ static void migrate_timers(int cpu) migrate_timer_list(new_base, old_base->tv5.vec + i); } + old_base->active_timers = 0; + old_base->all_timers = 0; + spin_unlock(&old_base->lock); spin_unlock_irq(&new_base->lock); put_cpu_var(tvec_bases); @@ -1630,25 +1596,16 @@ static void migrate_timers(int cpu) static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - long cpu = (long)hcpu; - int err; - - switch(action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - err = init_timers_cpu(cpu); - if (err < 0) - return notifier_from_errno(err); - break; #ifdef CONFIG_HOTPLUG_CPU + switch (action) { case CPU_DEAD: case CPU_DEAD_FROZEN: - migrate_timers(cpu); + migrate_timers((long)hcpu); break; -#endif default: break; } +#endif return NOTIFY_OK; } @@ -1656,18 +1613,49 @@ static struct notifier_block timers_nb = { .notifier_call = timer_cpu_notify, }; +static void __init init_timer_cpu(struct tvec_base *base, int cpu) +{ + int j; -void __init init_timers(void) + base->cpu = cpu; + per_cpu(tvec_bases, cpu) = base; + spin_lock_init(&base->lock); + + for (j = 0; j < TVN_SIZE; j++) { + INIT_LIST_HEAD(base->tv5.vec + j); + INIT_LIST_HEAD(base->tv4.vec + j); + INIT_LIST_HEAD(base->tv3.vec + j); + INIT_LIST_HEAD(base->tv2.vec + j); + } + for (j = 0; j < TVR_SIZE; j++) + INIT_LIST_HEAD(base->tv1.vec + j); + + base->timer_jiffies = jiffies; + base->next_timer = base->timer_jiffies; +} + +static void __init init_timer_cpus(void) { - int err; + struct tvec_base *base; + int local_cpu = smp_processor_id(); + int cpu; + + for_each_possible_cpu(cpu) { + if (cpu == local_cpu) + base = &boot_tvec_bases; + else + base = per_cpu_ptr(&__tvec_bases, cpu); + + init_timer_cpu(base, cpu); + } +} +void __init init_timers(void) +{ /* ensure there are enough low bits for flags in timer->base pointer */ BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK); - err = 
timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - BUG_ON(err != NOTIFY_OK); - + init_timer_cpus(); init_timer_stats(); register_cpu_notifier(&timers_nb); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); -- cgit v1.1 From 3650b57fdf208bc0e36cbe7b5e0744bd0e0cf34d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 31 Mar 2015 20:49:02 +0530 Subject: timer: Further simplify the SMP and HOTPLUG logic Remove one CONFIG_HOTPLUG_CPU #ifdef in trade for introducing one CONFIG_SMP #ifdef. The CONFIG_SMP ifdef avoids declaring the per-CPU __tvec_bases storage on UP systems since they already have boot_tvec_bases. Also (re)add a runtime check on the base alignment -- for the paranoid amongst us :-) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/fdd2d35e169bdc554ffa3fe77f77716298c75ada.1427814611.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timer.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 1feb9c7..2ece3aa 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -101,7 +101,6 @@ struct tvec_base { */ struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); -static DEFINE_PER_CPU(struct tvec_base, __tvec_bases); static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; @@ -1038,6 +1037,8 @@ int try_to_del_timer_sync(struct timer_list *timer) EXPORT_SYMBOL(try_to_del_timer_sync); #ifdef CONFIG_SMP +static DEFINE_PER_CPU(struct tvec_base, __tvec_bases); + /** * del_timer_sync - deactivate a timer and wait for the handler to finish. * @timer: the timer to be deactivated @@ -1591,12 +1592,10 @@ static void migrate_timers(int cpu) spin_unlock_irq(&new_base->lock); put_cpu_var(tvec_bases); } -#endif /* CONFIG_HOTPLUG_CPU */ static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { -#ifdef CONFIG_HOTPLUG_CPU switch (action) { case CPU_DEAD: case CPU_DEAD_FROZEN: @@ -1605,18 +1604,24 @@ static int timer_cpu_notify(struct notifier_block *self, default: break; } -#endif + return NOTIFY_OK; } -static struct notifier_block timers_nb = { - .notifier_call = timer_cpu_notify, -}; +static inline void timer_register_cpu_notifier(void) +{ + cpu_notifier(timer_cpu_notify, 0); +} +#else +static inline void timer_register_cpu_notifier(void) { } +#endif /* CONFIG_HOTPLUG_CPU */ static void __init init_timer_cpu(struct tvec_base *base, int cpu) { int j; + BUG_ON(base != tbase_get_base(base)); + base->cpu = cpu; per_cpu(tvec_bases, cpu) = base; spin_lock_init(&base->lock); @@ -1643,8 +1648,10 @@ static void __init init_timer_cpus(void) for_each_possible_cpu(cpu) { if (cpu == local_cpu) base = &boot_tvec_bases; +#ifdef CONFIG_SMP else base = per_cpu_ptr(&__tvec_bases, cpu); +#endif init_timer_cpu(base, cpu); } @@ -1657,7 +1664,7 @@ void __init init_timers(void) init_timer_cpus(); init_timer_stats(); - register_cpu_notifier(&timers_nb); + timer_register_cpu_notifier(); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } -- cgit v1.1 From 9a806ddbb9a18c510e4acdcc828b9a87f5fd3aef Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:21 -0700 Subject: time: Add y2038 safe read_boot_clock64() As part of addressing in-kernel y2038 issues, this patch adds read_boot_clock64() and replaces all the call sites of read_boot_clock() with this function. 
This is a __weak implementation, which simply calls the existing y2038 unsafe read_boot_clock(). This allows architecture specific implementations to be converted independently, and eventually the y2038 unsafe read_boot_clock() can be removed after all its architecture specific implementations have been converted to read_boot_clock64(). Suggested-by: Arnd Bergmann Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-2-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/timekeeping.h | 1 + kernel/time/timekeeping.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 5047b83..18d27a3 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -269,6 +269,7 @@ static inline bool has_persistent_clock(void) extern void read_persistent_clock(struct timespec *ts); extern void read_boot_clock(struct timespec *ts); +extern void read_boot_clock64(struct timespec64 *ts); extern int update_persistent_clock(struct timespec now); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 5b12292..652e50a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1188,6 +1188,14 @@ void __weak read_boot_clock(struct timespec *ts) ts->tv_nsec = 0; } +void __weak read_boot_clock64(struct timespec64 *ts64) +{ + struct timespec ts; + + read_boot_clock(&ts); + *ts64 = timespec_to_timespec64(ts); +} + /* * timekeeping_init - Initializes the clocksource and common timekeeping values */ @@ -1209,8 +1217,7 @@ void __init timekeeping_init(void) } else if (now.tv_sec || now.tv_nsec) persistent_clock_exist = true; - read_boot_clock(&ts); - boot = timespec_to_timespec64(ts); + read_boot_clock64(&boot); if (!timespec64_valid_strict(&boot)) { pr_warn("WARNING: Boot clock returned invalid value!\n" " Check your CMOS/BIOS settings.\n"); -- cgit v1.1 From 2ee966320028ac846654eba5344540eeb4dc228d Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:22 -0700 Subject: time: Add y2038 safe read_persistent_clock64() As part of addressing in-kernel y2038 issues, this patch adds read_persistent_clock64() and replaces all the call sites of read_persistent_clock() with this function. This is a __weak implementation, which simply calls the existing y2038 unsafe read_persistent_clock(). This allows architecture specific implementations to be converted independently, and eventually the y2038 unsafe read_persistent_clock() can be removed after all its architecture specific implementations have been converted to read_persistent_clock64(). 
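To illustrate the end state of such a conversion, a hypothetical architecture implementation (not part of this patch; my_platform_rtc_read_seconds() is an invented helper) would override the __weak bridge below directly and never touch the 32-bit struct timespec:

	/* Hypothetical arch override of the __weak bridge: */
	void read_persistent_clock64(struct timespec64 *ts)
	{
		/* invented helper, assumed to return a 64-bit second count */
		ts->tv_sec = my_platform_rtc_read_seconds();
		ts->tv_nsec = 0;
	}
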
Suggested-by: Arnd Bergmann Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- arch/mips/lasat/sysctl.c | 4 ++-- include/linux/timekeeping.h | 1 + kernel/time/timekeeping.c | 22 ++++++++++++---------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index 3b7f65c..cf9b4633 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c @@ -75,11 +75,11 @@ static int rtctmp; int proc_dolasatrtc(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - struct timespec ts; + struct timespec64 ts; int r; if (!write) { - read_persistent_clock(&ts); + read_persistent_clock64(&ts); rtctmp = ts.tv_sec; /* check for time < 0 and set to 0 */ if (rtctmp < 0) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 18d27a3..4c0f76f4 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -268,6 +268,7 @@ static inline bool has_persistent_clock(void) } extern void read_persistent_clock(struct timespec *ts); +extern void read_persistent_clock64(struct timespec64 *ts); extern void read_boot_clock(struct timespec *ts); extern void read_boot_clock64(struct timespec64 *ts); extern int update_persistent_clock(struct timespec now); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 652e50a..b1dbfa5 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1173,6 +1173,14 @@ void __weak read_persistent_clock(struct timespec *ts) ts->tv_nsec = 0; } +void __weak read_persistent_clock64(struct timespec64 *ts64) +{ + struct timespec ts; + + read_persistent_clock(&ts); + *ts64 = timespec_to_timespec64(ts); +} + /** * read_boot_clock - Return time of the system start. * @@ -1205,10 +1213,8 @@ void __init timekeeping_init(void) struct clocksource *clock; unsigned long flags; struct timespec64 now, boot, tmp; - struct timespec ts; - read_persistent_clock(&ts); - now = timespec_to_timespec64(ts); + read_persistent_clock64(&now); if (!timespec64_valid_strict(&now)) { pr_warn("WARNING: Persistent clock returned invalid value!\n" " Check your CMOS/BIOS settings.\n"); @@ -1278,7 +1284,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values * @delta: pointer to a timespec64 delta value * - * This hook is for architectures that cannot support read_persistent_clock + * This hook is for architectures that cannot support read_persistent_clock64 * because their RTC/persistent clock is only accessible when irqs are enabled. 
* * This function should only be called by rtc_resume(), and allows @@ -1325,12 +1331,10 @@ void timekeeping_resume(void) struct clocksource *clock = tk->tkr_mono.clock; unsigned long flags; struct timespec64 ts_new, ts_delta; - struct timespec tmp; cycle_t cycle_now, cycle_delta; bool suspendtime_found = false; - read_persistent_clock(&tmp); - ts_new = timespec_to_timespec64(tmp); + read_persistent_clock64(&ts_new); clockevents_resume(); clocksource_resume(); @@ -1406,10 +1410,8 @@ int timekeeping_suspend(void) unsigned long flags; struct timespec64 delta, delta_delta; static struct timespec64 old_delta; - struct timespec tmp; - read_persistent_clock(&tmp); - timekeeping_suspend_time = timespec_to_timespec64(tmp); + read_persistent_clock64(&timekeeping_suspend_time); /* * On some systems the persistent_clock can not be detected at -- cgit v1.1 From 3c00a1fe8496ff29ab62764bb3f4ce4b48089004 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:23 -0700 Subject: time: Add y2038 safe update_persistent_clock64() As part of addressing in-kernel y2038 issues, this patch adds update_persistent_clock64() and replaces all the call sites of update_persistent_clock() with this function. This is a __weak implementation, which simply calls the existing y2038 unsafe update_persistent_clock(). This allows architecture specific implementations to be converted independently, and eventually y2038-unsafe update_persistent_clock() can be removed after all its architecture specific implementations have been converted to update_persistent_clock64(). Suggested-by: Arnd Bergmann Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- drivers/rtc/systohc.c | 2 +- include/linux/timekeeping.h | 1 + kernel/time/ntp.c | 13 ++++++++++++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/systohc.c b/drivers/rtc/systohc.c index eb71872..ef3c07a 100644 --- a/drivers/rtc/systohc.c +++ b/drivers/rtc/systohc.c @@ -11,7 +11,7 @@ * rtc_set_ntp_time - Save NTP synchronized time to the RTC * @now: Current time of day * - * Replacement for the NTP platform function update_persistent_clock + * Replacement for the NTP platform function update_persistent_clock64 * that stores time for later retrieval by rtc_hctosys. 
* * Returns 0 on successful RTC update, -ENODEV if a RTC update is not diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 4c0f76f4..7a2369d 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -272,6 +272,7 @@ extern void read_persistent_clock64(struct timespec64 *ts); extern void read_boot_clock(struct timespec *ts); extern void read_boot_clock64(struct timespec64 *ts); extern int update_persistent_clock(struct timespec now); +extern int update_persistent_clock64(struct timespec64 now); #endif diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 9ad60d0..7a68100 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -458,6 +458,16 @@ out: return leap; } +#ifdef CONFIG_GENERIC_CMOS_UPDATE +int __weak update_persistent_clock64(struct timespec64 now64) +{ + struct timespec now; + + now = timespec64_to_timespec(now64); + return update_persistent_clock(now); +} +#endif + #if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) static void sync_cmos_clock(struct work_struct *work); @@ -493,8 +503,9 @@ static void sync_cmos_clock(struct work_struct *work) if (persistent_clock_is_local) adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); #ifdef CONFIG_GENERIC_CMOS_UPDATE - fail = update_persistent_clock(timespec64_to_timespec(adjust)); + fail = update_persistent_clock64(adjust); #endif + #ifdef CONFIG_RTC_SYSTOHC if (fail == -ENODEV) fail = rtc_set_ntp_time(adjust); -- cgit v1.1 From a451570c008b9e19592e29f15cfd295bdf818c7a Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:24 -0700 Subject: ARM: OMAP: 32k counter: Provide y2038-safe omap_read_persistent_clock() replacement As part of addressing "y2038 problem" for in-kernel uses, this patch adds the y2038-safe omap_read_persistent_clock64() using timespec64. Because we rely on some subsequent changes to convert arm multiarch support, omap_read_persistent_clock() will be removed then. Also remove the needless spinlock, because read_persistent_clock() doesn't run simultaneously. Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Acked-by: Tony Lindgren Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-5-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- arch/arm/plat-omap/counter_32k.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c index 43cf745..b7b7b07 100644 --- a/arch/arm/plat-omap/counter_32k.c +++ b/arch/arm/plat-omap/counter_32k.c @@ -44,24 +44,20 @@ static u64 notrace omap_32k_read_sched_clock(void) } /** - * omap_read_persistent_clock - Return time from a persistent clock. + * omap_read_persistent_clock64 - Return time from a persistent clock. * * Reads the time from a source which isn't disabled during PM, the * 32k sync timer. Convert the cycles elapsed since last read into - * nsecs and adds to a monotonically increasing timespec. + * nsecs and adds to a monotonically increasing timespec64. 
*/ -static struct timespec persistent_ts; +static struct timespec64 persistent_ts; static cycles_t cycles; static unsigned int persistent_mult, persistent_shift; -static DEFINE_SPINLOCK(read_persistent_clock_lock); -static void omap_read_persistent_clock(struct timespec *ts) +static void omap_read_persistent_clock64(struct timespec64 *ts) { unsigned long long nsecs; cycles_t last_cycles; - unsigned long flags; - - spin_lock_irqsave(&read_persistent_clock_lock, flags); last_cycles = cycles; cycles = sync32k_cnt_reg ? readl_relaxed(sync32k_cnt_reg) : 0; @@ -69,11 +65,17 @@ static void omap_read_persistent_clock(struct timespec *ts) nsecs = clocksource_cyc2ns(cycles - last_cycles, persistent_mult, persistent_shift); - timespec_add_ns(&persistent_ts, nsecs); + timespec64_add_ns(&persistent_ts, nsecs); *ts = persistent_ts; +} + +static void omap_read_persistent_clock(struct timespec *ts) +{ + struct timespec64 ts64; - spin_unlock_irqrestore(&read_persistent_clock_lock, flags); + omap_read_persistent_clock64(&ts64); + *ts = timespec64_to_timespec(ts64); } /** -- cgit v1.1 From a0c2998f918e7e597d3c686c5f3d5a30d0382dd6 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:25 -0700 Subject: clocksource/drivers/tegra: Provide y2038-safe tegra_read_persistent_clock() replacement As part of addressing "y2038 problem" for in-kernel uses, this patch adds the y2038-safe tegra_read_persistent_clock64() using timespec64. Because we rely on some subsequent changes to convert arm multiarch support, tegra_read_persistent_clock() will be removed then. Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Acked-by: Thierry Reding Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-6-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/tegra20_timer.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c index d8a3a4e..4a0a603 100644 --- a/drivers/clocksource/tegra20_timer.c +++ b/drivers/clocksource/tegra20_timer.c @@ -51,7 +51,7 @@ static void __iomem *timer_reg_base; static void __iomem *rtc_base; -static struct timespec persistent_ts; +static struct timespec64 persistent_ts; static u64 persistent_ms, last_persistent_ms; static struct delay_timer tegra_delay_timer; @@ -120,26 +120,33 @@ static u64 tegra_rtc_read_ms(void) } /* - * tegra_read_persistent_clock - Return time from a persistent clock. + * tegra_read_persistent_clock64 - Return time from a persistent clock. * * Reads the time from a source which isn't disabled during PM, the * 32k sync timer. Convert the cycles elapsed since last read into - * nsecs and adds to a monotonically increasing timespec. + * nsecs and adds to a monotonically increasing timespec64. 
 * Care must be taken that this function is not called while the * tegra_rtc driver could be executing to avoid race conditions * on the RTC shadow register */ -static void tegra_read_persistent_clock(struct timespec *ts) +static void tegra_read_persistent_clock64(struct timespec64 *ts) { u64 delta; - struct timespec *tsp = &persistent_ts; last_persistent_ms = persistent_ms; persistent_ms = tegra_rtc_read_ms(); delta = persistent_ms - last_persistent_ms; - timespec_add_ns(tsp, delta * NSEC_PER_MSEC); - *ts = *tsp; + timespec64_add_ns(&persistent_ts, delta * NSEC_PER_MSEC); + *ts = persistent_ts; +} + +static void tegra_read_persistent_clock(struct timespec *ts) +{ + struct timespec64 ts64; + + tegra_read_persistent_clock64(&ts64); + *ts = timespec64_to_timespec(ts64); +} static unsigned long tegra_delay_timer_read_counter_long(void) -- cgit v1.1 From cb850717b076d979058d52529e15f1736359d811 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:26 -0700 Subject: ARM, clocksource/drivers: Provide read_boot_clock64() and read_persistent_clock64() and use them As part of addressing "y2038 problem" for in-kernel uses, this patch converts read_boot_clock() to read_boot_clock64() and read_persistent_clock() to read_persistent_clock64() using timespec64 by converting clock_access_fn to use timespec64. Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Acked-by: Thierry Reding (for tegra part) Cc: Russell King Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-7-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/mach/time.h | 3 +-- arch/arm/kernel/time.c | 6 +++--- arch/arm/plat-omap/counter_32k.c | 10 +--------- drivers/clocksource/tegra20_timer.c | 10 +--------- 4 files changed, 6 insertions(+), 23 deletions(-) diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h index 90c12e1..0f79e4d 100644 --- a/arch/arm/include/asm/mach/time.h +++ b/arch/arm/include/asm/mach/time.h @@ -12,8 +12,7 @@ extern void timer_tick(void); -struct timespec; -typedef void (*clock_access_fn)(struct timespec *); +typedef void (*clock_access_fn)(struct timespec64 *); extern int register_persistent_clock(clock_access_fn read_boot, clock_access_fn read_persistent); diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 0cc7e58..a66e37e 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -76,7 +76,7 @@ void timer_tick(void) } #endif -static void dummy_clock_access(struct timespec *ts) +static void dummy_clock_access(struct timespec64 *ts) { ts->tv_sec = 0; ts->tv_nsec = 0; @@ -85,12 +85,12 @@ static void dummy_clock_access(struct timespec *ts) static clock_access_fn __read_persistent_clock = dummy_clock_access; static clock_access_fn __read_boot_clock = dummy_clock_access;; -void read_persistent_clock(struct timespec *ts) +void read_persistent_clock64(struct timespec64 *ts) { __read_persistent_clock(ts); } -void read_boot_clock(struct timespec *ts) +void read_boot_clock64(struct timespec64 *ts) { __read_boot_clock(ts); } diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c index b7b7b07..2438b96 100644 --- a/arch/arm/plat-omap/counter_32k.c +++ b/arch/arm/plat-omap/counter_32k.c @@ -70,14 +70,6 @@ static void omap_read_persistent_clock64(struct timespec64 *ts) *ts = persistent_ts; } -static void omap_read_persistent_clock(struct timespec *ts) -{ - struct timespec64 ts64; - - omap_read_persistent_clock64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - 
/** * omap_init_clocksource_32k - setup and register counter 32k as a * kernel clocksource @@ -118,7 +110,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase) } sched_clock_register(omap_32k_read_sched_clock, 32, 32768); - register_persistent_clock(NULL, omap_read_persistent_clock); + register_persistent_clock(NULL, omap_read_persistent_clock64); pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n"); return 0; diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c index 4a0a603..5a112d7 100644 --- a/drivers/clocksource/tegra20_timer.c +++ b/drivers/clocksource/tegra20_timer.c @@ -141,14 +141,6 @@ static void tegra_read_persistent_clock64(struct timespec64 *ts) *ts = persistent_ts; } -static void tegra_read_persistent_clock(struct timespec *ts) -{ - struct timespec ts64; - - tegra_read_persistent_clock64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - static unsigned long tegra_delay_timer_read_counter_long(void) { return readl(timer_reg_base + TIMERUS_CNTR_1US); @@ -259,7 +251,7 @@ static void __init tegra20_init_rtc(struct device_node *np) else clk_prepare_enable(clk); - register_persistent_clock(NULL, tegra_read_persistent_clock); + register_persistent_clock(NULL, tegra_read_persistent_clock64); } CLOCKSOURCE_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc); -- cgit v1.1 From 8e4ff1a81aa91d12856287c7103d0301ac91351a Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:27 -0700 Subject: drivers/rtc: Provide y2038 safe rtc_class_ops.set_mmss() replacement Currently the rtc_class_op's set_mmss() function takes a 32-bit second value (on 32-bit systems), which is problematic for dates past y2038. This patch provides a safe version named set_mmss64() using y2038 safe time64_t. After this patch, set_mmss() is deprecated and all its users will be fixed to use set_mmss64(), it can be removed when having no users. 
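For a driver author the conversion is mechanical. A minimal sketch, with all foo_* names invented for illustration:

	static int foo_rtc_set_mmss64(struct device *dev, time64_t secs)
	{
		/* secs is 64 bits wide even on 32-bit systems, so it does
		 * not wrap in 2038; foo_write_counter() is a made-up helper */
		return foo_write_counter(dev, secs);
	}

	static const struct rtc_class_ops foo_rtc_ops = {
		.read_time	= foo_rtc_read_time,	/* invented */
		.set_mmss64	= foo_rtc_set_mmss64,	/* preferred over .set_mmss */
	};
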
Signed-off-by: Xunlei Pang [jstultz: Add whitespace fix for checkpatch] Signed-off-by: John Stultz Acked-by: Alessandro Zummo Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-8-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- drivers/rtc/interface.c | 8 +++++++- drivers/rtc/systohc.c | 5 ++++- include/linux/rtc.h | 1 + 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 37215cf..d43ee40 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -72,7 +72,11 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm) err = -ENODEV; else if (rtc->ops->set_time) err = rtc->ops->set_time(rtc->dev.parent, tm); - else if (rtc->ops->set_mmss) { + else if (rtc->ops->set_mmss64) { + time64_t secs64 = rtc_tm_to_time64(tm); + + err = rtc->ops->set_mmss64(rtc->dev.parent, secs64); + } else if (rtc->ops->set_mmss) { time64_t secs64 = rtc_tm_to_time64(tm); err = rtc->ops->set_mmss(rtc->dev.parent, secs64); } else @@ -96,6 +100,8 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs) if (!rtc->ops) err = -ENODEV; + else if (rtc->ops->set_mmss64) + err = rtc->ops->set_mmss64(rtc->dev.parent, secs); else if (rtc->ops->set_mmss) err = rtc->ops->set_mmss(rtc->dev.parent, secs); else if (rtc->ops->read_time && rtc->ops->set_time) { diff --git a/drivers/rtc/systohc.c b/drivers/rtc/systohc.c index ef3c07a..7728d5e 100644 --- a/drivers/rtc/systohc.c +++ b/drivers/rtc/systohc.c @@ -35,7 +35,10 @@ int rtc_set_ntp_time(struct timespec64 now) if (rtc) { /* rtc_hctosys exclusively uses UTC, so we call set_time here, * not set_mmss. */ - if (rtc->ops && (rtc->ops->set_time || rtc->ops->set_mmss)) + if (rtc->ops && + (rtc->ops->set_time || + rtc->ops->set_mmss64 || + rtc->ops->set_mmss)) err = rtc_set_time(rtc, &tm); rtc_class_close(rtc); } diff --git a/include/linux/rtc.h b/include/linux/rtc.h index dcad7ee..8dcf682 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -77,6 +77,7 @@ struct rtc_class_ops { int (*read_alarm)(struct device *, struct rtc_wkalrm *); int (*set_alarm)(struct device *, struct rtc_wkalrm *); int (*proc)(struct device *, struct seq_file *); + int (*set_mmss64)(struct device *, time64_t secs); int (*set_mmss)(struct device *, unsigned long secs); int (*read_callback)(struct device *, int data); int (*alarm_irq_enable)(struct device *, unsigned int enabled); -- cgit v1.1 From 4d644ab84c6ed66f7a628c74d83c34d85bec13bf Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:28 -0700 Subject: drivers/rtc/test: Update driver to address y2038/y2106 issues This driver has a number of y2038/y2106 issues. This patch resolves them by: - Replacing get_seconds() with ktime_get_real_seconds() - Replacing rtc_time_to_tm() with rtc_time64_to_tm() Also add test_rtc_set_mmss64() for testing rtc_class_ops's set_mmss64(), which can be activated by "test_mmss64" module parameter. 
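Exercising the new path is then a matter of (assuming the driver is built as a module):

	modprobe rtc-test test_mmss64=1

After that, any attempt to set the time on the test device (for example hwclock --systohc -f /dev/rtcN) should end up in test_rtc_set_mmss64() and log the 64-bit seconds value.
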
Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Acked-by: Alessandro Zummo Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-9-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- drivers/rtc/rtc-test.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c index 8f86fa9..3a2da4c 100644 --- a/drivers/rtc/rtc-test.c +++ b/drivers/rtc/rtc-test.c @@ -13,6 +13,10 @@ #include #include +static int test_mmss64; +module_param(test_mmss64, int, 0644); +MODULE_PARM_DESC(test_mmss64, "Test struct rtc_class_ops.set_mmss64()."); + static struct platform_device *test0 = NULL, *test1 = NULL; static int test_rtc_read_alarm(struct device *dev, @@ -30,7 +34,13 @@ static int test_rtc_set_alarm(struct device *dev, static int test_rtc_read_time(struct device *dev, struct rtc_time *tm) { - rtc_time_to_tm(get_seconds(), tm); + rtc_time64_to_tm(ktime_get_real_seconds(), tm); + return 0; +} + +static int test_rtc_set_mmss64(struct device *dev, time64_t secs) +{ + dev_info(dev, "%s, secs = %lld\n", __func__, (long long)secs); return 0; } @@ -55,7 +65,7 @@ static int test_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) return 0; } -static const struct rtc_class_ops test_rtc_ops = { +static struct rtc_class_ops test_rtc_ops = { .proc = test_rtc_proc, .read_time = test_rtc_read_time, .read_alarm = test_rtc_read_alarm, @@ -101,6 +111,11 @@ static int test_probe(struct platform_device *plat_dev) int err; struct rtc_device *rtc; + if (test_mmss64) { + test_rtc_ops.set_mmss64 = test_rtc_set_mmss64; + test_rtc_ops.set_mmss = NULL; + } + rtc = devm_rtc_device_register(&plat_dev->dev, "test", &test_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { -- cgit v1.1 From 5c7e11bc66647f2e4bc95de9b4302fff6d612f3a Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:29 -0700 Subject: drivers/rtc/ab3100: Update driver to address y2038/y2106 issues This driver has a number of y2038/y2106 issues. This patch resolves them by: - Replacing rtc_tm_to_time() with rtc_tm_to_time64() - Replacing rtc_time_to_tm() with rtc_time64_to_tm() - Changing ab3100_rtc_set_mmss() to use rtc_class_ops's set_mmss64() After this patch, the driver should not have any remaining y2038/y2106 issues. 
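The same recipe applies to other counter-based RTC drivers (a sketch only; hw_counter and CLOCK_RATE stand in for whatever the hardware actually provides):

	time64_t secs;
	struct rtc_time tm;

	/* read path: 64-bit arithmetic on the raw counter, then the
	 * y2038-safe helper instead of rtc_time_to_tm() */
	secs = hw_counter / (u64)(CLOCK_RATE * 2);
	rtc_time64_to_tm(secs, &tm);

	/* write path: rtc_tm_to_time64() returns time64_t */
	secs = rtc_tm_to_time64(&tm);
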
Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Acked-by: Alessandro Zummo Acked-by: Linus Walleij Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-10-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- drivers/rtc/rtc-ab3100.c | 55 ++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/drivers/rtc/rtc-ab3100.c b/drivers/rtc/rtc-ab3100.c index 1d0340f..9b725c5 100644 --- a/drivers/rtc/rtc-ab3100.c +++ b/drivers/rtc/rtc-ab3100.c @@ -43,21 +43,21 @@ /* * RTC clock functions and device struct declaration */ -static int ab3100_rtc_set_mmss(struct device *dev, unsigned long secs) +static int ab3100_rtc_set_mmss(struct device *dev, time64_t secs) { u8 regs[] = {AB3100_TI0, AB3100_TI1, AB3100_TI2, AB3100_TI3, AB3100_TI4, AB3100_TI5}; unsigned char buf[6]; - u64 fat_time = (u64) secs * AB3100_RTC_CLOCK_RATE * 2; + u64 hw_counter = secs * AB3100_RTC_CLOCK_RATE * 2; int err = 0; int i; - buf[0] = (fat_time) & 0xFF; - buf[1] = (fat_time >> 8) & 0xFF; - buf[2] = (fat_time >> 16) & 0xFF; - buf[3] = (fat_time >> 24) & 0xFF; - buf[4] = (fat_time >> 32) & 0xFF; - buf[5] = (fat_time >> 40) & 0xFF; + buf[0] = (hw_counter) & 0xFF; + buf[1] = (hw_counter >> 8) & 0xFF; + buf[2] = (hw_counter >> 16) & 0xFF; + buf[3] = (hw_counter >> 24) & 0xFF; + buf[4] = (hw_counter >> 32) & 0xFF; + buf[5] = (hw_counter >> 40) & 0xFF; for (i = 0; i < 6; i++) { err = abx500_set_register_interruptible(dev, 0, @@ -75,7 +75,7 @@ static int ab3100_rtc_set_mmss(struct device *dev, unsigned long secs) static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm) { - unsigned long time; + time64_t time; u8 rtcval; int err; @@ -88,7 +88,7 @@ static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm) dev_info(dev, "clock not set (lost power)"); return -EINVAL; } else { - u64 fat_time; + u64 hw_counter; u8 buf[6]; /* Read out time registers */ @@ -98,22 +98,21 @@ static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm) if (err != 0) return err; - fat_time = ((u64) buf[5] << 40) | ((u64) buf[4] << 32) | + hw_counter = ((u64) buf[5] << 40) | ((u64) buf[4] << 32) | ((u64) buf[3] << 24) | ((u64) buf[2] << 16) | ((u64) buf[1] << 8) | (u64) buf[0]; - time = (unsigned long) (fat_time / - (u64) (AB3100_RTC_CLOCK_RATE * 2)); + time = hw_counter / (u64) (AB3100_RTC_CLOCK_RATE * 2); } - rtc_time_to_tm(time, tm); + rtc_time64_to_tm(time, tm); return rtc_valid_tm(tm); } static int ab3100_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) { - unsigned long time; - u64 fat_time; + time64_t time; + u64 hw_counter; u8 buf[6]; u8 rtcval; int err; @@ -134,11 +133,11 @@ static int ab3100_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) AB3100_AL0, buf, 4); if (err) return err; - fat_time = ((u64) buf[3] << 40) | ((u64) buf[2] << 32) | + hw_counter = ((u64) buf[3] << 40) | ((u64) buf[2] << 32) | ((u64) buf[1] << 24) | ((u64) buf[0] << 16); - time = (unsigned long) (fat_time / (u64) (AB3100_RTC_CLOCK_RATE * 2)); + time = hw_counter / (u64) (AB3100_RTC_CLOCK_RATE * 2); - rtc_time_to_tm(time, &alarm->time); + rtc_time64_to_tm(time, &alarm->time); return rtc_valid_tm(&alarm->time); } @@ -147,17 +146,17 @@ static int ab3100_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) { u8 regs[] = {AB3100_AL0, AB3100_AL1, AB3100_AL2, AB3100_AL3}; unsigned char buf[4]; - unsigned long secs; - u64 fat_time; + time64_t secs; + u64 hw_counter; int err; int i; - rtc_tm_to_time(&alarm->time, 
&secs); - fat_time = (u64) secs * AB3100_RTC_CLOCK_RATE * 2; - buf[0] = (fat_time >> 16) & 0xFF; - buf[1] = (fat_time >> 24) & 0xFF; - buf[2] = (fat_time >> 32) & 0xFF; - buf[3] = (fat_time >> 40) & 0xFF; + secs = rtc_tm_to_time64(&alarm->time); + hw_counter = secs * AB3100_RTC_CLOCK_RATE * 2; + buf[0] = (hw_counter >> 16) & 0xFF; + buf[1] = (hw_counter >> 24) & 0xFF; + buf[2] = (hw_counter >> 32) & 0xFF; + buf[3] = (hw_counter >> 40) & 0xFF; /* Set the alarm */ for (i = 0; i < 4; i++) { @@ -193,7 +192,7 @@ static int ab3100_rtc_irq_enable(struct device *dev, unsigned int enabled) static const struct rtc_class_ops ab3100_rtc_ops = { .read_time = ab3100_rtc_read_time, - .set_mmss = ab3100_rtc_set_mmss, + .set_mmss64 = ab3100_rtc_set_mmss, .read_alarm = ab3100_rtc_read_alarm, .set_alarm = ab3100_rtc_set_alarm, .alarm_irq_enable = ab3100_rtc_irq_enable, -- cgit v1.1 From 0307b0d77a0830b0fd4a22b5db4a9fa723a5fa5f Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:30 -0700 Subject: drivers/rtc/mc13xxx: Update driver to address y2038/y2106 issues This driver has a number of y2038/y2106 issues. This patch resolves them by: - Replacing rtc_time_to_tm() with rtc_time64_to_tm() - Changing mc13xxx_rtc_set_mmss() to use rtc_class_ops's set_mmss64() After this patch, the driver should not have any remaining y2038/y2106 issues. Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Acked-by: Alessandro Zummo Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-11-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- drivers/rtc/rtc-mc13xxx.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c index 5bce904..32df1d8 100644 --- a/drivers/rtc/rtc-mc13xxx.c +++ b/drivers/rtc/rtc-mc13xxx.c @@ -83,20 +83,19 @@ static int mc13xxx_rtc_read_time(struct device *dev, struct rtc_time *tm) return ret; } while (days1 != days2); - rtc_time_to_tm(days1 * SEC_PER_DAY + seconds, tm); + rtc_time64_to_tm((time64_t)days1 * SEC_PER_DAY + seconds, tm); return rtc_valid_tm(tm); } -static int mc13xxx_rtc_set_mmss(struct device *dev, unsigned long secs) +static int mc13xxx_rtc_set_mmss(struct device *dev, time64_t secs) { struct mc13xxx_rtc *priv = dev_get_drvdata(dev); unsigned int seconds, days; unsigned int alarmseconds; int ret; - seconds = secs % SEC_PER_DAY; - days = secs / SEC_PER_DAY; + days = div_s64_rem(secs, SEC_PER_DAY, &seconds); mc13xxx_lock(priv->mc13xxx); @@ -159,7 +158,7 @@ static int mc13xxx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) { struct mc13xxx_rtc *priv = dev_get_drvdata(dev); unsigned seconds, days; - unsigned long s1970; + time64_t s1970; int enabled, pending; int ret; @@ -189,10 +188,10 @@ out: alarm->enabled = enabled; alarm->pending = pending; - s1970 = days * SEC_PER_DAY + seconds; + s1970 = (time64_t)days * SEC_PER_DAY + seconds; - rtc_time_to_tm(s1970, &alarm->time); - dev_dbg(dev, "%s: %lu\n", __func__, s1970); + rtc_time64_to_tm(s1970, &alarm->time); + dev_dbg(dev, "%s: %lld\n", __func__, (long long)s1970); return 0; } @@ -200,8 +199,8 @@ out: static int mc13xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) { struct mc13xxx_rtc *priv = dev_get_drvdata(dev); - unsigned long s1970; - unsigned seconds, days; + time64_t s1970; + u32 seconds, days; int ret; mc13xxx_lock(priv->mc13xxx); @@ -215,20 +214,17 @@ static int mc13xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) if 
(unlikely(ret)) goto out; - ret = rtc_tm_to_time(&alarm->time, &s1970); - if (unlikely(ret)) - goto out; + s1970 = rtc_tm_to_time64(&alarm->time); - dev_dbg(dev, "%s: o%2.s %lu\n", __func__, alarm->enabled ? "n" : "ff", - s1970); + dev_dbg(dev, "%s: o%2.s %lld\n", __func__, alarm->enabled ? "n" : "ff", + (long long)s1970); ret = mc13xxx_rtc_irq_enable_unlocked(dev, alarm->enabled, MC13XXX_IRQ_TODA); if (unlikely(ret)) goto out; - seconds = s1970 % SEC_PER_DAY; - days = s1970 / SEC_PER_DAY; + days = div_s64_rem(s1970, SEC_PER_DAY, &seconds); ret = mc13xxx_reg_write(priv->mc13xxx, MC13XXX_RTCDAYA, days); if (unlikely(ret)) @@ -268,7 +264,7 @@ static irqreturn_t mc13xxx_rtc_update_handler(int irq, void *dev) static const struct rtc_class_ops mc13xxx_rtc_ops = { .read_time = mc13xxx_rtc_read_time, - .set_mmss = mc13xxx_rtc_set_mmss, + .set_mmss64 = mc13xxx_rtc_set_mmss, .read_alarm = mc13xxx_rtc_read_alarm, .set_alarm = mc13xxx_rtc_set_alarm, .alarm_irq_enable = mc13xxx_rtc_alarm_irq_enable, -- cgit v1.1 From 482494a8d395877c4776a3d76f89342d7ad7c4c6 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:31 -0700 Subject: drivers/rtc/mxc: Modify rtc_update_alarm() not to touch the alarm time rtc_class_ops's set_alarm() shouldn't deal with the alarm date, as this is handled in the rtc core. See rtc_dev_ioctl()'s RTC_ALM_SET and RTC_WKALM_SET cases. Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Acked-by: Alessandro Zummo Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-12-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- drivers/rtc/rtc-mxc.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index 3c3f8d1..a7b218f 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -173,29 +173,18 @@ static void set_alarm_or_time(struct device *dev, int time_alarm, u32 time) * This function updates the RTC alarm registers and then clears all the * interrupt status bits. 
*/ -static int rtc_update_alarm(struct device *dev, struct rtc_time *alrm) +static void rtc_update_alarm(struct device *dev, struct rtc_time *alrm) { - struct rtc_time alarm_tm, now_tm; - unsigned long now, time; + unsigned long time; struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); void __iomem *ioaddr = pdata->ioaddr; - now = get_alarm_or_time(dev, MXC_RTC_TIME); - rtc_time_to_tm(now, &now_tm); - alarm_tm.tm_year = now_tm.tm_year; - alarm_tm.tm_mon = now_tm.tm_mon; - alarm_tm.tm_mday = now_tm.tm_mday; - alarm_tm.tm_hour = alrm->tm_hour; - alarm_tm.tm_min = alrm->tm_min; - alarm_tm.tm_sec = alrm->tm_sec; - rtc_tm_to_time(&alarm_tm, &time); + rtc_tm_to_time(alrm, &time); /* clear all the interrupt status bits */ writew(readw(ioaddr + RTC_RTCISR), ioaddr + RTC_RTCISR); set_alarm_or_time(dev, MXC_RTC_ALARM, time); - - return 0; } static void mxc_rtc_irq_enable(struct device *dev, unsigned int bit, @@ -346,11 +335,8 @@ static int mxc_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) { struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - int ret; - ret = rtc_update_alarm(dev, &alrm->time); - if (ret) - return ret; + rtc_update_alarm(dev, &alrm->time); memcpy(&pdata->g_rtc_alarm, &alrm->time, sizeof(struct rtc_time)); mxc_rtc_irq_enable(dev, RTC_ALM_BIT, alrm->enabled); -- cgit v1.1 From a015b8aabfd2fb58875dea001f1eac8100eacc2e Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:32 -0700 Subject: drivers/rtc/mxc: Convert get_alarm_or_time()/set_alarm_or_time() to use time64_t We want to convert mxc_rtc_set_mmss() to use rtc_class_ops's set_mmss64(), but it uses get_alarm_or_time()/set_alarm_or_time() internal interfaces which are y2038 unsafe. So here as a separate patch, it converts these two internal interfaces of "mxc" to use safe time64_t to make some preparations. Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Acked-by: Alessandro Zummo Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-13-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- drivers/rtc/rtc-mxc.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index a7b218f..83cba23 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -106,7 +106,7 @@ static inline int is_imx1_rtc(struct rtc_plat_data *data) * This function is used to obtain the RTC time or the alarm value in * second. */ -static u32 get_alarm_or_time(struct device *dev, int time_alarm) +static time64_t get_alarm_or_time(struct device *dev, int time_alarm) { struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); @@ -129,29 +129,28 @@ static u32 get_alarm_or_time(struct device *dev, int time_alarm) hr = hr_min >> 8; min = hr_min & 0xff; - return (((day * 24 + hr) * 60) + min) * 60 + sec; + return ((((time64_t)day * 24 + hr) * 60) + min) * 60 + sec; } /* * This function sets the RTC alarm value or the time value. 
*/ -static void set_alarm_or_time(struct device *dev, int time_alarm, u32 time) +static void set_alarm_or_time(struct device *dev, int time_alarm, time64_t time) { - u32 day, hr, min, sec, temp; + u32 tod, day, hr, min, sec, temp; struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); void __iomem *ioaddr = pdata->ioaddr; - day = time / 86400; - time -= day * 86400; + day = div_s64_rem(time, 86400, &tod); /* time is within a day now */ - hr = time / 3600; - time -= hr * 3600; + hr = tod / 3600; + tod -= hr * 3600; /* time is within an hour now */ - min = time / 60; - sec = time - min * 60; + min = tod / 60; + sec = tod - min * 60; temp = (hr << 8) + min; @@ -175,12 +174,12 @@ static void set_alarm_or_time(struct device *dev, int time_alarm, u32 time) */ static void rtc_update_alarm(struct device *dev, struct rtc_time *alrm) { - unsigned long time; + time64_t time; struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); void __iomem *ioaddr = pdata->ioaddr; - rtc_tm_to_time(alrm, &time); + time = rtc_tm_to_time64(alrm); /* clear all the interrupt status bits */ writew(readw(ioaddr + RTC_RTCISR), ioaddr + RTC_RTCISR); @@ -272,14 +271,14 @@ static int mxc_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) */ static int mxc_rtc_read_time(struct device *dev, struct rtc_time *tm) { - u32 val; + time64_t val; /* Avoid roll-over from reading the different registers */ do { val = get_alarm_or_time(dev, MXC_RTC_TIME); } while (val != get_alarm_or_time(dev, MXC_RTC_TIME)); - rtc_time_to_tm(val, tm); + rtc_time64_to_tm(val, tm); return 0; } @@ -322,7 +321,7 @@ static int mxc_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct rtc_plat_data *pdata = platform_get_drvdata(pdev); void __iomem *ioaddr = pdata->ioaddr; - rtc_time_to_tm(get_alarm_or_time(dev, MXC_RTC_ALARM), &alrm->time); + rtc_time64_to_tm(get_alarm_or_time(dev, MXC_RTC_ALARM), &alrm->time); alrm->pending = ((readw(ioaddr + RTC_RTCISR) & RTC_ALM_BIT)) ? 1 : 0; return 0; -- cgit v1.1 From 933623c38f014b10db564f0ec44f9db64a5ced84 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:33 -0700 Subject: drivers/rtc/mxc: Update driver to address y2038/y2106 issues This driver has a number of y2038/y2106 issues. This patch resolves them by: - Replacing rtc_time_to_tm() with rtc_time64_to_tm() - Replacing rtc_tm_to_time() with rtc_tm_to_time64() - Changing mxc_rtc_set_mmss() to use rtc_class_ops's set_mmss64() After this patch, the driver should not have any remaining y2038/y2106 issues. Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Acked-by: Alessandro Zummo Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-14-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- drivers/rtc/rtc-mxc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index 83cba23..09d422b 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -286,7 +286,7 @@ static int mxc_rtc_read_time(struct device *dev, struct rtc_time *tm) /* * This function sets the internal RTC time based on tm in Gregorian date. 
*/ -static int mxc_rtc_set_mmss(struct device *dev, unsigned long time) +static int mxc_rtc_set_mmss(struct device *dev, time64_t time) { struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); @@ -297,9 +297,9 @@ static int mxc_rtc_set_mmss(struct device *dev, unsigned long time) if (is_imx1_rtc(pdata)) { struct rtc_time tm; - rtc_time_to_tm(time, &tm); + rtc_time64_to_tm(time, &tm); tm.tm_year = 70; - rtc_tm_to_time(&tm, &time); + time = rtc_tm_to_time64(&tm); } /* Avoid roll-over from reading the different registers */ @@ -347,7 +347,7 @@ static int mxc_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) static struct rtc_class_ops mxc_rtc_ops = { .release = mxc_rtc_release, .read_time = mxc_rtc_read_time, - .set_mmss = mxc_rtc_set_mmss, + .set_mmss64 = mxc_rtc_set_mmss, .read_alarm = mxc_rtc_read_alarm, .set_alarm = mxc_rtc_set_alarm, .alarm_irq_enable = mxc_rtc_alarm_irq_enable, -- cgit v1.1 From a5312f56e0a67deed5c7d1191140e00b6d367e01 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:34 -0700 Subject: alpha, rtc: Change to use rtc_class_ops's set_mmss64() Change alpha_rtc_set_mmss() and remote_set_mmss() to use rtc_class_ops's set_mmss64(), to be y2038 safe. Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Acked-by: Alessandro Zummo Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Richard Henderson Link: http://lkml.kernel.org/r/1427945681-29972-15-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/rtc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c index c8d284d..f535a3f 100644 --- a/arch/alpha/kernel/rtc.c +++ b/arch/alpha/kernel/rtc.c @@ -116,7 +116,7 @@ alpha_rtc_set_time(struct device *dev, struct rtc_time *tm) } static int -alpha_rtc_set_mmss(struct device *dev, unsigned long nowtime) +alpha_rtc_set_mmss(struct device *dev, time64_t nowtime) { int retval = 0; int real_seconds, real_minutes, cmos_minutes; @@ -211,7 +211,7 @@ alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) static const struct rtc_class_ops alpha_rtc_ops = { .read_time = alpha_rtc_read_time, .set_time = alpha_rtc_set_time, - .set_mmss = alpha_rtc_set_mmss, + .set_mmss64 = alpha_rtc_set_mmss, .ioctl = alpha_rtc_ioctl, }; @@ -276,7 +276,7 @@ do_remote_mmss(void *data) } static int -remote_set_mmss(struct device *dev, unsigned long now) +remote_set_mmss(struct device *dev, time64_t now) { union remote_data x; if (smp_processor_id() != boot_cpuid) { @@ -290,7 +290,7 @@ remote_set_mmss(struct device *dev, unsigned long now) static const struct rtc_class_ops remote_rtc_ops = { .read_time = remote_read_time, .set_time = remote_set_time, - .set_mmss = remote_set_mmss, + .set_mmss64 = remote_set_mmss, .ioctl = alpha_rtc_ioctl, }; #endif -- cgit v1.1 From 7f2981393af31a854879f2496cab4c978e886902 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:35 -0700 Subject: time: Don't build timekeeping_inject_sleeptime64() if no one uses it timekeeping_inject_sleeptime64() is only used by RTC suspend/resume, so add build dependencies on the necessary RTC related macros. Signed-off-by: Xunlei Pang [ Improve commit message clarity. 
] Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-16-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b1dbfa5..3be559b 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1280,6 +1280,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, tk_debug_account_sleep_time(delta); } +#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) /** * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values * @delta: pointer to a timespec64 delta value @@ -1317,6 +1318,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) /* signal hrtimers about time change */ clock_was_set(); } +#endif /** * timekeeping_resume - Resumes the generic timekeeping subsystem. -- cgit v1.1 From 814dcf8ead04f5ebcec74af06c705b207887f0fa Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:36 -0700 Subject: drivers/rtc: Remove redundant rtc_valid_tm() from rtc_resume() rtc_read_time() has already validated the time via rtc_valid_tm(), so just remove the redundant check. Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-17-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- drivers/rtc/class.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 472a5ad..d40760a 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -117,10 +117,6 @@ static int rtc_resume(struct device *dev) return 0; } - if (rtc_valid_tm(&tm) != 0) { - pr_debug("%s: bogus resume time\n", dev_name(&rtc->dev)); - return 0; - } new_rtc.tv_sec = rtc_tm_to_time64(&tm); new_rtc.tv_nsec = 0; -- cgit v1.1 From 264bb3f79f2a465477cdcd2f0554e21aedc443a3 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:37 -0700 Subject: time: Fix a bug in timekeeping_suspend() with no persistent clock When there's no persistent clock, timekeeping_suspend_time should normally stay zero, but timekeeping_suspend() can break this. At T1, there was a system suspend, so old_delta was assigned T1. After some time, one time adjustment happened, and xtime got the value of T1-dt (0s < dt < 2s). A second suspend then yields a small delta_delta approximating dt, and the compensation logic in timekeeping_suspend() adds it to timekeeping_suspend_time, which should have remained zero. A later timekeeping_resume() with a nonstop clocksource then injects a sleep time offset by this bogus value, so the system clock drifts. Fix this by applying the suspend-time compensation only when a persistent clock is present. Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-18-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3be559b..b7db491 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1255,7 +1255,7 @@ void __init timekeeping_init(void) raw_spin_unlock_irqrestore(&timekeeper_lock, flags); } -/* time in seconds when suspend began */ +/* time in seconds when suspend began for persistent clock */ static struct timespec64 timekeeping_suspend_time; /** @@ -1428,24 +1428,26 @@ int timekeeping_suspend(void) timekeeping_forward_now(tk); timekeeping_suspended = 1; - /* - * To avoid drift caused by repeated suspend/resumes, - * which each can add ~1 second drift error, - * try to compensate so the difference in system time - * and persistent_clock time stays close to constant.
- */ - delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time); - delta_delta = timespec64_sub(delta, old_delta); - if (abs(delta_delta.tv_sec) >= 2) { + if (has_persistent_clock()) { /* - * if delta_delta is too large, assume time correction - * has occured and set old_delta to the current delta. + * To avoid drift caused by repeated suspend/resumes, + * which each can add ~1 second drift error, + * try to compensate so the difference in system time + * and persistent_clock time stays close to constant. */ - old_delta = delta; - } else { - /* Otherwise try to adjust old_system to compensate */ - timekeeping_suspend_time = - timespec64_add(timekeeping_suspend_time, delta_delta); + delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time); + delta_delta = timespec64_sub(delta, old_delta); + if (abs(delta_delta.tv_sec) >= 2) { + /* + * if delta_delta is too large, assume time correction + * has occurred and set old_delta to the current delta. + */ + old_delta = delta; + } else { + /* Otherwise try to adjust old_system to compensate */ + timekeeping_suspend_time = + timespec64_add(timekeeping_suspend_time, delta_delta); + } } timekeeping_update(tk, TK_MIRROR); -- cgit v1.1 From 0fa88cb4b82b5cf7429bc1cef9db006ca035754e Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:38 -0700 Subject: time, drivers/rtc: Don't bother with rtc_resume() for the nonstop clocksource If a system does not provide a persistent_clock(), the time will be updated on resume by rtc_resume(). With the addition of the non-stop clocksources for suspend timing, those systems set the time on resume in timekeeping_resume(), but may not provide a valid persistent_clock(). This results in the rtc_resume() logic thinking no one has set the time and it then will over-write the suspend time again, which is not necessary and only increases clock error. So, fix this for rtc_resume(). This patch also improves the name of persistent_clock_exist to make it more grammatical. 
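The effect on the RTC class driver is easiest to see as a sketch (example_* names are illustrative; the real guards appear in the class.c hunks below): both PM callbacks now ask timekeeping whether their work would be redundant.

#include <linux/timekeeping.h>

static int example_rtc_suspend(struct device *dev)
{
	if (timekeeping_rtc_skipsuspend())
		return 0;	/* a persistent clock covers suspend timing */
	/* ... otherwise record the RTC time for later injection ... */
	return 0;
}

static int example_rtc_resume(struct device *dev)
{
	if (timekeeping_rtc_skipresume())
		return 0;	/* timekeeping already injected the sleep time */
	/* ... otherwise inject via timekeeping_inject_sleeptime64() ... */
	return 0;
}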
Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-19-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- drivers/rtc/class.c | 4 +-- include/linux/timekeeping.h | 9 +++---- kernel/time/timekeeping.c | 66 +++++++++++++++++++++++++++++++++------------ 3 files changed, 54 insertions(+), 25 deletions(-) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index d40760a..c29ba7e 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -55,7 +55,7 @@ static int rtc_suspend(struct device *dev) struct timespec64 delta, delta_delta; int err; - if (has_persistent_clock()) + if (timekeeping_rtc_skipsuspend()) return 0; if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) @@ -102,7 +102,7 @@ static int rtc_resume(struct device *dev) struct timespec64 sleep_time; int err; - if (has_persistent_clock()) + if (timekeeping_rtc_skipresume()) return 0; rtc_hctosys_ret = -ENODEV; diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7a2369d..99176af 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -248,6 +248,9 @@ static inline void timekeeping_clocktai(struct timespec *ts) /* * RTC specific */ +extern bool timekeeping_rtc_skipsuspend(void); +extern bool timekeeping_rtc_skipresume(void); + extern void timekeeping_inject_sleeptime64(struct timespec64 *delta); /* @@ -259,14 +262,8 @@ extern void getnstime_raw_and_real(struct timespec *ts_raw, /* * Persistent clock related interfaces */ -extern bool persistent_clock_exist; extern int persistent_clock_is_local; -static inline bool has_persistent_clock(void) -{ - return persistent_clock_exist; -} - extern void read_persistent_clock(struct timespec *ts); extern void read_persistent_clock64(struct timespec64 *ts); extern void read_boot_clock(struct timespec *ts); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b7db491..79b9bc6 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -64,9 +64,6 @@ static struct tk_fast tk_fast_raw ____cacheline_aligned; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; -/* Flag for if there is a persistent clock on this platform */ -bool __read_mostly persistent_clock_exist = false; - static inline void tk_normalize_xtime(struct timekeeper *tk) { while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) { @@ -1204,6 +1201,12 @@ void __weak read_boot_clock64(struct timespec64 *ts64) *ts64 = timespec_to_timespec64(ts); } +/* Flag for if timekeeping_resume() has injected sleeptime */ +static bool sleeptime_injected; + +/* Flag for if there is a persistent clock on this platform */ +static bool persistent_clock_exists; + /* * timekeeping_init - Initializes the clocksource and common timekeeping values */ @@ -1221,7 +1224,7 @@ void __init timekeeping_init(void) now.tv_sec = 0; now.tv_nsec = 0; } else if (now.tv_sec || now.tv_nsec) - persistent_clock_exist = true; + persistent_clock_exists = true; read_boot_clock64(&boot); if (!timespec64_valid_strict(&boot)) { @@ -1282,11 +1285,47 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) /** + * We have three kinds of time sources to use for sleep time + * injection, the preference order is: + * 1) non-stop clocksource + * 2) persistent clock (ie: RTC accessible when irqs are off) + * 3) RTC + * + * 1) and 2) are used by timekeeping, 3) by 
RTC subsystem. + * If system has neither 1) nor 2), 3) will be used finally. + * + * + * If timekeeping has injected sleeptime via either 1) or 2), + * 3) becomes needless, so in this case we don't need to call + * rtc_resume(), and this is what timekeeping_rtc_skipresume() + * means. + */ +bool timekeeping_rtc_skipresume(void) +{ + return sleeptime_injected; +} + +/** + * 1) can be determined whether to use or not only when doing + * timekeeping_resume() which is invoked after rtc_suspend(), + * so we can't skip rtc_suspend() surely if system has 1). + * + * But if system has 2), 2) will definitely be used, so in this + * case we don't need to call rtc_suspend(), and this is what + * timekeeping_rtc_skipsuspend() means. + */ +bool timekeeping_rtc_skipsuspend(void) +{ + return persistent_clock_exists; +} + +/** * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values * @delta: pointer to a timespec64 delta value * * This hook is for architectures that cannot support read_persistent_clock64 * because their RTC/persistent clock is only accessible when irqs are enabled. + * and also don't have an effective nonstop clocksource. * * This function should only be called by rtc_resume(), and allows * a suspend offset to be injected into the timekeeping values. @@ -1296,13 +1335,6 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; - /* - * Make sure we don't set the clock twice, as timekeeping_resume() - * already did it - */ - if (has_persistent_clock()) - return; - raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); @@ -1334,8 +1366,8 @@ void timekeeping_resume(void) unsigned long flags; struct timespec64 ts_new, ts_delta; cycle_t cycle_now, cycle_delta; - bool suspendtime_found = false; + sleeptime_injected = false; read_persistent_clock64(&ts_new); clockevents_resume(); @@ -1381,13 +1413,13 @@ void timekeeping_resume(void) nsec += ((u64) cycle_delta * mult) >> shift; ts_delta = ns_to_timespec64(nsec); - suspendtime_found = true; + sleeptime_injected = true; } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) { ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time); - suspendtime_found = true; + sleeptime_injected = true; } - if (suspendtime_found) + if (sleeptime_injected) __timekeeping_inject_sleeptime(tk, &ts_delta); /* Re-base the last cycle value */ @@ -1421,14 +1453,14 @@ int timekeeping_suspend(void) * value returned, update the persistent_clock_exists flag. */ if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec) - persistent_clock_exist = true; + persistent_clock_exists = true; raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); timekeeping_suspended = 1; - if (has_persistent_clock()) { + if (persistent_clock_exists) { /* * To avoid drift caused by repeated suspend/resumes, * which each can add ~1 second drift error, -- cgit v1.1 From 8e56f33f8439b2f8e7f4ae7f3d0bfe683ecc3b09 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 1 Apr 2015 20:34:39 -0700 Subject: clocksource: Improve comment explaining clocks_calc_max_nsecs()'s 50% safety margin Ingo noted that the description of clocks_calc_max_nsecs()'s 50% safety margin was somewhat circular. So this patch tries to improve the comment to better explain what we mean by the 50% safety margin and why we need it. 
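A worked example of what the margin buys, as a simplified sketch of the arithmetic (it ignores the maxadj correction the real clocks_calc_max_nsecs() applies): for a 32-bit counter where one cycle is one nanosecond (mult = 1, shift = 0), the hardware can represent about 4.29s, so the core reports about 2.15s, and anything beyond that is treated as suspect rather than silently wrapped.

#include <linux/clocksource.h>
#include <linux/kernel.h>
#include <linux/math64.h>

/* Simplified 50% margin math; a hypothetical helper, not the kernel's. */
static u64 example_max_nsecs(u32 mult, u32 shift, u64 mask)
{
	/* Largest cycle count whose cyc2ns product still fits in 64 bits */
	u64 max_cycles = div_u64(ULLONG_MAX, mult);

	max_cycles = min(max_cycles, mask);
	/* Halve the result: the 50% safety margin described above */
	return clocksource_cyc2ns(max_cycles, mult, shift) >> 1;
}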
Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-20-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c3be3c7..15facb1 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -472,8 +472,11 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) * @max_cyc: maximum cycle value before potential overflow (does not include * any safety margin) * - * NOTE: This function includes a safety margin of 50%, so that bad clock values - * can be detected. + * NOTE: This function includes a safety margin of 50%, in other words, we + * return half the number of nanoseconds the hardware counter can technically + * cover. This is done so that we can potentially detect problems caused by + * delayed timers or bad hardware, which might result in time intervals that + * are larger then what the math used can handle without overflows. */ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc) { -- cgit v1.1 From 3ff70551a942b4c1d3c2e96e31a5c6e369a6d0be Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 01:46:34 +0200 Subject: ACPI/PAD: Remove the local APIC nonsense While looking through the (ab)use of the clockevents_notify() function I stumbled over the following gem in the acpi_pad code: if (lapic_detected_unstable && !lapic_marked_unstable) { /* LAPIC could halt in idle, so notify users */ for_each_online_cpu(i) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &i); lapic_marked_unstable = 1; } On the first CPU that detects the unstable LAPIC condition, this code calls clockevents_notify() to tell the core code that broadcast should be enabled on all online CPUs. Brilliant stuff: it notifies the core code num_online_cpus() times that broadcast should be enabled on the current CPU. This has probably never been noticed because that code was never tested with NOHZ=n and HIGHRES_TIMER=n, or it just worked by chance because one of the other mechanisms told the core in the right way that the local apic timer is wrecked. Sigh, this is: - The 4th incarnation of idle drivers that have their own mechanism to detect and deal with X86_FEATURE_ARAT. - The 2nd incarnation of fake idle mechanisms with a different set of brainmelting bugs. - Has been merged against an explicit NAK of the scheduler maintainer with the promise to improve it over time. - Another example of featuritis-driven trainwreck engineering. - Another pointless waste of my time. Fix this nonsense by removing that lapic detection and notification logic and simply calling into the clockevents code unconditionally. The ARAT feature is marked in the lapic clockevent already, so the core code will just ignore the requests and return. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Borislav Petkov Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1887788.RObRuI4tSv@vostro.rjw.lan Signed-off-by: Ingo Molnar --- drivers/acpi/acpi_pad.c | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index c7b105c..1686e9f 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -41,8 +41,6 @@ static unsigned long power_saving_mwait_eax; static unsigned char tsc_detected_unstable; static unsigned char tsc_marked_unstable; -static unsigned char lapic_detected_unstable; -static unsigned char lapic_marked_unstable; static void power_saving_mwait_init(void) { @@ -82,13 +80,10 @@ static void power_saving_mwait_init(void) */ if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) tsc_detected_unstable = 1; - if (!boot_cpu_has(X86_FEATURE_ARAT)) - lapic_detected_unstable = 1; break; default: - /* TSC & LAPIC could halt in idle */ + /* TSC could halt in idle */ tsc_detected_unstable = 1; - lapic_detected_unstable = 1; } #endif } @@ -177,28 +172,17 @@ static int power_saving_thread(void *data) mark_tsc_unstable("TSC halts in idle"); tsc_marked_unstable = 1; } - if (lapic_detected_unstable && !lapic_marked_unstable) { - int i; - /* LAPIC could halt in idle, so notify users */ - for_each_online_cpu(i) - clockevents_notify( - CLOCK_EVT_NOTIFY_BROADCAST_ON, - &i); - lapic_marked_unstable = 1; - } + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu); + local_irq_disable(); cpu = smp_processor_id(); - if (lapic_marked_unstable) - clockevents_notify( - CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); stop_critical_timings(); mwait_idle_with_hints(power_saving_mwait_eax, 1); start_critical_timings(); - if (lapic_marked_unstable) - clockevents_notify( - CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); local_irq_enable(); if (time_before(expire_time, jiffies)) { -- cgit v1.1 From 592a438ff3fea61d303c5784c209b3f1fd3e16df Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:01:10 +0200 Subject: clockevents: Provide explicit broadcast control functions clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the broadcast control into a separate function and provide inline helpers. Switch clockevents_notify() over. This will go away once all callers are converted. This also gets rid of the nested locking of clockevents_lock and broadcast_lock. The broadcast control functions do not require clockevents_lock. Only the managing functions (setup/shutdown/suspend/resume of the broadcast device require clockevents_lock. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. 
Wysocki Cc: Daniel Lezcano Cc: Len Brown Cc: Peter Zijlstra Cc: Tony Lindgren Link: http://lkml.kernel.org/r/8086559.ttsuS0n1Xr@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/tick.h | 25 ++++++++++++++++++ kernel/time/clockevents.c | 6 ++++- kernel/time/tick-broadcast.c | 62 +++++++++++++++++++------------------------- kernel/time/tick-internal.h | 2 -- 4 files changed, 57 insertions(+), 38 deletions(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index f9ff225..4bb2363 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -42,6 +42,31 @@ extern void hotplug_cpu__broadcast_tick_pull(int dead_cpu); static inline void hotplug_cpu__broadcast_tick_pull(int dead_cpu) { } #endif +enum tick_broadcast_mode { + TICK_BROADCAST_OFF, + TICK_BROADCAST_ON, + TICK_BROADCAST_FORCE, +}; + +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +extern void tick_broadcast_control(enum tick_broadcast_mode mode); +#else +static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } +#endif /* BROADCAST */ + +static inline void tick_broadcast_enable(void) +{ + tick_broadcast_control(TICK_BROADCAST_ON); +} +static inline void tick_broadcast_disable(void) +{ + tick_broadcast_control(TICK_BROADCAST_OFF); +} +static inline void tick_broadcast_force(void) +{ + tick_broadcast_control(TICK_BROADCAST_FORCE); +} + #ifdef CONFIG_NO_HZ_COMMON extern int tick_nohz_tick_stopped(void); extern void tick_nohz_idle_enter(void); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 7af6148..599ff8d 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -656,9 +656,13 @@ int clockevents_notify(unsigned long reason, void *arg) switch (reason) { case CLOCK_EVT_NOTIFY_BROADCAST_ON: + tick_broadcast_enable(); + break; case CLOCK_EVT_NOTIFY_BROADCAST_OFF: + tick_broadcast_disable(); + break; case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: - tick_broadcast_on_off(reason, arg); + tick_broadcast_force(); break; case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f5e0fd5..1a0bee0 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -33,7 +33,7 @@ static cpumask_var_t tick_broadcast_mask; static cpumask_var_t tick_broadcast_on; static cpumask_var_t tmpmask; static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); -static int tick_broadcast_force; +static int tick_broadcast_forced; #ifdef CONFIG_TICK_ONESHOT static void tick_broadcast_clear_oneshot(int cpu); @@ -326,49 +326,54 @@ unlock: raw_spin_unlock(&tick_broadcast_lock); } -/* - * Powerstate information: The system enters/leaves a state, where - * affected devices might stop +/** + * tick_broadcast_control - Enable/disable or force broadcast mode + * @mode: The selected broadcast mode + * + * Called when the system enters a state where affected tick devices + * might stop. Note: TICK_BROADCAST_FORCE cannot be undone. + * + * Called with interrupts disabled, so clockevents_lock is not + * required here because the local clock event device cannot go away + * under us. 
*/ -static void tick_do_broadcast_on_off(unsigned long *reason) +void tick_broadcast_control(enum tick_broadcast_mode mode) { struct clock_event_device *bc, *dev; struct tick_device *td; - unsigned long flags; int cpu, bc_stopped; - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - - cpu = smp_processor_id(); - td = &per_cpu(tick_cpu_device, cpu); + td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; - bc = tick_broadcast_device.evtdev; /* * Is the device not affected by the powerstate ? */ if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) - goto out; + return; if (!tick_device_is_functional(dev)) - goto out; + return; + raw_spin_lock(&tick_broadcast_lock); + cpu = smp_processor_id(); + bc = tick_broadcast_device.evtdev; bc_stopped = cpumask_empty(tick_broadcast_mask); - switch (*reason) { - case CLOCK_EVT_NOTIFY_BROADCAST_ON: - case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: + switch (mode) { + case TICK_BROADCAST_FORCE: + tick_broadcast_forced = 1; + case TICK_BROADCAST_ON: cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) clockevents_shutdown(dev); } - if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE) - tick_broadcast_force = 1; break; - case CLOCK_EVT_NOTIFY_BROADCAST_OFF: - if (tick_broadcast_force) + + case TICK_BROADCAST_OFF: + if (tick_broadcast_forced) break; cpumask_clear_cpu(cpu, tick_broadcast_on); if (!tick_device_is_functional(dev)) @@ -390,22 +395,9 @@ static void tick_do_broadcast_on_off(unsigned long *reason) else tick_broadcast_setup_oneshot(bc); } -out: - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); -} - -/* - * Powerstate information: The system enters/leaves a state, where - * affected devices might stop. - */ -void tick_broadcast_on_off(unsigned long reason, int *oncpu) -{ - if (!cpumask_test_cpu(*oncpu, cpu_online_mask)) - printk(KERN_ERR "tick-broadcast: ignoring broadcast for " - "offline CPU #%d\n", *oncpu); - else - tick_do_broadcast_on_off(&reason); + raw_spin_unlock(&tick_broadcast_lock); } +EXPORT_SYMBOL_GPL(tick_broadcast_control); /* * Set the periodic handler depending on broadcast on/off diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index b6ba0a4..62e331d 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -53,7 +53,6 @@ extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); extern void tick_install_broadcast_device(struct clock_event_device *dev); extern int tick_is_broadcast_device(struct clock_event_device *dev); -extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); extern void tick_shutdown_broadcast(unsigned int *cpup); extern void tick_suspend_broadcast(void); extern void tick_resume_broadcast(void); @@ -68,7 +67,6 @@ static inline void tick_install_broadcast_device(struct clock_event_device *dev) static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; } static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; } static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } -static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { } static inline void tick_shutdown_broadcast(unsigned int *cpup) { } static inline void tick_suspend_broadcast(void) { } static inline void tick_resume_broadcast(void) { } -- cgit v1.1 From 162a688e84df49c5bcc855a5e5bf812d0ec89ad5 Mon Sep 
17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:01:28 +0200 Subject: x86/amd/idle, clockevents: Use explicit broadcast control function Replace the clockevents_notify() call with an explicit function call. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1528188.S1pjqkSL1P@vostro.rjw.lan Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 046e2d6..e94b733 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include @@ -377,11 +377,8 @@ static void amd_e400_idle(void) if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) { cpumask_set_cpu(cpu, amd_e400_c1e_mask); - /* - * Force broadcast so ACPI can not interfere. - */ - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, - &cpu); + /* Force broadcast so ACPI can not interfere. */ + tick_broadcast_force(); pr_info("Switch to broadcast mode on CPU%d\n", cpu); } clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); -- cgit v1.1 From 979081e7440056da28b19e57acf20098caf49103 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:01:49 +0200 Subject: ACPI/PAD: Use explicit broadcast control function Replace the clockevents_notify() call with an explicit function call. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1521832.mm0ZfkTzTA@vostro.rjw.lan Signed-off-by: Ingo Molnar --- drivers/acpi/acpi_pad.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index 1686e9f..244dbc9 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include @@ -172,9 +172,8 @@ static int power_saving_thread(void *data) mark_tsc_unstable("TSC halts in idle"); tsc_marked_unstable = 1; } - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu); - local_irq_disable(); + tick_broadcast_enable(); cpu = smp_processor_id(); clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); stop_critical_timings(); -- cgit v1.1 From ee41eebf9cef624b2e766a4b8688eeeedb65114c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:02:00 +0200 Subject: ACPI/processor: Use explicit broadcast control function Replace the clockevents_notify() call with an explicit function call. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/25071624.dkenaL3SGT@vostro.rjw.lan Signed-off-by: Ingo Molnar --- drivers/acpi/processor_idle.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index c6bb9f1..be54797 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -32,7 +32,7 @@ #include #include #include /* need_resched() */ -#include +#include #include #include #include @@ -157,12 +157,11 @@ static void lapic_timer_check_state(int state, struct acpi_processor *pr, static void __lapic_timer_propagate_broadcast(void *arg) { struct acpi_processor *pr = (struct acpi_processor *) arg; - unsigned long reason; - reason = pr->power.timer_broadcast_on_state < INT_MAX ? 
- CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF; - - clockevents_notify(reason, &pr->id); + if (pr->power.timer_broadcast_on_state < INT_MAX) + tick_broadcast_enable(); + else + tick_broadcast_disable(); } static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) -- cgit v1.1 From ee7a1438b548fb5e206058d6bd0e2a5adf081dbf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:02:18 +0200 Subject: cpuidle: Use explicit broadcast control function Replace the clockevents_notify() call with an explicit function call. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Daniel Lezcano Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/2106401.cYdJzzA6Ic@vostro.rjw.lan Signed-off-by: Ingo Molnar --- drivers/cpuidle/driver.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 2697e87..5db1478 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include "cpuidle.h" @@ -130,21 +130,20 @@ static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv) #endif /** - * cpuidle_setup_broadcast_timer - enable/disable the broadcast timer + * cpuidle_setup_broadcast_timer - enable/disable the broadcast timer on a cpu * @arg: a void pointer used to match the SMP cross call API * - * @arg is used as a value of type 'long' with one of the two values: - * - CLOCK_EVT_NOTIFY_BROADCAST_ON - * - CLOCK_EVT_NOTIFY_BROADCAST_OFF + * If @arg is NULL broadcast is disabled otherwise enabled * - * Set the broadcast timer notification for the current CPU. This function - * is executed per CPU by an SMP cross call. It not supposed to be called - * directly. + * This function is executed per CPU by an SMP cross call. It's not + * supposed to be called directly. */ static void cpuidle_setup_broadcast_timer(void *arg) { - int cpu = smp_processor_id(); - clockevents_notify((long)(arg), &cpu); + if (arg) + tick_broadcast_enable(); + else + tick_broadcast_disable(); } /** @@ -239,7 +238,7 @@ static int __cpuidle_register_driver(struct cpuidle_driver *drv) if (drv->bctimer) on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer, - (void *)CLOCK_EVT_NOTIFY_BROADCAST_ON, 1); + (void *)1, 1); poll_idle_init(drv); @@ -263,7 +262,7 @@ static void __cpuidle_unregister_driver(struct cpuidle_driver *drv) if (drv->bctimer) { drv->bctimer = 0; on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer, - (void *)CLOCK_EVT_NOTIFY_BROADCAST_OFF, 1); + NULL, 1); } __cpuidle_unset_driver(drv); -- cgit v1.1 From 76962caa4b691ad09556903602143fc8b16802ae Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:02:34 +0200 Subject: intel_idle: Use explicit broadcast control function Replace the clockevents_notify() call with an explicit function call. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. 
Wysocki Cc: Len Brown Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/3878165.rXNXrtVNuy@vostro.rjw.lan Signed-off-by: Ingo Molnar --- drivers/idle/intel_idle.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b0e5852..93f84f7 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -55,7 +55,7 @@ #include #include -#include +#include #include #include #include @@ -665,13 +665,12 @@ static void intel_idle_freeze(struct cpuidle_device *dev, static void __setup_broadcast_timer(void *arg) { - unsigned long reason = (unsigned long)arg; - int cpu = smp_processor_id(); - - reason = reason ? - CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF; + unsigned long on = (unsigned long)arg; - clockevents_notify(reason, &cpu); + if (on) + tick_broadcast_enable(); + else + tick_broadcast_disable(); } static int cpu_hotplug_notify(struct notifier_block *n, -- cgit v1.1 From fa8589fe3bfafadd80677c8eabae97dc5dab22c0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:02:47 +0200 Subject: ARM: OMAP: Use explicit broadcast control function Replace the clockevents_notify() call with an explicit function call. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Tony Lindgren Link: http://lkml.kernel.org/r/2124877.3nbWGILHCV@vostro.rjw.lan Signed-off-by: Ingo Molnar --- arch/arm/mach-omap2/cpuidle44xx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index 01e398a..9284dc5 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include @@ -184,8 +184,7 @@ fail: */ static void omap_setup_broadcast_timer(void *arg) { - int cpu = smp_processor_id(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu); + tick_broadcast_enable(); } static struct cpuidle_driver omap4_idle_driver = { -- cgit v1.1 From 89feddbfe7023ccfb4a6d7f5e3f5161d91b28b18 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:03:42 +0200 Subject: clockevents: Remove the broadcast control leftovers All users converted. Remove the notify leftovers. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/2076318.76XJZ8QYP3@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 3 --- kernel/time/clockevents.c | 10 ---------- 2 files changed, 13 deletions(-) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index caeca76..438fafa 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -11,9 +11,6 @@ /* Clock event notification values */ enum clock_event_nofitiers { CLOCK_EVT_NOTIFY_ADD, - CLOCK_EVT_NOTIFY_BROADCAST_ON, - CLOCK_EVT_NOTIFY_BROADCAST_OFF, - CLOCK_EVT_NOTIFY_BROADCAST_FORCE, CLOCK_EVT_NOTIFY_BROADCAST_ENTER, CLOCK_EVT_NOTIFY_BROADCAST_EXIT, CLOCK_EVT_NOTIFY_CPU_DYING, diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 599ff8d..dba0b83 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -655,16 +655,6 @@ int clockevents_notify(unsigned long reason, void *arg) raw_spin_lock_irqsave(&clockevents_lock, flags); switch (reason) { - case CLOCK_EVT_NOTIFY_BROADCAST_ON: - tick_broadcast_enable(); - break; - case CLOCK_EVT_NOTIFY_BROADCAST_OFF: - tick_broadcast_disable(); - break; - case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: - tick_broadcast_force(); - break; - case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: ret = tick_broadcast_oneshot_control(reason); -- cgit v1.1 From 1fe5d5c3c9ba0c4ade18e3325cba0ffe35127941 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:05:15 +0200 Subject: clockevents: Provide explicit broadcast oneshot control functions clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the broadcast oneshot control into a separate function and provide inline helpers. Switch clockevents_notify() over. This will go away once all callers are converted. This also gets rid of the nested locking of clockevents_lock and broadcast_lock. The broadcast oneshot control functions do not require clockevents_lock. Only the managing functions (setup/shutdown/suspend/resume of the broadcast device require clockevents_lock. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. 
Wysocki Cc: Alexandre Courbot Cc: Daniel Lezcano Cc: Len Brown Cc: Peter Zijlstra Cc: Stephen Warren Cc: Thierry Reding Cc: Tony Lindgren Link: http://lkml.kernel.org/r/13000649.8qZuEDV0OA@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/tick.h | 19 +++++++++++++++++++ kernel/time/clockevents.c | 4 +++- kernel/time/tick-broadcast.c | 28 +++++++++++++++++----------- kernel/time/tick-internal.h | 2 -- 4 files changed, 39 insertions(+), 14 deletions(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index 4bb2363..6119321 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -48,12 +48,23 @@ enum tick_broadcast_mode { TICK_BROADCAST_FORCE, }; +enum tick_broadcast_state { + TICK_BROADCAST_EXIT, + TICK_BROADCAST_ENTER, +}; + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern void tick_broadcast_control(enum tick_broadcast_mode mode); #else static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ +#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); +#else +static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { return 0; } +#endif + static inline void tick_broadcast_enable(void) { tick_broadcast_control(TICK_BROADCAST_ON); @@ -66,6 +77,14 @@ static inline void tick_broadcast_force(void) { tick_broadcast_control(TICK_BROADCAST_FORCE); } +static inline int tick_broadcast_enter(void) +{ + return tick_broadcast_oneshot_control(TICK_BROADCAST_ENTER); +} +static inline void tick_broadcast_exit(void) +{ + tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT); +} #ifdef CONFIG_NO_HZ_COMMON extern int tick_nohz_tick_stopped(void); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index dba0b83..7791b1c 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -656,8 +656,10 @@ int clockevents_notify(unsigned long reason, void *arg) switch (reason) { case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: + tick_broadcast_enter(); + break; case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: - ret = tick_broadcast_oneshot_control(reason); + tick_broadcast_exit(); break; case CLOCK_EVT_NOTIFY_CPU_DYING: diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 1a0bee0..55e43f2 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -687,18 +687,23 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -/* - * Powerstate information: The system enters/leaves a state, where - * affected devices might stop +/** + * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode + * @state: The target state (enter/exit) + * + * The system enters/leaves a state, where affected devices might stop * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. + * + * Called with interrupts disabled, so clockevents_lock is not + * required here because the local clock event device cannot go away + * under us. 
*/ -int tick_broadcast_oneshot_control(unsigned long reason) +int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct clock_event_device *bc, *dev; struct tick_device *td; - unsigned long flags; - ktime_t now; int cpu, ret = 0; + ktime_t now; /* * Periodic mode does not care about the enter/exit of power @@ -711,17 +716,17 @@ int tick_broadcast_oneshot_control(unsigned long reason) * We are called with preemtion disabled from the depth of the * idle code, so we can't be moved away. */ - cpu = smp_processor_id(); - td = &per_cpu(tick_cpu_device, cpu); + td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) return 0; + raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; + cpu = smp_processor_id(); - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { + if (state == TICK_BROADCAST_ENTER) { if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); broadcast_shutdown_local(bc, dev); @@ -813,9 +818,10 @@ int tick_broadcast_oneshot_control(unsigned long reason) } } out: - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); + raw_spin_unlock(&tick_broadcast_lock); return ret; } +EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control); /* * Reset the one shot broadcast for a cpu diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 62e331d..0266f9d 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -117,7 +117,6 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } /* Functions related to oneshot broadcasting */ #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); -extern int tick_broadcast_oneshot_control(unsigned long reason); extern void tick_broadcast_switch_to_oneshot(void); extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); extern int tick_broadcast_oneshot_active(void); @@ -126,7 +125,6 @@ bool tick_broadcast_oneshot_available(void); extern struct cpumask *tick_get_broadcast_oneshot_mask(void); #else /* !(BROADCAST && ONESHOT): */ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } -static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } static inline void tick_broadcast_switch_to_oneshot(void) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } -- cgit v1.1 From 435c350e8197488f12c97e7df28a9c2199bd1673 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:05:53 +0200 Subject: x86/amd/idle, clockevents: Use explicit broadcast oneshot control functions Replace the clockevents_notify() call with an explicit function call. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/8569669.lgxIty9PKW@vostro.rjw.lan Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e94b733..2f43c62 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -381,7 +381,7 @@ static void amd_e400_idle(void) tick_broadcast_force(); pr_info("Switch to broadcast mode on CPU%d\n", cpu); } - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); + tick_broadcast_enter(); default_idle(); @@ -390,7 +390,7 @@ static void amd_e400_idle(void) * called with interrupts disabled. */ local_irq_disable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); + tick_broadcast_exit(); local_irq_enable(); } else default_idle(); -- cgit v1.1 From c79521354e71a1fbcee040ee3147cadc0f8e3c97 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:06:37 +0200 Subject: ACPI/PAD: Use explicit broadcast oneshot control function Replace the clockevents_notify() call with an explicit function call. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1552509.UntNmyqF5v@vostro.rjw.lan Signed-off-by: Ingo Molnar --- drivers/acpi/acpi_pad.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index 244dbc9..6bc9cbc 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -150,7 +150,6 @@ static int power_saving_thread(void *data) sched_setscheduler(current, SCHED_RR, ¶m); while (!kthread_should_stop()) { - int cpu; unsigned long expire_time; try_to_freeze(); @@ -174,14 +173,13 @@ static int power_saving_thread(void *data) } local_irq_disable(); tick_broadcast_enable(); - cpu = smp_processor_id(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); + tick_broadcast_enter(); stop_critical_timings(); mwait_idle_with_hints(power_saving_mwait_eax, 1); start_critical_timings(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); + tick_broadcast_exit(); local_irq_enable(); if (time_before(expire_time, jiffies)) { -- cgit v1.1 From 7815701c5cd7276b712d898b3cf49c55e587dbb1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:12:03 +0200 Subject: ACPI/idle: Use explicit broadcast control function Replace the clockevents_notify() call with an explicit function call. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/2653377.MSAlfA939I@vostro.rjw.lan Signed-off-by: Ingo Molnar --- drivers/acpi/processor_idle.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index be54797..5335519 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -178,11 +178,10 @@ static void lapic_timer_state_broadcast(struct acpi_processor *pr, int state = cx - pr->power.states; if (state >= pr->power.timer_broadcast_on_state) { - unsigned long reason; - - reason = broadcast ? 
CLOCK_EVT_NOTIFY_BROADCAST_ENTER : - CLOCK_EVT_NOTIFY_BROADCAST_EXIT; - clockevents_notify(reason, &pr->id); + if (broadcast) + tick_broadcast_enter(); + else + tick_broadcast_exit(); } } -- cgit v1.1 From f6cee191fc6b286f9056a13456c4c8ade0aeb890 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:14:23 +0200 Subject: intel_idle: Use explicit broadcast oneshot control function Replace the clockevents_notify() call with an explicit function call. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20714596.QMfNNPbuyU@vostro.rjw.lan Signed-off-by: Ingo Molnar --- drivers/idle/intel_idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 93f84f7..5c979d0 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -638,12 +638,12 @@ static int intel_idle(struct cpuidle_device *dev, leave_mm(cpu); if (!(lapic_timer_reliable_states & (1 << (cstate)))) - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); + tick_broadcast_enter(); mwait_idle_with_hints(eax, ecx); if (!(lapic_timer_reliable_states & (1 << (cstate)))) - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); + tick_broadcast_exit(); return index; } -- cgit v1.1 From fb7f0398a98020def9429ddd7b4a8fc2d948b092 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:31:29 +0200 Subject: ARM: OMAP: Use explicit broadcast oneshot control function Replace the clockevents_notify() call with an explicit function call. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Tony Lindgren Link: http://lkml.kernel.org/r/3123047.uVjevtxDV7@vostro.rjw.lan Signed-off-by: Ingo Molnar --- arch/arm/mach-omap2/cpuidle44xx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index 9284dc5..57d4298 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -84,7 +84,6 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, { struct idle_statedata *cx = state_ptr + index; u32 mpuss_can_lose_context = 0; - int cpu_id = smp_processor_id(); /* * CPU0 has to wait and stay ON until CPU1 is OFF state. @@ -112,7 +111,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, mpuss_can_lose_context = (cx->mpu_state == PWRDM_POWER_RET) && (cx->mpu_logic_state == PWRDM_POWER_OFF); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu_id); + tick_broadcast_enter(); /* * Call idle CPU PM enter notifier chain so that @@ -169,7 +168,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, if (dev->cpu == 0 && mpuss_can_lose_context) cpu_cluster_pm_exit(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu_id); + tick_broadcast_exit(); fail: cpuidle_coupled_parallel_barrier(dev, &abort_barrier); -- cgit v1.1 From a0b4122447a3c1a467ce4e4f1bb863e1170394d5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:32:14 +0200 Subject: ARM: Tegra: Use explicit broadcast oneshot control function Replace the clockevents_notify() call with an explicit function call. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. 
Wysocki Cc: Alexandre Courbot Cc: Peter Zijlstra Cc: Stephen Warren Cc: Thierry Reding Link: http://lkml.kernel.org/r/2131111.rjxRLX1eZB@vostro.rjw.lan Signed-off-by: Ingo Molnar --- arch/arm/mach-tegra/cpuidle-tegra114.c | 6 +++--- arch/arm/mach-tegra/cpuidle-tegra20.c | 10 +++++----- arch/arm/mach-tegra/cpuidle-tegra30.c | 10 +++++----- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm/mach-tegra/cpuidle-tegra114.c b/arch/arm/mach-tegra/cpuidle-tegra114.c index f2b586d..155807f 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra114.c +++ b/arch/arm/mach-tegra/cpuidle-tegra114.c @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include #include @@ -44,7 +44,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev, tegra_set_cpu_in_lp2(); cpu_pm_enter(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + tick_broadcast_enter(); call_firmware_op(prepare_idle); @@ -52,7 +52,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev, if (call_firmware_op(do_idle, 0) == -ENOSYS) cpu_suspend(0, tegra30_sleep_cpu_secondary_finish); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); cpu_pm_exit(); tegra_clear_cpu_in_lp2(); diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c index 4f25a7c..48844ae 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra20.c +++ b/arch/arm/mach-tegra/cpuidle-tegra20.c @@ -20,7 +20,7 @@ */ #include -#include +#include #include #include #include @@ -136,11 +136,11 @@ static bool tegra20_cpu_cluster_power_down(struct cpuidle_device *dev, if (tegra20_reset_cpu_1() || !tegra_cpu_rail_off_ready()) return false; - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + tick_broadcast_enter(); tegra_idle_lp2_last(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); if (cpu_online(1)) tegra20_wake_cpu1_from_reset(); @@ -153,13 +153,13 @@ static bool tegra20_idle_enter_lp2_cpu_1(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + tick_broadcast_enter(); cpu_suspend(0, tegra20_sleep_cpu_secondary_finish); tegra20_cpu_clear_resettable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); return true; } diff --git a/arch/arm/mach-tegra/cpuidle-tegra30.c b/arch/arm/mach-tegra/cpuidle-tegra30.c index f8815ed..84d809a 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra30.c +++ b/arch/arm/mach-tegra/cpuidle-tegra30.c @@ -20,7 +20,7 @@ */ #include -#include +#include #include #include #include @@ -76,11 +76,11 @@ static bool tegra30_cpu_cluster_power_down(struct cpuidle_device *dev, return false; } - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + tick_broadcast_enter(); tegra_idle_lp2_last(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); return true; } @@ -90,13 +90,13 @@ static bool tegra30_cpu_core_power_down(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + tick_broadcast_enter(); smp_wmb(); cpu_suspend(0, tegra30_sleep_cpu_secondary_finish); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); return true; } -- cgit v1.1 From 335f49196fd6011521f078cb44f445847e5aa183 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:34:49 +0200 Subject: sched/idle: Use explicit 
broadcast oneshot control function Replace the clockevents_notify() call with an explicit function call. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/6422336.RMm7oUHcXh@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/sched/idle.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 80014a1..4d207d2 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -158,8 +158,7 @@ static void cpuidle_idle_call(void) * is used from another cpu as a broadcast timer, this call may * fail if it is not available */ - if (broadcast && - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu)) + if (broadcast && tick_broadcast_enter()) goto use_default; /* Take note of the planned idle state. */ @@ -176,7 +175,7 @@ static void cpuidle_idle_call(void) idle_set_state(this_rq(), NULL); if (broadcast) - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); /* * Give the governor an opportunity to reflect on the outcome -- cgit v1.1 From ffa48c0d76803057ee89bf220305466d74256d7b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Apr 2015 02:36:10 +0200 Subject: clockevents: Remove broadcast oneshot control leftovers Now that all users are converted over to explicit calls into the clockevents state machine, remove the notification chain leftovers. Original-from: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: John Stultz Link: http://lkml.kernel.org/r/14018863.NQUzkFuafr@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 2 -- kernel/time/clockevents.c | 7 ------- 2 files changed, 9 deletions(-) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 438fafa..f97f498 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -11,8 +11,6 @@ /* Clock event notification values */ enum clock_event_nofitiers { CLOCK_EVT_NOTIFY_ADD, - CLOCK_EVT_NOTIFY_BROADCAST_ENTER, - CLOCK_EVT_NOTIFY_BROADCAST_EXIT, CLOCK_EVT_NOTIFY_CPU_DYING, CLOCK_EVT_NOTIFY_CPU_DEAD, }; diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 7791b1c..be9abf3 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -655,13 +655,6 @@ int clockevents_notify(unsigned long reason, void *arg) raw_spin_lock_irqsave(&clockevents_lock, flags); switch (reason) { - case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: - tick_broadcast_enter(); - break; - case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: - tick_broadcast_exit(); - break; - case CLOCK_EVT_NOTIFY_CPU_DYING: tick_handover_do_timer(arg); break; -- cgit v1.1 From 52c063d1adbc16c76e70fffa20727fcd4e9343b3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:37:24 +0200 Subject: clockevents: Make tick handover explicit clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the tick_handover call and invoke it explicitly from the hotplug code. This temporary solution will be cleaned up in later patches. Signed-off-by: Thomas Gleixner [ Rebase ] Signed-off-by: Rafael J.
Wysocki Cc: Peter Zijlstra Cc: John Stultz Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1658173.RkEEILFiQZ@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 1 - include/linux/tick.h | 2 ++ kernel/cpu.c | 2 ++ kernel/time/clockevents.c | 4 ---- kernel/time/hrtimer.c | 4 ---- kernel/time/tick-common.c | 9 ++++++--- kernel/time/tick-internal.h | 1 - 7 files changed, 10 insertions(+), 13 deletions(-) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index f97f498..f4bde22 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -11,7 +11,6 @@ /* Clock event notification values */ enum clock_event_nofitiers { CLOCK_EVT_NOTIFY_ADD, - CLOCK_EVT_NOTIFY_CPU_DYING, CLOCK_EVT_NOTIFY_CPU_DEAD, }; diff --git a/include/linux/tick.h b/include/linux/tick.h index 6119321..2c68fa3 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -19,12 +19,14 @@ extern void tick_unfreeze(void); extern void tick_suspend_local(void); /* Should be core only, but XEN resume magic and ARM BL switcher require it */ extern void tick_resume_local(void); +extern void tick_handover_do_timer(void); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_freeze(void) { } static inline void tick_unfreeze(void) { } static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } +static inline void tick_handover_do_timer(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #ifdef CONFIG_TICK_ONESHOT diff --git a/kernel/cpu.c b/kernel/cpu.c index af5db20..eba7eaa 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -339,6 +339,8 @@ static int __ref take_cpu_down(void *_param) return err; cpu_notify(CPU_DYING | param->mod, param->hcpu); + /* Give up timekeeping duties */ + tick_handover_do_timer(); /* Park the stopper thread */ kthread_park(current); return 0; diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index be9abf3..88fb3b9 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -655,10 +655,6 @@ int clockevents_notify(unsigned long reason, void *arg) raw_spin_lock_irqsave(&clockevents_lock, flags); switch (reason) { - case CLOCK_EVT_NOTIFY_CPU_DYING: - tick_handover_do_timer(arg); - break; - case CLOCK_EVT_NOTIFY_CPU_DEAD: tick_shutdown_broadcast_oneshot(arg); tick_shutdown_broadcast(arg); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 721d29b..6a7a64e 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1707,10 +1707,6 @@ static int hrtimer_cpu_notify(struct notifier_block *self, break; #ifdef CONFIG_HOTPLUG_CPU - case CPU_DYING: - case CPU_DYING_FROZEN: - clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu); - break; case CPU_DEAD: case CPU_DEAD_FROZEN: { diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index e28ba5c..055c868 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -332,20 +332,23 @@ out_bc: tick_install_broadcast_device(newdev); } +#ifdef CONFIG_HOTPLUG_CPU /* * Transfer the do_timer job away from a dying cpu. * - * Called with interrupts disabled. + * Called with interrupts disabled. Not locking required. If + * tick_do_timer_cpu is owned by this cpu, nothing can change it. */ -void tick_handover_do_timer(int *cpup) +void tick_handover_do_timer(void) { - if (*cpup == tick_do_timer_cpu) { + if (tick_do_timer_cpu == smp_processor_id()) { int cpu = cpumask_first(cpu_online_mask); tick_do_timer_cpu = (cpu < nr_cpu_ids) ? 
cpu : TICK_DO_TIMER_NONE; } } +#endif /* * Shutdown an event device on a given cpu: diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 0266f9d..aabcb5d 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -20,7 +20,6 @@ extern int tick_do_timer_cpu __read_mostly; extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); extern void tick_handle_periodic(struct clock_event_device *dev); extern void tick_check_new_device(struct clock_event_device *dev); -extern void tick_handover_do_timer(int *cpup); extern void tick_shutdown(unsigned int *cpup); extern void tick_suspend(void); extern void tick_resume(void); -- cgit v1.1 From a49b116dcb1265f238f3169507424257b0519069 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:38:05 +0200 Subject: clockevents: Cleanup dead cpu explicitely clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the cleanup function for a dead cpu and invoke it directly from the cpu down code. Make it conditional on CPU_HOTPLUG as well. Temporary change, will be refined in the future. Signed-off-by: Thomas Gleixner [ Rebased, added clockevents_notify() removal ] Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1735025.raBZdQHM3m@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 6 ------ include/linux/tick.h | 2 ++ kernel/cpu.c | 1 + kernel/time/clockevents.c | 51 ++++++++++++++++++-------------------------- kernel/time/hrtimer.c | 3 --- kernel/time/tick-broadcast.c | 39 ++++++++++++++++----------------- kernel/time/tick-common.c | 6 +++--- kernel/time/tick-internal.h | 10 ++++----- 8 files changed, 52 insertions(+), 66 deletions(-) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index f4bde22..96c280b 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -8,12 +8,6 @@ #ifndef _LINUX_CLOCKCHIPS_H #define _LINUX_CLOCKCHIPS_H -/* Clock event notification values */ -enum clock_event_nofitiers { - CLOCK_EVT_NOTIFY_ADD, - CLOCK_EVT_NOTIFY_CPU_DEAD, -}; - #ifdef CONFIG_GENERIC_CLOCKEVENTS # include diff --git a/include/linux/tick.h b/include/linux/tick.h index 2c68fa3..f8492da5 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -20,6 +20,7 @@ extern void tick_suspend_local(void); /* Should be core only, but XEN resume magic and ARM BL switcher require it */ extern void tick_resume_local(void); extern void tick_handover_do_timer(void); +extern void tick_cleanup_dead_cpu(int cpu); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_freeze(void) { } @@ -27,6 +28,7 @@ static inline void tick_unfreeze(void) { } static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } static inline void tick_handover_do_timer(void) { } +static inline void tick_cleanup_dead_cpu(int cpu) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #ifdef CONFIG_TICK_ONESHOT diff --git a/kernel/cpu.c b/kernel/cpu.c index eba7eaa..82eea9c 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -419,6 +419,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) __cpu_die(cpu); /* CPU is completely dead: tell everyone. Too late to complain. 
*/ + tick_cleanup_dead_cpu(cpu); cpu_notify_nofail(CPU_DEAD | mod, hcpu); check_for_tasks(cpu); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 88fb3b9..25d942d 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -642,49 +642,40 @@ void clockevents_resume(void) dev->resume(dev); } +#ifdef CONFIG_HOTPLUG_CPU /** - * clockevents_notify - notification about relevant events - * Returns 0 on success, any other value on error + * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu */ -int clockevents_notify(unsigned long reason, void *arg) +void tick_cleanup_dead_cpu(int cpu) { struct clock_event_device *dev, *tmp; unsigned long flags; - int cpu, ret = 0; raw_spin_lock_irqsave(&clockevents_lock, flags); - switch (reason) { - case CLOCK_EVT_NOTIFY_CPU_DEAD: - tick_shutdown_broadcast_oneshot(arg); - tick_shutdown_broadcast(arg); - tick_shutdown(arg); - /* - * Unregister the clock event devices which were - * released from the users in the notify chain. - */ - list_for_each_entry_safe(dev, tmp, &clockevents_released, list) + tick_shutdown_broadcast_oneshot(cpu); + tick_shutdown_broadcast(cpu); + tick_shutdown(cpu); + /* + * Unregister the clock event devices which were + * released from the users in the notify chain. + */ + list_for_each_entry_safe(dev, tmp, &clockevents_released, list) + list_del(&dev->list); + /* + * Now check whether the CPU has left unused per cpu devices + */ + list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { + if (cpumask_test_cpu(cpu, dev->cpumask) && + cpumask_weight(dev->cpumask) == 1 && + !tick_is_broadcast_device(dev)) { + BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED); list_del(&dev->list); - /* - * Now check whether the CPU has left unused per cpu devices - */ - cpu = *((int *)arg); - list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { - if (cpumask_test_cpu(cpu, dev->cpumask) && - cpumask_weight(dev->cpumask) == 1 && - !tick_is_broadcast_device(dev)) { - BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED); - list_del(&dev->list); - } } - break; - default: - break; } raw_spin_unlock_irqrestore(&clockevents_lock, flags); - return ret; } -EXPORT_SYMBOL_GPL(clockevents_notify); +#endif #ifdef CONFIG_SYSFS struct bus_type clockevents_subsys = { diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 6a7a64e..76d4bd9 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1709,11 +1709,8 @@ static int hrtimer_cpu_notify(struct notifier_block *self, #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: case CPU_DEAD_FROZEN: - { - clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu); migrate_hrtimers(scpu); break; - } #endif default: diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 55e43f2..7e8ca4f 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -410,14 +410,14 @@ void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) dev->event_handler = tick_handle_periodic_broadcast; } +#ifdef CONFIG_HOTPLUG_CPU /* * Remove a CPU from broadcasting */ -void tick_shutdown_broadcast(unsigned int *cpup) +void tick_shutdown_broadcast(unsigned int cpu) { struct clock_event_device *bc; unsigned long flags; - unsigned int cpu = *cpup; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -432,6 +432,7 @@ void tick_shutdown_broadcast(unsigned int *cpup) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } +#endif void tick_suspend_broadcast(void) { @@ -672,21 +673,6 @@ static void broadcast_shutdown_local(struct 
clock_event_device *bc, clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } -void hotplug_cpu__broadcast_tick_pull(int deadcpu) -{ - struct clock_event_device *bc; - unsigned long flags; - - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - bc = tick_broadcast_device.evtdev; - - if (bc && broadcast_needs_cpu(bc, deadcpu)) { - /* This moves the broadcast assignment to this CPU: */ - clockevents_program_event(bc, bc->next_event, 1); - } - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); -} - /** * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode * @state: The target state (enter/exit) @@ -908,14 +894,28 @@ void tick_broadcast_switch_to_oneshot(void) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } +#ifdef CONFIG_HOTPLUG_CPU +void hotplug_cpu__broadcast_tick_pull(int deadcpu) +{ + struct clock_event_device *bc; + unsigned long flags; + + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); + bc = tick_broadcast_device.evtdev; + + if (bc && broadcast_needs_cpu(bc, deadcpu)) { + /* This moves the broadcast assignment to this CPU: */ + clockevents_program_event(bc, bc->next_event, 1); + } + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); +} /* * Remove a dead CPU from broadcasting */ -void tick_shutdown_broadcast_oneshot(unsigned int *cpup) +void tick_shutdown_broadcast_oneshot(unsigned int cpu) { unsigned long flags; - unsigned int cpu = *cpup; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -929,6 +929,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } +#endif /* * Check, whether the broadcast device is in one shot mode diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 055c868..fac3e98 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -348,7 +348,6 @@ void tick_handover_do_timer(void) TICK_DO_TIMER_NONE; } } -#endif /* * Shutdown an event device on a given cpu: @@ -357,9 +356,9 @@ void tick_handover_do_timer(void) * access the hardware device itself. * We just set the mode and remove it from the lists. 
*/ -void tick_shutdown(unsigned int *cpup) +void tick_shutdown(unsigned int cpu) { - struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); + struct tick_device *td = &per_cpu(tick_cpu_device, cpu); struct clock_event_device *dev = td->evtdev; td->mode = TICKDEV_MODE_PERIODIC; @@ -375,6 +374,7 @@ void tick_shutdown(unsigned int *cpup) td->evtdev = NULL; } } +#endif /** * tick_suspend_local - Suspend the local tick device diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index aabcb5d..b64fdd8 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -20,7 +20,7 @@ extern int tick_do_timer_cpu __read_mostly; extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); extern void tick_handle_periodic(struct clock_event_device *dev); extern void tick_check_new_device(struct clock_event_device *dev); -extern void tick_shutdown(unsigned int *cpup); +extern void tick_shutdown(unsigned int cpu); extern void tick_suspend(void); extern void tick_resume(void); extern bool tick_check_replacement(struct clock_event_device *curdev, @@ -52,7 +52,7 @@ extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); extern void tick_install_broadcast_device(struct clock_event_device *dev); extern int tick_is_broadcast_device(struct clock_event_device *dev); -extern void tick_shutdown_broadcast(unsigned int *cpup); +extern void tick_shutdown_broadcast(unsigned int cpu); extern void tick_suspend_broadcast(void); extern void tick_resume_broadcast(void); extern bool tick_resume_check_broadcast(void); @@ -66,7 +66,7 @@ static inline void tick_install_broadcast_device(struct clock_event_device *dev) static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; } static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; } static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } -static inline void tick_shutdown_broadcast(unsigned int *cpup) { } +static inline void tick_shutdown_broadcast(unsigned int cpu) { } static inline void tick_suspend_broadcast(void) { } static inline void tick_resume_broadcast(void) { } static inline bool tick_resume_check_broadcast(void) { return false; } @@ -117,7 +117,7 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); extern void tick_broadcast_switch_to_oneshot(void); -extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); +extern void tick_shutdown_broadcast_oneshot(unsigned int cpu); extern int tick_broadcast_oneshot_active(void); extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); @@ -125,7 +125,7 @@ extern struct cpumask *tick_get_broadcast_oneshot_mask(void); #else /* !(BROADCAST && ONESHOT): */ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline void tick_broadcast_switch_to_oneshot(void) { } -static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } +static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline void tick_check_oneshot_broadcast_this_cpu(void) { } static inline bool tick_broadcast_oneshot_available(void) { return 
tick_oneshot_possible(); } -- cgit v1.1 From 347c6f6dda1098318088feb8e60188f0161e743d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:39:05 +0200 Subject: timekeeping: Get rid of stale comment Arch specific management of xtime/jiffies/wall_to_monotonic is gone for quite a while. Zap the stale comment. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Acked-by: John Stultz Cc: Peter Zijlstra Cc: John Stultz Link: http://lkml.kernel.org/r/2422730.dmO29q661S@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 79b9bc6..946acb7 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1354,10 +1354,6 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) /** * timekeeping_resume - Resumes the generic timekeeping subsystem. - * - * This is for the generic clocksource timekeeping. - * xtime/wall_to_monotonic/jiffies/etc are - * still managed by arch specific suspend/resume code. */ void timekeeping_resume(void) { -- cgit v1.1 From 422fe7502e3f16dc1c680f22d31f59f022edc10d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Apr 2015 15:21:51 +0200 Subject: timers/PM: Fix up tick_unfreeze() A recent conflict resolution has left tick_resume() in tick_unfreeze() which leads to an unbalanced execution of tick_resume_broadcast() every time that function runs. Fix that by replacing the tick_resume() in tick_unfreeze() with tick_resume_local() as appropriate. Signed-off-by: Rafael J. Wysocki Cc: boris.ostrovsky@oracle.com Cc: david.vrabel@citrix.com Cc: konrad.wilk@oracle.com Cc: peterz@infradead.org Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/8099075.V0LvN3pQAV@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/tick-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index fac3e98..ad66a51 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -482,7 +482,7 @@ void tick_unfreeze(void) if (tick_freeze_depth == num_online_cpus()) timekeeping_resume(); else - tick_resume(); + tick_resume_local(); tick_freeze_depth--; -- cgit v1.1 From def747087e83aa5f6a71582cfa71e18341988688 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Apr 2015 15:31:32 +0200 Subject: timers/PM: Drop unnecessary braces from tick_freeze() Some braces in tick_freeze() are not necessary, so drop them. Signed-off-by: Rafael J. Wysocki Cc: peterz@infradead.org Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1534128.H5hN3KBFB4@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/tick-common.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index ad66a51..3ae6afa 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -457,11 +457,10 @@ void tick_freeze(void) raw_spin_lock(&tick_freeze_lock); tick_freeze_depth++; - if (tick_freeze_depth == num_online_cpus()) { + if (tick_freeze_depth == num_online_cpus()) timekeeping_suspend(); - } else { + else tick_suspend_local(); - } raw_spin_unlock(&tick_freeze_lock); } -- cgit v1.1
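
Taken together, the conversions in this series leave idle code with one simple idiom: a driver whose local tick device stops in deep C-states calls tick_broadcast_enter() before entering the state and tick_broadcast_exit() after leaving it, both declared in <linux/tick.h>. The sketch below is a hypothetical illustration of that idiom, not code from the series; my_enter_deep_idle() and my_mwait() are made-up names, and interrupts are assumed to be disabled by the caller, as in the drivers converted above.

    #include <linux/tick.h>

    /* Hypothetical helper showing the explicit broadcast oneshot API. */
    static int my_enter_deep_idle(void)
    {
            /*
             * The local tick device stops in this state
             * (CLOCK_EVT_FEAT_C3STOP), so hand wakeup duty to the
             * broadcast device first. -EBUSY means this CPU is used
             * to broadcast wakeups and must not enter the state.
             */
            if (tick_broadcast_enter())
                    return -EBUSY;

            my_mwait();             /* placeholder for the real idle entry */

            tick_broadcast_exit();  /* reclaim the local tick device */
            return 0;
    }

The sched/idle hunk above follows exactly this shape: when tick_broadcast_enter() fails, cpuidle_idle_call() jumps to use_default and picks an idle state that does not depend on the broadcast device.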
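
The hotplug path ends up equally explicit. The dying CPU hands off its do_timer duty from within take_cpu_down(), and a surviving CPU tears down the dead CPU's tick machinery after __cpu_die(). A simplified sketch of that ordering, condensed from the kernel/cpu.c hunks above (CPU notifiers and error handling elided; the my_* names are hypothetical):

    #include <linux/tick.h>

    /* Runs on the CPU going down, with interrupts disabled. */
    static int my_take_cpu_down(void *param)
    {
            tick_handover_do_timer();       /* pass do_timer to an online CPU */
            return 0;
    }

    /* Runs on a surviving CPU once the victim is completely dead. */
    static void my_finish_cpu_down(unsigned int cpu)
    {
            /* ... stop_machine(my_take_cpu_down, ...); __cpu_die(cpu); ... */
            tick_cleanup_dead_cpu(cpu);     /* shut down broadcast state and
                                             * unregister the CPU's devices */
    }

Both functions have inline stubs when CONFIG_GENERIC_CLOCKEVENTS is off and real bodies under CONFIG_HOTPLUG_CPU, which is what finally allows the CLOCK_EVT_NOTIFY_CPU_DYING and CLOCK_EVT_NOTIFY_CPU_DEAD multiplex cases, and clockevents_notify() itself, to be deleted.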