From 4796cf9b02b5bea141632e21d64556a7eb883a65 Mon Sep 17 00:00:00 2001 From: Yingjoe Chen Date: Fri, 10 Apr 2015 21:55:50 +0800 Subject: time: Remove nonexistent function prototype The function clocksource_get_next() was removed in commit 75c5158f70 (timekeeping: Update clocksource with stop_machine), but the prototype was not removed with it. Remove the prototype. Signed-off-by: Yingjoe Chen Cc: Cc: Martin Schwidefsky Cc: Cc: John Stultz Link: http://lkml.kernel.org/r/1428674150-1780-1-git-send-email-yingjoe.chen@mediatek.com Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 1355098..a25fc6e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -181,7 +181,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) extern int clocksource_unregister(struct clocksource*); extern void clocksource_touch_watchdog(void); -extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_suspend(void); extern void clocksource_resume(void); -- cgit v1.1 From 91e5a2170e795989da9f90c18ba18984f23acc5b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 13 Apr 2015 21:02:22 +0000 Subject: hrtimer: Document hrtimer_forward[_now]() proper Document the calling context conditions. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20150413210035.178751779@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 05f6df1..7770676 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -418,7 +418,22 @@ static inline int hrtimer_callback_running(struct hrtimer *timer) extern u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); -/* Forward a hrtimer so it expires after the hrtimer's current now */ +/** + * hrtimer_forward_now - forward the timer expiry so it expires after now + * @timer: hrtimer to forward + * @interval: the interval to forward + * + * Forward the timer expiry so it will expire after the current time + * of the hrtimer clock base. Returns the number of overruns. + * + * Can be safely called from the callback function of @timer. If + * called from other contexts @timer must neither be enqueued nor + * running the callback and the caller needs to take care of + * serialization. + * + * Note: This only updates the timer expiry value and does not requeue + * the timer. + */ static inline u64 hrtimer_forward_now(struct hrtimer *timer, ktime_t interval) { -- cgit v1.1 From 398ca17fb54b212cdc9da7ff4a17a35c48dd2103 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:27 +0000 Subject: hrtimer: Get rid of the resolution field in hrtimer_clock_base The field has no value because all clock bases have the same resolution. The resolution only changes when we switch to high resolution timer mode. We can evaluate that from a single static variable as well. In the !HIGHRES case its simply a constant. Export the variable, so we can simplify the usage sites. Signed-off-by: Thomas Gleixner Reviewed-by: Preeti U Murthy Acked-by: Peter Zijlstra Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203500.645454122@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 7770676..bc6f91b 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -137,7 +137,6 @@ struct hrtimer_sleeper { * timer to a base on another cpu. * @clockid: clock id for per_cpu support * @active: red black tree root node for the active timers - * @resolution: the resolution of the clock, in nanoseconds * @get_time: function to retrieve the current time of the clock * @softirq_time: the time when running the hrtimer queue in the softirq * @offset: offset of this clock to the monotonic base @@ -147,7 +146,6 @@ struct hrtimer_clock_base { int index; clockid_t clockid; struct timerqueue_head active; - ktime_t resolution; ktime_t (*get_time)(void); ktime_t softirq_time; ktime_t offset; @@ -295,11 +293,15 @@ extern void hrtimer_peek_ahead_timers(void); extern void clock_was_set_delayed(void); +extern unsigned int hrtimer_resolution; + #else # define MONOTONIC_RES_NSEC LOW_RES_NSEC # define KTIME_MONOTONIC_RES KTIME_LOW_RES +#define hrtimer_resolution LOW_RES_NSEC + static inline void hrtimer_peek_ahead_timers(void) { } /* -- cgit v1.1 From 056a3cacbc46e5aca27b350ce4ecb3b33ebb0700 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:32 +0000 Subject: hrtimer: Get rid of hrtimer_get_res() The resolution is directly accessible now. So its simpler just to fill in the values of the timespec and be done with it. Text size reduction (combined with "hrtimer: Get rid of the resolution field in hrtimer_clock_base"): x8664 -61, i386 -221, ARM -60, power64 -48 Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203500.879888080@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index bc6f91b..8025156 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -385,7 +385,6 @@ static inline int hrtimer_restart(struct hrtimer *timer) /* Query timers: */ extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); -extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp); extern ktime_t hrtimer_get_next_event(void); -- cgit v1.1 From a6ffebce7f89f6f97cc22838a5d4383b15d6774f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:34 +0000 Subject: hrtimer: Make the statistics fields smaller No point in having usigned long for /proc/timer_list statistics. Make them unsigned int. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203500.959773467@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 8025156..d39f284 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -187,10 +187,10 @@ struct hrtimer_cpu_base { int in_hrtirq; int hres_active; int hang_detected; - unsigned long nr_events; - unsigned long nr_retries; - unsigned long nr_hangs; - ktime_t max_hang_time; + unsigned int nr_events; + unsigned int nr_retries; + unsigned int nr_hangs; + unsigned int max_hang_time; #endif struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; }; -- cgit v1.1 From 21d6d52a1b7028e6a6840bd82e354aefa9a5e203 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:35 +0000 Subject: hrtimer: Get rid of softirq time The softirq time field in the clock bases is an optimization from the early days of hrtimers. It provides a coarse "jiffies" like time mostly for self rearming timers. But that comes with a price: - Larger code size - Extra storage space - Duplicated functions with really small differences The benefit of this is optimization is marginal for contemporary systems. Consolidate everything on the high resolution timer implementation. This makes further optimizations possible. Text size reduction: x8664 -95, i386 -356, ARM -148, ARM64 -40, power64 -16 Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203501.039977424@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index d39f284..e292830 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -138,7 +138,6 @@ struct hrtimer_sleeper { * @clockid: clock id for per_cpu support * @active: red black tree root node for the active timers * @get_time: function to retrieve the current time of the clock - * @softirq_time: the time when running the hrtimer queue in the softirq * @offset: offset of this clock to the monotonic base */ struct hrtimer_clock_base { @@ -147,7 +146,6 @@ struct hrtimer_clock_base { clockid_t clockid; struct timerqueue_head active; ktime_t (*get_time)(void); - ktime_t softirq_time; ktime_t offset; }; @@ -260,19 +258,16 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) return ktime_sub(timer->node.expires, timer->base->get_time()); } -#ifdef CONFIG_HIGH_RES_TIMERS -struct clock_event_device; - -extern void hrtimer_interrupt(struct clock_event_device *dev); - -/* - * In high resolution mode the time reference must be read accurate - */ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) { return timer->base->get_time(); } +#ifdef CONFIG_HIGH_RES_TIMERS +struct clock_event_device; + +extern void hrtimer_interrupt(struct clock_event_device *dev); + static inline int hrtimer_is_hres_active(struct hrtimer *timer) { return timer->base->cpu_base->hres_active; @@ -304,15 +299,6 @@ extern unsigned int hrtimer_resolution; static inline void hrtimer_peek_ahead_timers(void) { } -/* - * In non high resolution mode the time reference is taken from - * the base softirq time variable. - */ -static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) -{ - return timer->base->softirq_time; -} - static inline int hrtimer_is_hres_active(struct hrtimer *timer) { return 0; -- cgit v1.1 From 868a3e915f7f5eba8f8cb4f7da2276760807c51c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:37 +0000 Subject: hrtimer: Make offset update smarter On every tick/hrtimer interrupt we update the offset variables of the clock bases. That's silly because these offsets change very seldom. Add a sequence counter to the time keeping code which keeps track of the offset updates (clock_was_set()). Have a sequence cache in the hrtimer cpu bases to evaluate whether the offsets must be updated or not. This allows us later to avoid pointless cacheline pollution. Signed-off-by: Thomas Gleixner Reviewed-by: Preeti U Murthy Acked-by: Peter Zijlstra Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Cc: John Stultz Link: http://lkml.kernel.org/r/20150414203501.132820245@linutronix.de Signed-off-by: Thomas Gleixner Cc: John Stultz --- include/linux/hrtimer.h | 4 ++-- include/linux/timekeeper_internal.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index e292830..5e04f8f 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -163,7 +163,7 @@ enum hrtimer_base_type { * and timers * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers - * @clock_was_set: Indicates that clock was set from irq context. + * @clock_was_set_seq: Sequence counter of clock was set events * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @in_hrtirq: hrtimer_interrupt() is currently executing @@ -179,7 +179,7 @@ struct hrtimer_cpu_base { raw_spinlock_t lock; unsigned int cpu; unsigned int active_bases; - unsigned int clock_was_set; + unsigned int clock_was_set_seq; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int in_hrtirq; diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index fb86963..6f8276a 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -49,6 +49,7 @@ struct tk_read_base { * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai * @tai_offset: The current UTC to TAI offset in seconds + * @clock_was_set_seq: The sequence number of clock was set events * @raw_time: Monotonic raw base time in timespec64 format * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP @@ -85,6 +86,7 @@ struct timekeeper { ktime_t offs_boot; ktime_t offs_tai; s32 tai_offset; + unsigned int clock_was_set_seq; struct timespec64 raw_time; /* The following members are for timekeeping internal use */ -- cgit v1.1 From e19ffe8be2cd0a1f726b235443eba21e64f6be5e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:39 +0000 Subject: hrtimer: Use bits for various boolean indicators No point in wasting 12 byte storage space. Generates better code as well. Text size reduction: x8664 -64, i386 -16, ARM -132, ARM64 -0, power64 -48 Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203501.227955358@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 5e04f8f..17a59dd 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -181,10 +181,10 @@ struct hrtimer_cpu_base { unsigned int active_bases; unsigned int clock_was_set_seq; #ifdef CONFIG_HIGH_RES_TIMERS + unsigned int in_hrtirq : 1, + hres_active : 1, + hang_detected : 1; ktime_t expires_next; - int in_hrtirq; - int hres_active; - int hang_detected; unsigned int nr_events; unsigned int nr_retries; unsigned int nr_hangs; -- cgit v1.1 From 6d9a1411393d51f17bee3fe163430b21b2cb2de9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:42 +0000 Subject: hrtimer: Cache line align the hrtimer cpu base We really want that data structure to start at a cache line boundary. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203501.417597627@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 17a59dd..0853f52 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -191,7 +191,7 @@ struct hrtimer_cpu_base { unsigned int max_hang_time; #endif struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; -}; +} ____cacheline_aligned; static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { -- cgit v1.1 From b8e38413ac2c33c497e72895fcd5da709fd1b908 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:44 +0000 Subject: hrtimer: Align the hrtimer clock bases as well We don't use cacheline_align here because that might waste lot of space on 32bit machine with 64 bytes cachelines and on 64bit machines with 128 bytes cachelines. The size of struct hrtimer_clock_base is 64byte on 64bit and 32byte on 32bit machines. So we utilize the cache lines proper. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203501.498165771@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 0853f52..e5c22d6 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -130,6 +130,12 @@ struct hrtimer_sleeper { struct task_struct *task; }; +#ifdef CONFIG_64BIT +# define HRTIMER_CLOCK_BASE_ALIGN 64 +#else +# define HRTIMER_CLOCK_BASE_ALIGN 32 +#endif + /** * struct hrtimer_clock_base - the timer base for a specific clock * @cpu_base: per cpu clock base @@ -147,7 +153,7 @@ struct hrtimer_clock_base { struct timerqueue_head active; ktime_t (*get_time)(void); ktime_t offset; -}; +} __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); enum hrtimer_base_type { HRTIMER_BASE_MONOTONIC, @@ -195,6 +201,8 @@ struct hrtimer_cpu_base { static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { + BUILD_BUG_ON(sizeof(struct hrtimer_clock_base) > HRTIMER_CLOCK_BASE_ALIGN); + timer->node.expires = time; timer->_softexpires = time; } -- cgit v1.1 From c320642e1ced3b81592610e374894fea995f475b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:46 +0000 Subject: timerqueue: Let timerqueue_add/del return information The hrtimer code is interested whether the added timer is the first one to expire and whether the removed timer was the last one in the tree. The add/del routines have that information already. So we can return it right away instead of reevaluating it at the call site. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Cc: John Stultz Link: http://lkml.kernel.org/r/20150414203501.579063647@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/timerqueue.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index a520fd7..7eec17a 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h @@ -16,10 +16,10 @@ struct timerqueue_head { }; -extern void timerqueue_add(struct timerqueue_head *head, - struct timerqueue_node *node); -extern void timerqueue_del(struct timerqueue_head *head, - struct timerqueue_node *node); +extern bool timerqueue_add(struct timerqueue_head *head, + struct timerqueue_node *node); +extern bool timerqueue_del(struct timerqueue_head *head, + struct timerqueue_node *node); extern struct timerqueue_node *timerqueue_iterate_next( struct timerqueue_node *node); -- cgit v1.1 From 895bdfa793f6e912d1a58fc445b3dd4d686f7bd3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:49 +0000 Subject: hrtimer: Keep pointer to first timer and simplify __remove_hrtimer() __remove_hrtimer() needs to evaluate the expiry time to figure out whether the timer which is removed is eventually the first expiring timer on the cpu. Keep a pointer to it, which is lazily updated, so we can avoid the evaluation dance and retrieve the information from there. Generates slightly better code. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203501.752838019@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index e5c22d6..d194c1d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -172,6 +172,7 @@ enum hrtimer_base_type { * @clock_was_set_seq: Sequence counter of clock was set events * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() + * @next_timer: Pointer to the first expiring timer * @in_hrtirq: hrtimer_interrupt() is currently executing * @hres_active: State of high resolution mode * @hang_detected: The last hrtimer interrupt detected a hang @@ -180,6 +181,10 @@ enum hrtimer_base_type { * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt * @clock_base: array of clock bases for this cpu + * + * Note: next_timer is just an optimization for __remove_hrtimer(). + * Do not dereference the pointer because it is not reliable on + * cross cpu removals. */ struct hrtimer_cpu_base { raw_spinlock_t lock; @@ -191,6 +196,7 @@ struct hrtimer_cpu_base { hres_active : 1, hang_detected : 1; ktime_t expires_next; + struct hrtimer *next_timer; unsigned int nr_events; unsigned int nr_retries; unsigned int nr_hangs; -- cgit v1.1 From c6eb3f70d4482806dc2d3e1e3c7736f497b1d418 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:51 +0000 Subject: hrtimer: Get rid of hrtimer softirq hrtimer softirq is a leftover from the initial implementation and serves only the purpose to handle the enqueueing of already expired timers in the high resolution timer mode. We discussed whether we change the return value and force all start sites to handle that the timer is already expired, but that would be a Herculean task and I'm not sure whether its a good idea to enforce that handling on everyone. A simpler solution is to enforce a timer interrupt instead of raising and scheduling a softirq. Just use the existing infrastructure to do so and remove all the softirq leftovers. The HRTIMER softirq enum is now unused, but kept around because trace parsers rely on the existing numbering. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203501.840834708@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 1 - include/linux/interrupt.h | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index d194c1d..048270a 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -459,7 +459,6 @@ extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); /* Soft interrupt function to run the hrtimer queues: */ extern void hrtimer_run_queues(void); -extern void hrtimer_run_pending(void); /* Bootup initialization: */ extern void __init hrtimers_init(void); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 950ae45..6bf15a6 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -413,7 +413,8 @@ enum BLOCK_IOPOLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, - HRTIMER_SOFTIRQ, + HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the + numbering. Sigh! */ RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS -- cgit v1.1 From c1ad348b452aacd784fb97403d03d71723c72ee1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:58 +0000 Subject: tick: Nohz: Rework next timer evaluation The evaluation of the next timer in the nohz code is based on jiffies while all the tick internals are nano seconds based. We have also to convert hrtimer nanoseconds to jiffies in the !highres case. That's just wrong and introduces interesting corner cases. Turn it around and convert the next timer wheel timer expiry and the rcu event to clock monotonic and base all calculations on nanoseconds. That identifies the case where no timer is pending clearly with an absolute expiry value of KTIME_MAX. Makes the code more readable and gets rid of the jiffies magic in the nohz code. Signed-off-by: Thomas Gleixner Reviewed-by: Paul E. McKenney Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Cc: Josh Triplett Cc: Lai Jiangshan Cc: John Stultz Cc: Marcelo Tosatti Link: http://lkml.kernel.org/r/20150414203502.184198593@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 +- include/linux/rcupdate.h | 6 ++++-- include/linux/rcutree.h | 2 +- include/linux/timer.h | 7 ------- 4 files changed, 6 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 048270a..2c68f71 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -386,7 +386,7 @@ static inline int hrtimer_restart(struct hrtimer *timer) /* Query timers: */ extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); -extern ktime_t hrtimer_get_next_event(void); +extern u64 hrtimer_get_next_event(void); /* * A timer is active, when it is enqueued into the rbtree or the diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 573a5af..0627a44 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -44,6 +44,8 @@ #include #include #include +#include + #include extern int rcu_expedited; /* for sysctl */ @@ -1154,9 +1156,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) #if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) -static inline int rcu_needs_cpu(unsigned long *delta_jiffies) +static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) { - *delta_jiffies = ULONG_MAX; + *nextevt = KTIME_MAX; return 0; } #endif /* #if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d2e583a..db2e31be 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -32,7 +32,7 @@ void rcu_note_context_switch(void); #ifndef CONFIG_RCU_NOCB_CPU_ALL -int rcu_needs_cpu(unsigned long *delta_jiffies); +int rcu_needs_cpu(u64 basem, u64 *nextevt); #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ void rcu_cpu_stall_reset(void); diff --git a/include/linux/timer.h b/include/linux/timer.h index 8c5a197..fbb80e0 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -188,13 +188,6 @@ extern void set_timer_slack(struct timer_list *time, int slack_hz); #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) /* - * Return when the next timer-wheel timeout occurs (in absolute jiffies), - * locks the timer base and does the comparison against the given - * jiffie. - */ -extern unsigned long get_next_timer_interrupt(unsigned long now); - -/* * Timer-statistics info: */ #ifdef CONFIG_TIMER_STATS -- cgit v1.1 From 58f1f803f1d6ef9ab280de13246d65970a09cb95 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:09:08 +0000 Subject: hrtimer: Get rid of __hrtimer_start_range_ns() No more callers. Remove the leftovers. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203502.707871492@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2c68f71..a80baa8 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -359,10 +359,6 @@ extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode); extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long range_ns, const enum hrtimer_mode mode); -extern int -__hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, - unsigned long delta_ns, - const enum hrtimer_mode mode, int wakeup); extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); -- cgit v1.1 From 02a171af1a46966dcdb5b38cdc33e4f43e92c778 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:09:10 +0000 Subject: hrtimer: Make hrtimer_start() a inline wrapper No point for an extra export just to set the extra argument of hrtimer_start_range_ns() to 0. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203502.808544539@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index a80baa8..42074ab 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -355,11 +355,26 @@ static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } #endif /* Basic timer operations: */ -extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, - const enum hrtimer_mode mode); extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long range_ns, const enum hrtimer_mode mode); +/** + * hrtimer_start - (re)start an hrtimer on the current CPU + * @timer: the timer to be added + * @tim: expiry time + * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or + * relative (HRTIMER_MODE_REL) + * + * Returns: + * 0 on success + * 1 when the timer was active + */ +static inline int hrtimer_start(struct hrtimer *timer, ktime_t tim, + const enum hrtimer_mode mode) +{ + return hrtimer_start_range_ns(timer, tim, 0, mode); +} + extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); -- cgit v1.1 From b193217e6dc3f88b599b573b53e0e0f6671d969a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:09:18 +0000 Subject: alarmtimer: Get rid of unused return value We want to get rid of the hrtimer_start() return value and the alarm timer return value is nowhere used. Remove it. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Cc: John Stultz Link: http://lkml.kernel.org/r/20150414203503.243910615@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/alarmtimer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index a899402..52f3b7d 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -43,8 +43,8 @@ struct alarm { void alarm_init(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)); -int alarm_start(struct alarm *alarm, ktime_t start); -int alarm_start_relative(struct alarm *alarm, ktime_t start); +void alarm_start(struct alarm *alarm, ktime_t start); +void alarm_start_relative(struct alarm *alarm, ktime_t start); void alarm_restart(struct alarm *alarm); int alarm_try_to_cancel(struct alarm *alarm); int alarm_cancel(struct alarm *alarm); -- cgit v1.1 From 61699e13072a89880aa584dcc64c6da465fb2ccc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:09:23 +0000 Subject: hrtimer: Remove hrtimer_start() return value No user was ever interested whether the timer was active or not when it was started. All abusers of the return value are gone, so get rid of it. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203503.483556394@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 22 +++++++++------------- include/linux/interrupt.h | 6 +++--- 2 files changed, 12 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 42074ab..470d876 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -355,7 +355,7 @@ static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } #endif /* Basic timer operations: */ -extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, +extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long range_ns, const enum hrtimer_mode mode); /** @@ -364,34 +364,30 @@ extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * @tim: expiry time * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or * relative (HRTIMER_MODE_REL) - * - * Returns: - * 0 on success - * 1 when the timer was active */ -static inline int hrtimer_start(struct hrtimer *timer, ktime_t tim, - const enum hrtimer_mode mode) +static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, + const enum hrtimer_mode mode) { - return hrtimer_start_range_ns(timer, tim, 0, mode); + hrtimer_start_range_ns(timer, tim, 0, mode); } extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); -static inline int hrtimer_start_expires(struct hrtimer *timer, - enum hrtimer_mode mode) +static inline void hrtimer_start_expires(struct hrtimer *timer, + enum hrtimer_mode mode) { unsigned long delta; ktime_t soft, hard; soft = hrtimer_get_softexpires(timer); hard = hrtimer_get_expires(timer); delta = ktime_to_ns(ktime_sub(hard, soft)); - return hrtimer_start_range_ns(timer, soft, delta, mode); + hrtimer_start_range_ns(timer, soft, delta, mode); } -static inline int hrtimer_restart(struct hrtimer *timer) +static inline void hrtimer_restart(struct hrtimer *timer) { - return hrtimer_start_expires(timer, HRTIMER_MODE_ABS); + hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } /* Query timers: */ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 6bf15a6..be7e75c 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -593,10 +593,10 @@ tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, clockid_t which_clock, enum hrtimer_mode mode); static inline -int tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, - const enum hrtimer_mode mode) +void tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, + const enum hrtimer_mode mode) { - return hrtimer_start(&ttimer->timer, time, mode); + hrtimer_start(&ttimer->timer, time, mode); } static inline -- cgit v1.1 From 4cfafd3082afc707653aeb82e9f8e7b596fbbfd6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 May 2015 12:23:11 +0200 Subject: sched,perf: Fix periodic timers In the below two commits (see Fixes) we have periodic timers that can stop themselves when they're no longer required, but need to be (re)-started when their idle condition changes. Further complications is that we want the timer handler to always do the forward such that it will always correctly deal with the overruns, and we do not want to race such that the handler has already decided to stop, but the (external) restart sees the timer still active and we end up with a 'lost' timer. The problem with the current code is that the re-start can come before the callback does the forward, at which point the forward from the callback will WARN about forwarding an enqueued timer. Now, conceptually its easy to detect if you're before or after the fwd by comparing the expiration time against the current time. Of course, that's expensive (and racy) because we don't have the current time. Alternatively one could cache this state inside the timer, but then everybody pays the overhead of maintaining this extra state, and that is undesired. The only other option that I could see is the external timer_active variable, which I tried to kill before. I would love a nicer interface for this seemingly simple 'problem' but alas. Fixes: 272325c4821f ("perf: Fix mux_interval hrtimer wreckage") Fixes: 77a4d1a1b9a1 ("sched: Cleanup bandwidth timers") Cc: pjt@google.com Cc: tglx@linutronix.de Cc: klamm@yandex-team.ru Cc: mingo@kernel.org Cc: bsegall@google.com Cc: hpa@zytor.com Cc: Sasha Levin Signed-off-by: Peter Zijlstra (Intel) Link: http://lkml.kernel.org/r/20150514102311.GX21418@twins.programming.kicks-ass.net --- include/linux/perf_event.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 61992cf..cf3342a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -566,8 +566,12 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; + + raw_spinlock_t hrtimer_lock; struct hrtimer hrtimer; ktime_t hrtimer_interval; + unsigned int hrtimer_active; + struct pmu *unique_pmu; struct perf_cgroup *cgrp; }; -- cgit v1.1 From ca42aaf0c8616cde6161ea4391dff364efeee46a Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 18 May 2015 14:19:13 +0200 Subject: time: Refactor msecs_to_jiffies Refactor the msecs_to_jiffies conditional code part in time.c and jiffies.h putting it into conditional functions rather than #ifdefs to improve readability. [ tglx: Verified that there is no binary code change ] Signed-off-by: Nicholas Mc Guire Cc: Masahiro Yamada Cc: Sam Ravnborg Cc: Joe Perches Cc: John Stultz Cc: Andrew Hunter Cc: Paul Turner Cc: Michal Marek Link: http://lkml.kernel.org/r/1431951554-5563-2-git-send-email-hofrat@osadl.org Signed-off-by: Thomas Gleixner --- include/linux/jiffies.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index c367cbd..9527ddb 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -7,6 +7,7 @@ #include #include #include /* for HZ */ +#include /* * The following defines establish the engineering parameters of the PLL @@ -288,7 +289,68 @@ static inline u64 jiffies_to_nsecs(const unsigned long j) return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC; } -extern unsigned long msecs_to_jiffies(const unsigned int m); +extern unsigned long __msecs_to_jiffies(const unsigned int m); +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) +/* + * HZ is equal to or smaller than 1000, and 1000 is a nice round + * multiple of HZ, divide with the factor between them, but round + * upwards: + */ +static inline unsigned long _msecs_to_jiffies(const unsigned int m) +{ + return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); +} +#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) +/* + * HZ is larger than 1000, and HZ is a nice round multiple of 1000 - + * simply multiply with the factor between them. + * + * But first make sure the multiplication result cannot overflow: + */ +static inline unsigned long _msecs_to_jiffies(const unsigned int m) +{ + if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; + return m * (HZ / MSEC_PER_SEC); +} +#else +/* + * Generic case - multiply, round and divide. But first check that if + * we are doing a net multiplication, that we wouldn't overflow: + */ +static inline unsigned long _msecs_to_jiffies(const unsigned int m) +{ + if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; + + return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) + >> MSEC_TO_HZ_SHR32; +} +#endif +/** + * msecs_to_jiffies: - convert milliseconds to jiffies + * @m: time in milliseconds + * + * conversion is done as follows: + * + * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET) + * + * - 'too large' values [that would result in larger than + * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. + * + * - all other values are converted to jiffies by either multiplying + * the input value by a factor or dividing it with a factor and + * handling any 32-bit overflows. + * for the details see __msecs_to_jiffies() + * + * the HZ range specific helpers _msecs_to_jiffies() are called from + * __msecs_to_jiffies(). + */ +static inline unsigned long msecs_to_jiffies(const unsigned int m) +{ + return __msecs_to_jiffies(m); +} + extern unsigned long usecs_to_jiffies(const unsigned int u); extern unsigned long timespec_to_jiffies(const struct timespec *value); extern void jiffies_to_timespec(const unsigned long jiffies, -- cgit v1.1 From daa67b4b70568a07fef3cffacb2055891bf42ddb Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 18 May 2015 14:19:14 +0200 Subject: time: Allow gcc to fold constants when possible To allow constant folding in msecs_to_jiffies() conditionally calls the HZ dependent _msecs_to_jiffies() helpers or, when gcc can not figure out constant folding, __msecs_to_jiffies which is the renamed original msecs_to_jiffies() function. Signed-off-by: Nicholas Mc Guire Cc: Masahiro Yamada Cc: Sam Ravnborg Cc: Joe Perches Cc: John Stultz Cc: Andrew Hunter Cc: Paul Turner Cc: Michal Marek Link: http://lkml.kernel.org/r/1431951554-5563-3-git-send-email-hofrat@osadl.org Signed-off-by: Thomas Gleixner --- include/linux/jiffies.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 9527ddb..5e75af6 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -343,12 +343,24 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m) * handling any 32-bit overflows. * for the details see __msecs_to_jiffies() * - * the HZ range specific helpers _msecs_to_jiffies() are called from - * __msecs_to_jiffies(). + * msecs_to_jiffies() checks for the passed in value being a constant + * via __builtin_constant_p() allowing gcc to eliminate most of the + * code, __msecs_to_jiffies() is called if the value passed does not + * allow constant folding and the actual conversion must be done at + * runtime. + * the HZ range specific helpers _msecs_to_jiffies() are called both + * directly here and from __msecs_to_jiffies() in the case where + * constant folding is not possible. */ static inline unsigned long msecs_to_jiffies(const unsigned int m) { - return __msecs_to_jiffies(m); + if (__builtin_constant_p(m)) { + if ((int)m < 0) + return MAX_JIFFY_OFFSET; + return _msecs_to_jiffies(m); + } else { + return __msecs_to_jiffies(m); + } } extern unsigned long usecs_to_jiffies(const unsigned int u); -- cgit v1.1 From 8fff52fd50934580c5108afed12043a774edf728 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 3 Apr 2015 09:04:04 +0530 Subject: clockevents: Introduce CLOCK_EVT_STATE_ONESHOT_STOPPED state When no timers/hrtimers are pending, the expiry time is set to a special value: 'KTIME_MAX'. This normally happens with NO_HZ_{IDLE|FULL} in both LOWRES/HIGHRES modes. When 'expiry == KTIME_MAX', we either cancel the 'tick-sched' hrtimer (NOHZ_MODE_HIGHRES) or skip reprogramming clockevent device (NOHZ_MODE_LOWRES). But, the clockevent device is already reprogrammed from the tick-handler for next tick. As the clock event device is programmed in ONESHOT mode it will at least fire one more time (unnecessarily). Timers on few implementations (like arm_arch_timer, etc.) only support PERIODIC mode and their drivers emulate ONESHOT over that. Which means that on these platforms we will get spurious interrupts periodically (at last programmed interval rate, normally tick rate). In order to avoid spurious interrupts, the clockevent device should be stopped or its interrupts should be masked. A simple (yet hacky) solution to get this fixed could be: update hrtimer_force_reprogram() to always reprogram clockevent device and update clockevent drivers to STOP generating events (or delay it to max time) when 'expires' is set to KTIME_MAX. But the drawback here is that every clockevent driver has to be hacked for this particular case and its very easy for new ones to miss this. However, Thomas suggested to add an optional state ONESHOT_STOPPED to solve this problem: lkml.org/lkml/2014/5/9/508. This patch adds support for ONESHOT_STOPPED state in clockevents core. It will only be available to drivers that implement the state-specific callbacks instead of the legacy ->set_mode() callback. Signed-off-by: Viresh Kumar Reviewed-by: Preeti U. Murthy Cc: linaro-kernel@lists.linaro.org Cc: Frederic Weisbecker Cc: Kevin Hilman Cc: Daniel Lezcano Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/b8b383a03ac07b13312c16850b5106b82e4245b5.1428031396.git.viresh.kumar@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 96c280b..271fa4c 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -37,12 +37,15 @@ enum clock_event_mode { * reached from DETACHED or SHUTDOWN. * ONESHOT: Device is programmed to generate event only once. Can be reached * from DETACHED or SHUTDOWN. + * ONESHOT_STOPPED: Device was programmed in ONESHOT mode and is temporarily + * stopped. */ enum clock_event_state { CLOCK_EVT_STATE_DETACHED, CLOCK_EVT_STATE_SHUTDOWN, CLOCK_EVT_STATE_PERIODIC, CLOCK_EVT_STATE_ONESHOT, + CLOCK_EVT_STATE_ONESHOT_STOPPED, }; /* @@ -90,6 +93,7 @@ enum clock_event_state { * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. * @set_state_periodic: switch state to periodic, if !set_mode * @set_state_oneshot: switch state to oneshot, if !set_mode + * @set_state_oneshot_stopped: switch state to oneshot_stopped, if !set_mode * @set_state_shutdown: switch state to shutdown, if !set_mode * @tick_resume: resume clkevt device, if !set_mode * @broadcast: function to broadcast events @@ -121,11 +125,12 @@ struct clock_event_device { * State transition callback(s): Only one of the two groups should be * defined: * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. - * - set_state_{shutdown|periodic|oneshot}(), tick_resume(). + * - set_state_{shutdown|periodic|oneshot|oneshot_stopped}(), tick_resume(). */ void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); int (*set_state_periodic)(struct clock_event_device *); int (*set_state_oneshot)(struct clock_event_device *); + int (*set_state_oneshot_stopped)(struct clock_event_device *); int (*set_state_shutdown)(struct clock_event_device *); int (*tick_resume)(struct clock_event_device *); -- cgit v1.1 From 4e3d9cb0134fea035e6eb1707e5e7d8aaffa186d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 May 2015 17:14:51 +0200 Subject: jiffies: Remove the extra indentation level Somehow I missed to clean that up when applying the patches. Fix it up now. Reported-by: Joe Perches Signed-off-by: Thomas Gleixner Cc: Nicholas Mc Guire --- include/linux/jiffies.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 5e75af6..3bde5eb 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -298,7 +298,7 @@ extern unsigned long __msecs_to_jiffies(const unsigned int m); */ static inline unsigned long _msecs_to_jiffies(const unsigned int m) { - return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); + return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); } #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) /* @@ -309,9 +309,9 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m) */ static inline unsigned long _msecs_to_jiffies(const unsigned int m) { - if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) - return MAX_JIFFY_OFFSET; - return m * (HZ / MSEC_PER_SEC); + if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; + return m * (HZ / MSEC_PER_SEC); } #else /* @@ -320,11 +320,10 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m) */ static inline unsigned long _msecs_to_jiffies(const unsigned int m) { - if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) - return MAX_JIFFY_OFFSET; + if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; - return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) - >> MSEC_TO_HZ_SHR32; + return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) >> MSEC_TO_HZ_SHR32; } #endif /** -- cgit v1.1 From 6374f9124efea5fae9cba263108583c39e22f86b Mon Sep 17 00:00:00 2001 From: Harald Geyer Date: Tue, 7 Apr 2015 11:12:35 +0000 Subject: timekeeping: Provide new API to get the current time resolution This patch series introduces a new function u32 ktime_get_resolution_ns(void) which allows to clean up some driver code. In particular the IIO subsystem has a function to provide timestamps for events but no means to get their resolution. So currently the dht11 driver tries to guess the resolution in a rather messy and convoluted way. We can do much better with the new code. This API is not designed to be exposed to user space. This has been tested on i386, sunxi and mxs. Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Harald Geyer [jstultz: Tweaked to make it build after upstream changes] Signed-off-by: John Stultz --- include/linux/timekeeping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 99176af..9af5c12 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -163,6 +163,7 @@ extern ktime_t ktime_get(void); extern ktime_t ktime_get_with_offset(enum tk_offsets offs); extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs); extern ktime_t ktime_get_raw(void); +extern u32 ktime_get_resolution_ns(void); /** * ktime_get_real - get the real (wall-) time in ktime_t format -- cgit v1.1 From 57d05a93ada77c4f8a6112cbc867a2948dce7991 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 13 May 2015 16:04:47 -0700 Subject: time: Rework debugging variables so they aren't global Ingo suggested that the timekeeping debugging variables recently added should not be global, and should be tied to the timekeeper's read_base. Thus this patch implements that suggestion. This version is different from the earlier versions as it keeps the variables in the timekeeper structure rather then in the tkr. Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 6f8276a..e1f5a11 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -61,6 +61,9 @@ struct tk_read_base { * shifted nano seconds. * @ntp_error_shift: Shift conversion between clock shifted nano seconds and * ntp shifted nano seconds. + * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING) + * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING) + * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) * * Note: For timespec(64) based interfaces wall_to_monotonic is what * we need to add to xtime (or xtime corrected for sub jiffie times) @@ -106,6 +109,18 @@ struct timekeeper { s64 ntp_error; u32 ntp_error_shift; u32 ntp_err_mult; +#ifdef CONFIG_DEBUG_TIMEKEEPING + long last_warning; + /* + * These simple flag variables are managed + * without locks, which is racy, but they are + * ok since we don't really care about being + * super precise about how many events were + * seen, just that a problem was observed. + */ + int underflow_seen; + int overflow_seen; +#endif }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL -- cgit v1.1 From 4e413e8526aa53393d0b3d9ecbdb0436203586ee Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Thu, 7 May 2015 16:20:34 -0700 Subject: tracing: timer: Add deferrable flag to timer_start The timer_start event now shows whether the timer is deferrable in case of a low-res timer. The debug_activate function now includes a deferrable flag while calling the trace_timer_start event. Cc: Thomas Gleixner Cc: Ingo Molnar Acked-by: Steven Rostedt Signed-off-by: Badhri Jagan Sridharan [jstultz: Fixed minor whitespace and grammer tweaks pointed out by Ingo] Signed-off-by: John Stultz --- include/trace/events/timer.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 68c2c20..d7abef1 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -43,15 +43,18 @@ DEFINE_EVENT(timer_class, timer_init, */ TRACE_EVENT(timer_start, - TP_PROTO(struct timer_list *timer, unsigned long expires), + TP_PROTO(struct timer_list *timer, + unsigned long expires, + unsigned int deferrable), - TP_ARGS(timer, expires), + TP_ARGS(timer, expires, deferrable), TP_STRUCT__entry( __field( void *, timer ) __field( void *, function ) __field( unsigned long, expires ) __field( unsigned long, now ) + __field( unsigned int, deferrable ) ), TP_fast_assign( @@ -59,11 +62,13 @@ TRACE_EVENT(timer_start, __entry->function = timer->function; __entry->expires = expires; __entry->now = jiffies; + __entry->deferrable = deferrable; ), - TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]", + TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] defer=%c", __entry->timer, __entry->function, __entry->expires, - (long)__entry->expires - __entry->now) + (long)__entry->expires - __entry->now, + __entry->deferrable > 0 ? 'y':'n') ); /** -- cgit v1.1 From 30f3b3f9836c7c7e1f42ca855bbe8127fff4b99a Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Thu, 9 Apr 2015 09:04:40 +0800 Subject: time: Include math64.h in time64.h On 32-bit systems, timespec64_add_ns() calls __iter_div_u64_rem() which needs math64.h, and we want to include time64.h in some cases. Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Xunlei Pang Signed-off-by: John Stultz --- include/linux/time64.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/time64.h b/include/linux/time64.h index a383147..12d4e82 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -2,6 +2,7 @@ #define _LINUX_TIME64_H #include +#include typedef __s64 time64_t; -- cgit v1.1 From e83d0a4106d81dd08b70318f078f3bad6acdc110 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Thu, 9 Apr 2015 09:04:42 +0800 Subject: time: Remove read_boot_clock() Now that we have a read_boot_clock64() function available on every architecture, and converted all the users to it, it's time to remove the (now unused) read_boot_clock() completely from the kernel. Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Xunlei Pang [jstultz: Minor commit message tweak suggested by Ingo] Signed-off-by: John Stultz --- include/linux/timekeeping.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 9af5c12..3aa72e6 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -267,7 +267,6 @@ extern int persistent_clock_is_local; extern void read_persistent_clock(struct timespec *ts); extern void read_persistent_clock64(struct timespec64 *ts); -extern void read_boot_clock(struct timespec *ts); extern void read_boot_clock64(struct timespec64 *ts); extern int update_persistent_clock(struct timespec now); extern int update_persistent_clock64(struct timespec64 now); -- cgit v1.1 From 3434d23b694e5cb6e44e966914563406c31c4053 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 21 May 2015 13:33:45 +0530 Subject: clockevents: Add helpers to check the state of a clockevent device Some clockevent drivers, once migrated to use per-state callbacks, need to check the state of the clockevent device in their callbacks or interrupt handler. Add accessor functions clockevent_state_*() to get this information. Signed-off-by: Viresh Kumar Cc: linaro-kernel@lists.linaro.org Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/04a717d490335c688dd7af899fbcede97e1bb8ee.1432192527.git.viresh.kumar@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 271fa4c..64214ad 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -149,6 +149,32 @@ struct clock_event_device { struct module *owner; } ____cacheline_aligned; +/* Helpers to verify state of a clockevent device */ +static inline bool clockevent_state_detached(struct clock_event_device *dev) +{ + return dev->state == CLOCK_EVT_STATE_DETACHED; +} + +static inline bool clockevent_state_shutdown(struct clock_event_device *dev) +{ + return dev->state == CLOCK_EVT_STATE_SHUTDOWN; +} + +static inline bool clockevent_state_periodic(struct clock_event_device *dev) +{ + return dev->state == CLOCK_EVT_STATE_PERIODIC; +} + +static inline bool clockevent_state_oneshot(struct clock_event_device *dev) +{ + return dev->state == CLOCK_EVT_STATE_ONESHOT; +} + +static inline bool clockevent_state_oneshot_stopped(struct clock_event_device *dev) +{ + return dev->state == CLOCK_EVT_STATE_ONESHOT_STOPPED; +} + /* * Calculate a multiplication factor for scaled math, which is used to convert * nanoseconds based values to clock ticks: -- cgit v1.1 From be3ef76e9d9b97962c70bd6351787d29071ae481 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 2 Jun 2015 14:30:11 +0200 Subject: clockevents: Rename state to state_use_accessors The only sensible way to make abuse of core internal fields obvious and easy to grep for. Signed-off-by: Thomas Gleixner Cc: Viresh Kumar Cc: Peter Zijlstra --- include/linux/clockchips.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 64214ad..597a1e8 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -87,7 +87,7 @@ enum clock_event_state { * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) * @mode: operating mode, relevant only to ->set_mode(), OBSOLETE - * @state: current state of the device, assigned by the core code + * @state_use_accessors:current state of the device, assigned by the core code * @features: features * @retries: number of forced programming retries * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. @@ -117,7 +117,7 @@ struct clock_event_device { u32 mult; u32 shift; enum clock_event_mode mode; - enum clock_event_state state; + enum clock_event_state state_use_accessors; unsigned int features; unsigned long retries; @@ -152,27 +152,27 @@ struct clock_event_device { /* Helpers to verify state of a clockevent device */ static inline bool clockevent_state_detached(struct clock_event_device *dev) { - return dev->state == CLOCK_EVT_STATE_DETACHED; + return dev->state_use_accessors == CLOCK_EVT_STATE_DETACHED; } static inline bool clockevent_state_shutdown(struct clock_event_device *dev) { - return dev->state == CLOCK_EVT_STATE_SHUTDOWN; + return dev->state_use_accessors == CLOCK_EVT_STATE_SHUTDOWN; } static inline bool clockevent_state_periodic(struct clock_event_device *dev) { - return dev->state == CLOCK_EVT_STATE_PERIODIC; + return dev->state_use_accessors == CLOCK_EVT_STATE_PERIODIC; } static inline bool clockevent_state_oneshot(struct clock_event_device *dev) { - return dev->state == CLOCK_EVT_STATE_ONESHOT; + return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT; } static inline bool clockevent_state_oneshot_stopped(struct clock_event_device *dev) { - return dev->state == CLOCK_EVT_STATE_ONESHOT_STOPPED; + return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT_STOPPED; } /* -- cgit v1.1 From d711b8b30c803b1b2aedf6a3474758798078f9e1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 6 Jun 2015 11:30:00 +0200 Subject: hrtimers: Make sure hrtimer_resolution is unsigned int MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... in the !CONFIG_HIGH_RES_TIMERS case too. And thus fix warnings like this one: net/sched/sch_api.c: In function ‘psched_show’: net/sched/sch_api.c:1891:6: warning: format ‘%x’ expects argument of type ‘unsigned int’, but argument 6 has type ‘long int’ [-Wformat=] (u32)NSEC_PER_SEC / hrtimer_resolution); Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1433583000-32090-1-git-send-email-bp@alien8.de Signed-off-by: Thomas Gleixner Cc: Thomas Gleixner --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 470d876..3f82a7e 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -309,7 +309,7 @@ extern unsigned int hrtimer_resolution; # define MONOTONIC_RES_NSEC LOW_RES_NSEC # define KTIME_MONOTONIC_RES KTIME_LOW_RES -#define hrtimer_resolution LOW_RES_NSEC +#define hrtimer_resolution (unsigned int)LOW_RES_NSEC static inline void hrtimer_peek_ahead_timers(void) { } -- cgit v1.1 From ae60d6a0e3a9197d37f8c8c4584a8ecd18518cd6 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Thu, 28 May 2015 19:09:55 +0200 Subject: time: Refactor usecs_to_jiffies Refactor the usecs_to_jiffies conditional code part in time.c and jiffies.h putting it into conditional functions rather than #ifdefs to improve readability. This is analogous to the msecs_to_jiffies() cleanup in commit ca42aaf0c861 ("time: Refactor msecs_to_jiffies") Signed-off-by: Nicholas Mc Guire Cc: Masahiro Yamada Cc: Sam Ravnborg Cc: Joe Perches Cc: John Stultz Cc: Andrew Hunter Cc: Paul Turner Cc: Michal Marek Link: http://lkml.kernel.org/r/1432832996-12129-1-git-send-email-hofrat@osadl.org Signed-off-by: Thomas Gleixner --- include/linux/jiffies.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 3bde5eb..a316ebe 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -362,7 +362,32 @@ static inline unsigned long msecs_to_jiffies(const unsigned int m) } } -extern unsigned long usecs_to_jiffies(const unsigned int u); +extern unsigned long __usecs_to_jiffies(const unsigned int u); +#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) +static inline unsigned long _usecs_to_jiffies(const unsigned int u) +{ + return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ); +} +#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) +static inline unsigned long _usecs_to_jiffies(const unsigned int u) +{ + return u * (HZ / USEC_PER_SEC); +} +static inline unsigned long _usecs_to_jiffies(const unsigned int u) +{ +#else +static inline unsigned long _usecs_to_jiffies(const unsigned int u) +{ + return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32) + >> USEC_TO_HZ_SHR32; +} +#endif + +static inline unsigned long usecs_to_jiffies(const unsigned int u) +{ + return __usecs_to_jiffies(u); +} + extern unsigned long timespec_to_jiffies(const struct timespec *value); extern void jiffies_to_timespec(const unsigned long jiffies, struct timespec *value); -- cgit v1.1 From c569a23d65ac2900d9998d3fe04044fe95be6b2f Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Thu, 28 May 2015 19:09:56 +0200 Subject: time: Allow gcc to fold usecs_to_jiffies(constant) To allow constant folding in usecs_to_jiffies() conditionally calls the HZ dependent _usecs_to_jiffies() helpers or, when gcc can not figure out constant folding, __usecs_to_jiffies, which is the renamed original usecs_to_jiffies() function. Signed-off-by: Nicholas Mc Guire Cc: Masahiro Yamada Cc: Sam Ravnborg Cc: Joe Perches Cc: John Stultz Cc: Andrew Hunter Cc: Paul Turner Cc: Michal Marek Link: http://lkml.kernel.org/r/1432832996-12129-2-git-send-email-hofrat@osadl.org Signed-off-by: Thomas Gleixner --- include/linux/jiffies.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index a316ebe..535fd3b 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -383,9 +383,37 @@ static inline unsigned long _usecs_to_jiffies(const unsigned int u) } #endif +/** + * usecs_to_jiffies: - convert microseconds to jiffies + * @u: time in microseconds + * + * conversion is done as follows: + * + * - 'too large' values [that would result in larger than + * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. + * + * - all other values are converted to jiffies by either multiplying + * the input value by a factor or dividing it with a factor and + * handling any 32-bit overflows as for msecs_to_jiffies. + * + * usecs_to_jiffies() checks for the passed in value being a constant + * via __builtin_constant_p() allowing gcc to eliminate most of the + * code, __usecs_to_jiffies() is called if the value passed does not + * allow constant folding and the actual conversion must be done at + * runtime. + * the HZ range specific helpers _usecs_to_jiffies() are called both + * directly here and from __msecs_to_jiffies() in the case where + * constant folding is not possible. + */ static inline unsigned long usecs_to_jiffies(const unsigned int u) { - return __usecs_to_jiffies(u); + if (__builtin_constant_p(u)) { + if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; + return _usecs_to_jiffies(u); + } else { + return __usecs_to_jiffies(u); + } } extern unsigned long timespec_to_jiffies(const struct timespec *value); -- cgit v1.1 From 833f32d763028c1bb371c64f457788b933773b3e Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 11 Jun 2015 15:54:55 -0700 Subject: time: Prevent early expiry of hrtimers[CLOCK_REALTIME] at the leap second edge Currently, leapsecond adjustments are done at tick time. As a result, the leapsecond was applied at the first timer tick *after* the leapsecond (~1-10ms late depending on HZ), rather then exactly on the second edge. This was in part historical from back when we were always tick based, but correcting this since has been avoided since it adds extra conditional checks in the gettime fastpath, which has performance overhead. However, it was recently pointed out that ABS_TIME CLOCK_REALTIME timers set for right after the leapsecond could fire a second early, since some timers may be expired before we trigger the timekeeping timer, which then applies the leapsecond. This isn't quite as bad as it sounds, since behaviorally it is similar to what is possible w/ ntpd made leapsecond adjustments done w/o using the kernel discipline. Where due to latencies, timers may fire just prior to the settimeofday call. (Also, one should note that all applications using CLOCK_REALTIME timers should always be careful, since they are prone to quirks from settimeofday() disturbances.) However, the purpose of having the kernel do the leap adjustment is to avoid such latencies, so I think this is worth fixing. So in order to properly keep those timers from firing a second early, this patch modifies the ntp and timekeeping logic so that we keep enough state so that the update_base_offsets_now accessor, which provides the hrtimer core the current time, can check and apply the leapsecond adjustment on the second edge. This prevents the hrtimer core from expiring timers too early. This patch does not modify any other time read path, so no additional overhead is incurred. However, this also means that the leap-second continues to be applied at tick time for all other read-paths. Apologies to Richard Cochran, who pushed for similar changes years ago, which I resisted due to the concerns about the performance overhead. While I suspect this isn't extremely critical, folks who care about strict leap-second correctness will likely want to watch this. Potentially a -stable candidate eventually. Originally-suggested-by: Richard Cochran Reported-by: Daniel Bristot de Oliveira Reported-by: Prarit Bhargava Signed-off-by: John Stultz Cc: Richard Cochran Cc: Jan Kara Cc: Jiri Bohac Cc: Shuah Khan Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1434063297-28657-4-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/time64.h | 1 + include/linux/timekeeper_internal.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/time64.h b/include/linux/time64.h index 12d4e82..77b5df2 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -29,6 +29,7 @@ struct timespec64 { #define FSEC_PER_SEC 1000000000000000LL /* Located here for timespec[64]_valid_strict */ +#define TIME64_MAX ((s64)~((u64)1 << 63)) #define KTIME_MAX ((s64)~((u64)1 << 63)) #define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index e1f5a11..2524722 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -50,6 +50,7 @@ struct tk_read_base { * @offs_tai: Offset clock monotonic -> clock tai * @tai_offset: The current UTC to TAI offset in seconds * @clock_was_set_seq: The sequence number of clock was set events + * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second * @raw_time: Monotonic raw base time in timespec64 format * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP @@ -90,6 +91,7 @@ struct timekeeper { ktime_t offs_tai; s32 tai_offset; unsigned int clock_was_set_seq; + ktime_t next_leap_ktime; struct timespec64 raw_time; /* The following members are for timekeeping internal use */ -- cgit v1.1 From c04dca02bc73096435a5c36efd5ccb2171edcbe1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 11 Jun 2015 14:46:44 +0200 Subject: hrtimer: Remove HRTIMER_STATE_MIGRATE I do not understand HRTIMER_STATE_MIGRATE. Unless I am totally confused it looks buggy and simply unneeded. migrate_hrtimer_list() sets it to keep hrtimer_active() == T, but this is not enough: this can fool, say, hrtimer_is_queued() in dequeue_signal(). Can't migrate_hrtimer_list() simply use HRTIMER_STATE_ENQUEUED? This fixes the race and we can kill STATE_MIGRATE. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: ktkhai@parallels.com Cc: rostedt@goodmis.org Cc: juri.lelli@gmail.com Cc: pang.xunlei@linaro.org Cc: wanpeng.li@linux.intel.com Cc: umgwanakikbuti@gmail.com Link: http://lkml.kernel.org/r/20150611124743.072387650@infradead.org Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 3f82a7e..2f9e57d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -70,17 +70,13 @@ enum hrtimer_restart { * the handling of the timer. * * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state - * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. This - * also affects HRTIMER_STATE_MIGRATE where the preservation is not - * necessary. HRTIMER_STATE_MIGRATE is cleared after the timer is - * enqueued on the new cpu. + * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. * * All state transitions are protected by cpu_base->lock. */ #define HRTIMER_STATE_INACTIVE 0x00 #define HRTIMER_STATE_ENQUEUED 0x01 #define HRTIMER_STATE_CALLBACK 0x02 -#define HRTIMER_STATE_MIGRATE 0x04 /** * struct hrtimer - the basic hrtimer structure -- cgit v1.1 From a7c6f571ff51cc77d90dd54968f7c5c938c43998 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2015 14:46:46 +0200 Subject: seqcount: Rename write_seqcount_barrier() I'll shortly be introducing another seqcount primitive that's useful to provide ordering semantics and would like to use the write_seqcount_barrier() name for that. Seeing how there's only one user of the current primitive, lets rename it to invalidate, as that appears what its doing. While there, employ lockdep_assert_held() instead of assert_spin_locked() to not generate debug code for regular kernels. Signed-off-by: Peter Zijlstra (Intel) Cc: ktkhai@parallels.com Cc: rostedt@goodmis.org Cc: juri.lelli@gmail.com Cc: pang.xunlei@linaro.org Cc: Oleg Nesterov Cc: wanpeng.li@linux.intel.com Cc: Paul McKenney Cc: Al Viro Cc: Linus Torvalds Cc: umgwanakikbuti@gmail.com Link: http://lkml.kernel.org/r/20150611124743.279926217@infradead.org Signed-off-by: Thomas Gleixner --- include/linux/seqlock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 5f68d0a..c07e3a5 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -266,13 +266,13 @@ static inline void write_seqcount_end(seqcount_t *s) } /** - * write_seqcount_barrier - invalidate in-progress read-side seq operations + * write_seqcount_invalidate - invalidate in-progress read-side seq operations * @s: pointer to seqcount_t * - * After write_seqcount_barrier, no read-side seq operations will complete + * After write_seqcount_invalidate, no read-side seq operations will complete * successfully and see data older than this. */ -static inline void write_seqcount_barrier(seqcount_t *s) +static inline void write_seqcount_invalidate(seqcount_t *s) { smp_wmb(); s->sequence+=2; -- cgit v1.1 From c4bfa3f5f906aee2e084c5b1fb15caf876338ef8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Jun 2015 14:29:24 +0200 Subject: seqcount: Introduce raw_write_seqcount_barrier() Introduce raw_write_seqcount_barrier(), a new construct that can be used to provide write barrier semantics in seqcount read loops instead of the usual consistency guarantee. raw_write_seqcount_barier() is equivalent to: raw_write_seqcount_begin(); raw_write_seqcount_end(); But avoids issueing two back-to-back smp_wmb() instructions. This construct works because the read side will 'stall' when observing odd values. This means that -- referring to the example in the comment below -- even though there is no (matching) read barrier between the loads of X and Y, we cannot observe !x && !y, because: - if we observe Y == false we must observe the first sequence increment, which makes us loop, until - we observe !(seq & 1) -- the second sequence increment -- at which time we must also observe T == true. Suggested-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: umgwanakikbuti@gmail.com Cc: ktkhai@parallels.com Cc: rostedt@goodmis.org Cc: juri.lelli@gmail.com Cc: pang.xunlei@linaro.org Cc: oleg@redhat.com Cc: wanpeng.li@linux.intel.com Cc: Al Viro Cc: Linus Torvalds Cc: Paul E. McKenney Link: http://lkml.kernel.org/r/20150617122924.GP3644@twins.programming.kicks-ass.net Signed-off-by: Thomas Gleixner --- include/linux/seqlock.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index c07e3a5..486e685 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -233,6 +233,47 @@ static inline void raw_write_seqcount_end(seqcount_t *s) s->sequence++; } +/** + * raw_write_seqcount_barrier - do a seq write barrier + * @s: pointer to seqcount_t + * + * This can be used to provide an ordering guarantee instead of the + * usual consistency guarantee. It is one wmb cheaper, because we can + * collapse the two back-to-back wmb()s. + * + * seqcount_t seq; + * bool X = true, Y = false; + * + * void read(void) + * { + * bool x, y; + * + * do { + * int s = read_seqcount_begin(&seq); + * + * x = X; y = Y; + * + * } while (read_seqcount_retry(&seq, s)); + * + * BUG_ON(!x && !y); + * } + * + * void write(void) + * { + * Y = true; + * + * raw_write_seqcount_barrier(seq); + * + * X = false; + * } + */ +static inline void raw_write_seqcount_barrier(seqcount_t *s) +{ + s->sequence++; + smp_wmb(); + s->sequence++; +} + /* * raw_write_seqcount_latch - redirect readers to even/odd copy * @s: pointer to seqcount_t -- cgit v1.1 From 887d9dc989eb0154492e41e7c07492edbb088ba1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2015 14:46:48 +0200 Subject: hrtimer: Allow hrtimer::function() to free the timer Currently an hrtimer callback function cannot free its own timer because __run_hrtimer() still needs to clear HRTIMER_STATE_CALLBACK after it. Freeing the timer would result in a clear use-after-free. Solve this by using a scheme similar to regular timers; track the current running timer in hrtimer_clock_base::running. Suggested-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Cc: ktkhai@parallels.com Cc: rostedt@goodmis.org Cc: juri.lelli@gmail.com Cc: pang.xunlei@linaro.org Cc: wanpeng.li@linux.intel.com Cc: Al Viro Cc: Linus Torvalds Cc: Paul McKenney Cc: Oleg Nesterov Cc: umgwanakikbuti@gmail.com Link: http://lkml.kernel.org/r/20150611124743.471563047@infradead.org Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2f9e57d..5db0558 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -53,30 +53,25 @@ enum hrtimer_restart { * * 0x00 inactive * 0x01 enqueued into rbtree - * 0x02 callback function running - * 0x04 timer is migrated to another cpu * - * Special cases: - * 0x03 callback function running and enqueued - * (was requeued on another CPU) - * 0x05 timer was migrated on CPU hotunplug + * The callback state is not part of the timer->state because clearing it would + * mean touching the timer after the callback, this makes it impossible to free + * the timer from the callback function. * - * The "callback function running and enqueued" status is only possible on - * SMP. It happens for example when a posix timer expired and the callback + * Therefore we track the callback state in: + * + * timer->base->cpu_base->running == timer + * + * On SMP it is possible to have a "callback function running and enqueued" + * status. It happens for example when a posix timer expired and the callback * queued a signal. Between dropping the lock which protects the posix timer * and reacquiring the base lock of the hrtimer, another CPU can deliver the - * signal and rearm the timer. We have to preserve the callback running state, - * as otherwise the timer could be removed before the softirq code finishes the - * the handling of the timer. - * - * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state - * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. + * signal and rearm the timer. * * All state transitions are protected by cpu_base->lock. */ #define HRTIMER_STATE_INACTIVE 0x00 #define HRTIMER_STATE_ENQUEUED 0x01 -#define HRTIMER_STATE_CALLBACK 0x02 /** * struct hrtimer - the basic hrtimer structure @@ -163,6 +158,8 @@ enum hrtimer_base_type { * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers + * @seq: seqcount around __run_hrtimer + * @running: pointer to the currently running hrtimer * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events @@ -184,6 +181,8 @@ enum hrtimer_base_type { */ struct hrtimer_cpu_base { raw_spinlock_t lock; + seqcount_t seq; + struct hrtimer *running; unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; @@ -391,15 +390,7 @@ extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); extern u64 hrtimer_get_next_event(void); -/* - * A timer is active, when it is enqueued into the rbtree or the - * callback function is running or it's in the state of being migrated - * to another cpu. - */ -static inline int hrtimer_active(const struct hrtimer *timer) -{ - return timer->state != HRTIMER_STATE_INACTIVE; -} +extern bool hrtimer_active(const struct hrtimer *timer); /* * Helper function to check, whether the timer is on one of the queues @@ -415,7 +406,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer) */ static inline int hrtimer_callback_running(struct hrtimer *timer) { - return timer->state & HRTIMER_STATE_CALLBACK; + return timer->base->cpu_base->running == timer; } /* Forward a hrtimer so it expires after now: */ -- cgit v1.1 From 1dabbcec2c0a36fe43509d06499b9e512e70a028 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 May 2015 22:50:28 +0000 Subject: timer: Use hlist for the timer wheel hash buckets This reduces the size of struct tvec_base by 50% and results in slightly smaller code as well. Before: struct tvec_base: size: 8256, cachelines: 129 text data bss dec hex filename 17698 13297 8256 39251 9953 ../build/kernel/time/timer.o After: struct tvec_base: 4160, cachelines: 65 text data bss dec hex filename 17491 9201 4160 30852 7884 ../build/kernel/time/timer.o Signed-off-by: Thomas Gleixner Reviewed-by: Viresh Kumar Cc: Peter Zijlstra Cc: Paul McKenney Cc: Frederic Weisbecker Cc: Eric Dumazet Cc: John Stultz Cc: Joonwoo Park Cc: Wenbo Wang Link: http://lkml.kernel.org/r/20150526224511.854731214@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index fbb80e0..064ee24 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -14,7 +14,7 @@ struct timer_list { * All fields that change during normal runtime grouped to the * same cacheline */ - struct list_head entry; + struct hlist_node entry; unsigned long expires; struct tvec_base *base; @@ -71,7 +71,7 @@ extern struct tvec_base boot_tvec_bases; #define TIMER_FLAG_MASK 0x3LU #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ - .entry = { .prev = TIMER_ENTRY_STATIC }, \ + .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ .expires = (_expires), \ .data = (_data), \ @@ -168,7 +168,7 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, */ static inline int timer_pending(const struct timer_list * timer) { - return timer->entry.next != NULL; + return timer->entry.pprev != NULL; } extern void add_timer_on(struct timer_list *timer, int cpu); -- cgit v1.1 From 0eeda71bc30d74f66f8231f45621d5ace3419186 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 May 2015 22:50:29 +0000 Subject: timer: Replace timer base by a cpu index Instead of storing a pointer to the per cpu tvec_base we can simply cache a CPU index in the timer_list and use that to get hold of the correct per cpu tvec_base. This is only used in lock_timer_base() and the slightly larger code is peanuts versus the spinlock operation and the d-cache foot print of the timer wheel. Aside of that this allows to get rid of following nuisances: - boot_tvec_base That statically allocated 4k bss data is just kept around so the timer has a home when it gets statically initialized. It serves no other purpose. With the CPU index we assign the timer to CPU0 at static initialization time and therefor can avoid the whole boot_tvec_base dance. That also simplifies the init code, which just can use the per cpu base. Before: text data bss dec hex filename 17491 9201 4160 30852 7884 ../build/kernel/time/timer.o After: text data bss dec hex filename 17440 9193 0 26633 6809 ../build/kernel/time/timer.o - Overloading the base pointer with various flags The CPU index has enough space to hold the flags (deferrable, irqsafe) so we can get rid of the extra masking and bit fiddling with the base pointer. As a benefit we reduce the size of struct timer_list on 64 bit machines. 4 - 8 bytes, a size reduction up to 15% per struct timer_list, which is a real win as we have tons of them embedded in other structs. This changes also the newly added deferrable printout of the timer start trace point to capture and print all timer->flags, which allows us to decode the target cpu of the timer as well. We might have used bitfields for this, but that would change the static initializers and the init function for no value to accomodate big endian bitfields. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul McKenney Cc: Frederic Weisbecker Cc: Eric Dumazet Cc: Viresh Kumar Cc: John Stultz Cc: Joonwoo Park Cc: Wenbo Wang Cc: Steven Rostedt Cc: Badhri Jagan Sridharan Link: http://lkml.kernel.org/r/20150526224511.950084301@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 38 ++++++++++++++++---------------------- include/trace/events/timer.h | 13 ++++++------- 2 files changed, 22 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 064ee24..4a0d52b 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -14,27 +14,23 @@ struct timer_list { * All fields that change during normal runtime grouped to the * same cacheline */ - struct hlist_node entry; - unsigned long expires; - struct tvec_base *base; - - void (*function)(unsigned long); - unsigned long data; - - int slack; + struct hlist_node entry; + unsigned long expires; + void (*function)(unsigned long); + unsigned long data; + u32 flags; + int slack; #ifdef CONFIG_TIMER_STATS - int start_pid; - void *start_site; - char start_comm[16]; + int start_pid; + void *start_site; + char start_comm[16]; #endif #ifdef CONFIG_LOCKDEP - struct lockdep_map lockdep_map; + struct lockdep_map lockdep_map; #endif }; -extern struct tvec_base boot_tvec_bases; - #ifdef CONFIG_LOCKDEP /* * NB: because we have to copy the lockdep_map, setting the lockdep_map key @@ -49,9 +45,6 @@ extern struct tvec_base boot_tvec_bases; #endif /* - * Note that all tvec_bases are at least 4 byte aligned and lower two bits - * of base in timer_list is guaranteed to be zero. Use them for flags. - * * A deferrable timer will work normally when the system is busy, but * will not cause a CPU to come out of idle just to service it; instead, * the timer will be serviced when the CPU eventually wakes up with a @@ -65,17 +58,18 @@ extern struct tvec_base boot_tvec_bases; * workqueue locking issues. It's not meant for executing random crap * with interrupts disabled. Abuse is monitored! */ -#define TIMER_DEFERRABLE 0x1LU -#define TIMER_IRQSAFE 0x2LU - -#define TIMER_FLAG_MASK 0x3LU +#define TIMER_CPUMASK 0x0007FFFF +#define TIMER_MIGRATING 0x00080000 +#define TIMER_BASEMASK (TIMER_CPUMASK | TIMER_MIGRATING) +#define TIMER_DEFERRABLE 0x00100000 +#define TIMER_IRQSAFE 0x00200000 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ .expires = (_expires), \ .data = (_data), \ - .base = (void *)((unsigned long)&boot_tvec_bases + (_flags)), \ + .flags = (_flags), \ .slack = -1, \ __TIMER_LOCKDEP_MAP_INITIALIZER( \ __FILE__ ":" __stringify(__LINE__)) \ diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index d7abef1..073b9ac 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -45,16 +45,16 @@ TRACE_EVENT(timer_start, TP_PROTO(struct timer_list *timer, unsigned long expires, - unsigned int deferrable), + unsigned int flags), - TP_ARGS(timer, expires, deferrable), + TP_ARGS(timer, expires, flags), TP_STRUCT__entry( __field( void *, timer ) __field( void *, function ) __field( unsigned long, expires ) __field( unsigned long, now ) - __field( unsigned int, deferrable ) + __field( unsigned int, flags ) ), TP_fast_assign( @@ -62,13 +62,12 @@ TRACE_EVENT(timer_start, __entry->function = timer->function; __entry->expires = expires; __entry->now = jiffies; - __entry->deferrable = deferrable; + __entry->flags = flags; ), - TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] defer=%c", + TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] flags=0x%08x", __entry->timer, __entry->function, __entry->expires, - (long)__entry->expires - __entry->now, - __entry->deferrable > 0 ? 'y':'n') + (long)__entry->expires - __entry->now, __entry->flags) ); /** -- cgit v1.1 From c74441a17eb975b604e339ca6c11b9ab9aaca11f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 May 2015 22:50:31 +0000 Subject: timer: Stats: Simplify the flags handling Simplify the handling of the flag storage for the timer statistics. No intermediate storage anymore. Just hand over the flags field. I left the printout of 'deferrable' for now because changing this would be an ABI update and I have no idea how strong people feel about that. OTOH, I wonder whether we should kill the whole timer stats stuff because all of that information can be retrieved via ftrace/perf as well. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul McKenney Cc: Frederic Weisbecker Cc: Eric Dumazet Cc: Viresh Kumar Cc: John Stultz Cc: Joonwoo Park Cc: Wenbo Wang Link: http://lkml.kernel.org/r/20150526224512.046626248@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 4a0d52b..ff0689b 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -188,13 +188,10 @@ extern void set_timer_slack(struct timer_list *time, int slack_hz); extern int timer_stats_active; -#define TIMER_STATS_FLAG_DEFERRABLE 0x1 - extern void init_timer_stats(void); extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, - void *timerf, char *comm, - unsigned int timer_flag); + void *timerf, char *comm, u32 flags); extern void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr); -- cgit v1.1 From bc7a34b8b9ebfb0f4b8a35a72a0b134fd6c5ef50 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 May 2015 22:50:33 +0000 Subject: timer: Reduce timer migration overhead if disabled Eric reported that the timer_migration sysctl is not really nice performance wise as it needs to check at every timer insertion whether the feature is enabled or not. Further the check does not live in the timer code, so we have an extra function call which checks an extra cache line to figure out that it is disabled. We can do better and store that information in the per cpu (hr)timer bases. I pondered to use a static key, but that's a nightmare to update from the nohz code and the timer base cache line is hot anyway when we select a timer base. The old logic enabled the timer migration unconditionally if CONFIG_NO_HZ was set even if nohz was disabled on the kernel command line. With this modification, we start off with migration disabled. The user visible sysctl is still set to enabled. If the kernel switches to NOHZ migration is enabled, if the user did not disable it via the sysctl prior to the switch. If nohz=off is on the kernel command line, migration stays disabled no matter what. Before: 47.76% hog [.] main 14.84% [kernel] [k] _raw_spin_lock_irqsave 9.55% [kernel] [k] _raw_spin_unlock_irqrestore 6.71% [kernel] [k] mod_timer 6.24% [kernel] [k] lock_timer_base.isra.38 3.76% [kernel] [k] detach_if_pending 3.71% [kernel] [k] del_timer 2.50% [kernel] [k] internal_add_timer 1.51% [kernel] [k] get_nohz_timer_target 1.28% [kernel] [k] __internal_add_timer 0.78% [kernel] [k] timerfn 0.48% [kernel] [k] wake_up_nohz_cpu After: 48.10% hog [.] main 15.25% [kernel] [k] _raw_spin_lock_irqsave 9.76% [kernel] [k] _raw_spin_unlock_irqrestore 6.50% [kernel] [k] mod_timer 6.44% [kernel] [k] lock_timer_base.isra.38 3.87% [kernel] [k] detach_if_pending 3.80% [kernel] [k] del_timer 2.67% [kernel] [k] internal_add_timer 1.33% [kernel] [k] __internal_add_timer 0.73% [kernel] [k] timerfn 0.54% [kernel] [k] wake_up_nohz_cpu Reported-by: Eric Dumazet Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul McKenney Cc: Frederic Weisbecker Cc: Viresh Kumar Cc: John Stultz Cc: Joonwoo Park Cc: Wenbo Wang Link: http://lkml.kernel.org/r/20150526224512.127050787@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 ++ include/linux/sched.h | 6 +----- include/linux/sched/sysctl.h | 12 ------------ include/linux/timer.h | 9 +++++++++ 4 files changed, 12 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 5db0558..6955102 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -163,6 +163,7 @@ enum hrtimer_base_type { * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events + * @migration_enabled: The migration of hrtimers to other cpus is enabled * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @next_timer: Pointer to the first expiring timer @@ -186,6 +187,7 @@ struct hrtimer_cpu_base { unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; + bool migration_enabled; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, hres_active : 1, diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a2e61..d715146 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -335,14 +335,10 @@ extern int runqueue_is_locked(int cpu); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); extern void set_cpu_sd_state_idle(void); -extern int get_nohz_timer_target(int pinned); +extern int get_nohz_timer_target(void); #else static inline void nohz_balance_enter_idle(int cpu) { } static inline void set_cpu_sd_state_idle(void) { } -static inline int get_nohz_timer_target(int pinned) -{ - return smp_processor_id(); -} #endif /* diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 596a0e0..c9e4731 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -57,24 +57,12 @@ extern unsigned int sysctl_numa_balancing_scan_size; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; extern unsigned int sysctl_sched_time_avg; -extern unsigned int sysctl_timer_migration; extern unsigned int sysctl_sched_shares_window; int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); #endif -#ifdef CONFIG_SCHED_DEBUG -static inline unsigned int get_sysctl_timer_migration(void) -{ - return sysctl_timer_migration; -} -#else -static inline unsigned int get_sysctl_timer_migration(void) -{ - return 1; -} -#endif /* * control realtime throttling: diff --git a/include/linux/timer.h b/include/linux/timer.h index ff0689b..61aa61d 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -238,6 +238,15 @@ extern void run_local_timers(void); struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +#include + +extern unsigned int sysctl_timer_migration; +int timer_migration_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); +#endif + unsigned long __round_jiffies(unsigned long j, int cpu); unsigned long __round_jiffies_relative(unsigned long j, int cpu); unsigned long round_jiffies(unsigned long j); -- cgit v1.1 From 683be13a284720205228e29207ef11a1c3c322b9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 May 2015 22:50:35 +0000 Subject: timer: Minimize nohz off overhead If nohz is disabled on the kernel command line the [hr]timer code still calls wake_up_nohz_cpu() and tick_nohz_full_cpu(), a pretty pointless exercise. Cache nohz_active in [hr]timer per cpu bases and avoid the overhead. Before: 48.10% hog [.] main 15.25% [kernel] [k] _raw_spin_lock_irqsave 9.76% [kernel] [k] _raw_spin_unlock_irqrestore 6.50% [kernel] [k] mod_timer 6.44% [kernel] [k] lock_timer_base.isra.38 3.87% [kernel] [k] detach_if_pending 3.80% [kernel] [k] del_timer 2.67% [kernel] [k] internal_add_timer 1.33% [kernel] [k] __internal_add_timer 0.73% [kernel] [k] timerfn 0.54% [kernel] [k] wake_up_nohz_cpu After: 48.73% hog [.] main 15.36% [kernel] [k] _raw_spin_lock_irqsave 9.77% [kernel] [k] _raw_spin_unlock_irqrestore 6.61% [kernel] [k] lock_timer_base.isra.38 6.42% [kernel] [k] mod_timer 3.90% [kernel] [k] detach_if_pending 3.76% [kernel] [k] del_timer 2.41% [kernel] [k] internal_add_timer 1.39% [kernel] [k] __internal_add_timer 0.76% [kernel] [k] timerfn We probably should have a cached value for nohz full in the per cpu bases as well to avoid the cpumask check. The base cache line is hot already, the cpumask not necessarily. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul McKenney Cc: Frederic Weisbecker Cc: Eric Dumazet Cc: Viresh Kumar Cc: John Stultz Cc: Joonwoo Park Cc: Wenbo Wang Link: http://lkml.kernel.org/r/20150526224512.207378134@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 6955102..76dd4f0 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -164,6 +164,7 @@ enum hrtimer_base_type { * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events * @migration_enabled: The migration of hrtimers to other cpus is enabled + * @nohz_active: The nohz functionality is enabled * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @next_timer: Pointer to the first expiring timer @@ -188,6 +189,7 @@ struct hrtimer_cpu_base { unsigned int active_bases; unsigned int clock_was_set_seq; bool migration_enabled; + bool nohz_active; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, hres_active : 1, -- cgit v1.1