From 29f1b2b0fecfae69e31833836f1da3136696eee5 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 28 Dec 2017 22:11:36 -0500 Subject: posix-timers: Prevent UB from shifting negative signed value Shifting a negative signed number is undefined behavior. Looking at the macros MAKE_PROCESS_CPUCLOCK and FD_TO_CLOCKID, it seems that the subexpression: (~(clockid_t) (pid) << 3) where clockid_t resolves to a signed int, which once negated, is undefined behavior to shift the value of if the results thus far are negative. It was further suggested to make these macros into inline functions. Suggested-by: Thomas Gleixner Signed-off-by: Nick Desaulniers Signed-off-by: Thomas Gleixner Cc: Dimitri Sivanich Cc: Frederic Weisbecker Cc: Al Viro Cc: linux-kselftest@vger.kernel.org Cc: Shuah Khan Cc: Deepa Dinamani Link: https://lkml.kernel.org/r/1514517100-18051-1-git-send-email-nick.desaulniers@gmail.com --- include/linux/posix-timers.h | 25 +++++++++++++++++++------ kernel/time/posix-clock.c | 2 +- kernel/time/posix-cpu-timers.c | 4 ++-- tools/testing/selftests/ptp/testptp.c | 4 +--- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 672c4f3..c85704f 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -42,13 +42,26 @@ struct cpu_timer_list { #define CLOCKFD CPUCLOCK_MAX #define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK) -#define MAKE_PROCESS_CPUCLOCK(pid, clock) \ - ((~(clockid_t) (pid) << 3) | (clockid_t) (clock)) -#define MAKE_THREAD_CPUCLOCK(tid, clock) \ - MAKE_PROCESS_CPUCLOCK((tid), (clock) | CPUCLOCK_PERTHREAD_MASK) +static inline clockid_t make_process_cpuclock(const unsigned int pid, + const clockid_t clock) +{ + return ((~pid) << 3) | clock; +} +static inline clockid_t make_thread_cpuclock(const unsigned int tid, + const clockid_t clock) +{ + return make_process_cpuclock(tid, clock | CPUCLOCK_PERTHREAD_MASK); +} -#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD) -#define CLOCKID_TO_FD(clk) ((unsigned int) ~((clk) >> 3)) +static inline clockid_t fd_to_clockid(const int fd) +{ + return make_process_cpuclock((unsigned int) fd, CLOCKFD); +} + +static inline int clockid_to_fd(const clockid_t clk) +{ + return ~(clk >> 3); +} #define REQUEUE_PENDING 1 diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 17cdc55..cc91d90 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -216,7 +216,7 @@ struct posix_clock_desc { static int get_clock_desc(const clockid_t id, struct posix_clock_desc *cd) { - struct file *fp = fget(CLOCKID_TO_FD(id)); + struct file *fp = fget(clockid_to_fd(id)); int err = -EINVAL; if (!fp) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 1f27887a..cef79ca 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1363,8 +1363,8 @@ static long posix_cpu_nsleep_restart(struct restart_block *restart_block) return do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t); } -#define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED) -#define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED) +#define PROCESS_CLOCK make_process_cpuclock(0, CPUCLOCK_SCHED) +#define THREAD_CLOCK make_thread_cpuclock(0, CPUCLOCK_SCHED) static int process_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index 5d2eae1..a5d8f0a 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -60,9 +60,7 @@ static int clock_adjtime(clockid_t id, struct timex *tx) static clockid_t get_clockid(int fd) { #define CLOCKFD 3 -#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD) - - return FD_TO_CLOCKID(fd); + return (((unsigned int) ~fd) << 3) | CLOCKFD; } static void handle_alarm(int s) -- cgit v1.1 From f3074a2825d45868e77c5926913986141394d8dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4rber?= Date: Mon, 8 Jan 2018 14:28:40 +0100 Subject: dt-bindings/clocksource: Add Actions Semi S700 timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define a compatible string for the Actions Semi S700 SoC timer. Signed-off-by: Andreas Färber Signed-off-by: Daniel Lezcano Acked-by: Rob Herring Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Rob Herring Cc: Thomas Gleixner Cc: devicetree@vger.kernel.org Link: http://lkml.kernel.org/r/1515418139-23276-1-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- Documentation/devicetree/bindings/timer/actions,owl-timer.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/timer/actions,owl-timer.txt b/Documentation/devicetree/bindings/timer/actions,owl-timer.txt index e3c28da..977054f 100644 --- a/Documentation/devicetree/bindings/timer/actions,owl-timer.txt +++ b/Documentation/devicetree/bindings/timer/actions,owl-timer.txt @@ -2,6 +2,7 @@ Actions Semi Owl Timer Required properties: - compatible : "actions,s500-timer" for S500 + "actions,s700-timer" for S700 "actions,s900-timer" for S900 - reg : Offset and length of the register set for the device. - interrupts : Should contain the interrupts. -- cgit v1.1 From f58639ac29d9fea7c54afca3c8c03596eafbec20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4rber?= Date: Mon, 8 Jan 2018 14:28:41 +0100 Subject: clocksource/drivers/owl: Adopt TIMER_OF_DECLARE() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following commit: 1727339590fd ("clocksource/drivers: Rename CLOCKSOURCE_OF_DECLARE to TIMER_OF_DECLARE") deprecated CLOCKSOURCE_OF_DECLARE(), so adopt the new TIMER_OF_DECLARE() macro instead. Reported-by: Daniel Lezcano Signed-off-by: Andreas Färber Signed-off-by: Daniel Lezcano Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-2-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/owl-timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/owl-timer.c b/drivers/clocksource/owl-timer.c index c686305..9fb4333 100644 --- a/drivers/clocksource/owl-timer.c +++ b/drivers/clocksource/owl-timer.c @@ -168,5 +168,5 @@ static int __init owl_timer_init(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE(owl_s500, "actions,s500-timer", owl_timer_init); -CLOCKSOURCE_OF_DECLARE(owl_s900, "actions,s900-timer", owl_timer_init); +TIMER_OF_DECLARE(owl_s500, "actions,s500-timer", owl_timer_init); +TIMER_OF_DECLARE(owl_s900, "actions,s900-timer", owl_timer_init); -- cgit v1.1 From 0ec5477c3e0e86b3b2750d82355af422a468dc46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4rber?= Date: Mon, 8 Jan 2018 14:28:42 +0100 Subject: clocksource/drivers/owl: Add the S700 timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Actions S700 has two 2Hz timers like S500, and four TIMx timers like S900. Signed-off-by: Andreas Färber Signed-off-by: Daniel Lezcano Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-3-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/owl-timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/owl-timer.c b/drivers/clocksource/owl-timer.c index 9fb4333..ea00a5e 100644 --- a/drivers/clocksource/owl-timer.c +++ b/drivers/clocksource/owl-timer.c @@ -169,4 +169,5 @@ static int __init owl_timer_init(struct device_node *node) return 0; } TIMER_OF_DECLARE(owl_s500, "actions,s500-timer", owl_timer_init); +TIMER_OF_DECLARE(owl_s700, "actions,s700-timer", owl_timer_init); TIMER_OF_DECLARE(owl_s900, "actions,s900-timer", owl_timer_init); -- cgit v1.1 From 542f824607a6968ea443208ccfef3b7daf503559 Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Mon, 8 Jan 2018 14:28:43 +0100 Subject: clocksource/drivers/tcb_clksrc: Fix clock speed message The clock speed displayed at boot in an information message was 500 kHz too high compared to its real value. As the value is not used anywhere, there is no functional impact. Fix the rounding formula to display the correct value. Signed-off-by: Romain Izard Signed-off-by: Daniel Lezcano Acked-by: Nicolas Ferre Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-4-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/tcb_clksrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index 9de47d4..43f4d5c 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -384,7 +384,7 @@ static int __init tcb_clksrc_init(void) printk(bootinfo, clksrc.name, CONFIG_ATMEL_TCB_CLKSRC_BLOCK, divided_rate / 1000000, - ((divided_rate + 500000) % 1000000) / 1000); + ((divided_rate % 1000000) + 500) / 1000); if (tc->tcb_config && tc->tcb_config->counter_width == 32) { /* use apropriate function to read 32 bit counter */ -- cgit v1.1 From 5bbf4ad945a9bb353e77ef71c753ca9bb1e3d978 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:44 +0100 Subject: clocksource/drivers/timer-of: Fix function names All the functions are not prefixed with 'timer_of_', fix the naming in order to have the code consistent. Signed-off-by: Daniel Lezcano Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-5-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-of.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index a319904..ad55654 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -24,7 +24,7 @@ #include "timer-of.h" -static __init void timer_irq_exit(struct of_timer_irq *of_irq) +static __init void timer_of_irq_exit(struct of_timer_irq *of_irq) { struct timer_of *to = container_of(of_irq, struct timer_of, of_irq); @@ -34,8 +34,8 @@ static __init void timer_irq_exit(struct of_timer_irq *of_irq) free_irq(of_irq->irq, clkevt); } -static __init int timer_irq_init(struct device_node *np, - struct of_timer_irq *of_irq) +static __init int timer_of_irq_init(struct device_node *np, + struct of_timer_irq *of_irq) { int ret; struct timer_of *to = container_of(of_irq, struct timer_of, of_irq); @@ -72,15 +72,15 @@ static __init int timer_irq_init(struct device_node *np, return 0; } -static __init void timer_clk_exit(struct of_timer_clk *of_clk) +static __init void timer_of_clk_exit(struct of_timer_clk *of_clk) { of_clk->rate = 0; clk_disable_unprepare(of_clk->clk); clk_put(of_clk->clk); } -static __init int timer_clk_init(struct device_node *np, - struct of_timer_clk *of_clk) +static __init int timer_of_clk_init(struct device_node *np, + struct of_timer_clk *of_clk) { int ret; @@ -116,13 +116,13 @@ out_clk_put: goto out; } -static __init void timer_base_exit(struct of_timer_base *of_base) +static __init void timer_of_base_exit(struct of_timer_base *of_base) { iounmap(of_base->base); } -static __init int timer_base_init(struct device_node *np, - struct of_timer_base *of_base) +static __init int timer_of_base_init(struct device_node *np, + struct of_timer_base *of_base) { const char *name = of_base->name ? of_base->name : np->full_name; @@ -141,21 +141,21 @@ int __init timer_of_init(struct device_node *np, struct timer_of *to) int flags = 0; if (to->flags & TIMER_OF_BASE) { - ret = timer_base_init(np, &to->of_base); + ret = timer_of_base_init(np, &to->of_base); if (ret) goto out_fail; flags |= TIMER_OF_BASE; } if (to->flags & TIMER_OF_CLOCK) { - ret = timer_clk_init(np, &to->of_clk); + ret = timer_of_clk_init(np, &to->of_clk); if (ret) goto out_fail; flags |= TIMER_OF_CLOCK; } if (to->flags & TIMER_OF_IRQ) { - ret = timer_irq_init(np, &to->of_irq); + ret = timer_of_irq_init(np, &to->of_irq); if (ret) goto out_fail; flags |= TIMER_OF_IRQ; @@ -167,13 +167,13 @@ int __init timer_of_init(struct device_node *np, struct timer_of *to) out_fail: if (flags & TIMER_OF_IRQ) - timer_irq_exit(&to->of_irq); + timer_of_irq_exit(&to->of_irq); if (flags & TIMER_OF_CLOCK) - timer_clk_exit(&to->of_clk); + timer_of_clk_exit(&to->of_clk); if (flags & TIMER_OF_BASE) - timer_base_exit(&to->of_base); + timer_of_base_exit(&to->of_base); return ret; } @@ -187,11 +187,11 @@ out_fail: void __init timer_of_cleanup(struct timer_of *to) { if (to->flags & TIMER_OF_IRQ) - timer_irq_exit(&to->of_irq); + timer_of_irq_exit(&to->of_irq); if (to->flags & TIMER_OF_CLOCK) - timer_clk_exit(&to->of_clk); + timer_of_clk_exit(&to->of_clk); if (to->flags & TIMER_OF_BASE) - timer_base_exit(&to->of_base); + timer_of_base_exit(&to->of_base); } -- cgit v1.1 From cf7f46b9b12269d204b6acd0925704543adb6e05 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:45 +0100 Subject: clocksource/drivers/timer-of: Add kernel documentation The current code has no comments, neither any function descriptions. Fix this by adding function descriptions in kernel doc format. Signed-off-by: Daniel Lezcano Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-6-git-send-email-daniel.lezcano@linaro.org [ Spelling and style fixes. ] Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-of.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index ad55654..c1045b9 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -24,6 +24,12 @@ #include "timer-of.h" +/** + * timer_of_irq_exit - Release the interrupt + * @of_irq: an of_timer_irq structure pointer + * + * Free the irq resource + */ static __init void timer_of_irq_exit(struct of_timer_irq *of_irq) { struct timer_of *to = container_of(of_irq, struct timer_of, of_irq); @@ -34,6 +40,22 @@ static __init void timer_of_irq_exit(struct of_timer_irq *of_irq) free_irq(of_irq->irq, clkevt); } +/** + * timer_of_irq_init - Request the interrupt + * @np: a device tree node pointer + * @of_irq: an of_timer_irq structure pointer + * + * Get the interrupt number from the DT from its definition and + * request it. The interrupt is gotten by falling back the following way: + * + * - Get interrupt number by name + * - Get interrupt number by index + * + * When the interrupt is per CPU, 'request_percpu_irq()' is called, + * otherwise 'request_irq()' is used. + * + * Returns 0 on success, < 0 otherwise + */ static __init int timer_of_irq_init(struct device_node *np, struct of_timer_irq *of_irq) { @@ -72,6 +94,12 @@ static __init int timer_of_irq_init(struct device_node *np, return 0; } +/** + * timer_of_clk_exit - Release the clock resources + * @of_clk: a of_timer_clk structure pointer + * + * Disables and releases the refcount on the clk + */ static __init void timer_of_clk_exit(struct of_timer_clk *of_clk) { of_clk->rate = 0; @@ -79,6 +107,15 @@ static __init void timer_of_clk_exit(struct of_timer_clk *of_clk) clk_put(of_clk->clk); } +/** + * timer_of_clk_init - Initialize the clock resources + * @np: a device tree node pointer + * @of_clk: a of_timer_clk structure pointer + * + * Get the clock by name or by index, enable it and get the rate + * + * Returns 0 on success, < 0 otherwise + */ static __init int timer_of_clk_init(struct device_node *np, struct of_timer_clk *of_clk) { -- cgit v1.1 From 286f30db8b713b17e048bb86df1e257fd8695498 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 8 Jan 2018 14:28:46 +0100 Subject: dt-bindings/clocksource: Add Spreadtrum SC9860 timer documentation This patch adds documentation of device tree bindings for the timers found on the Spreadtrum SC9860 platform. Signed-off-by: Baolin Wang Signed-off-by: Daniel Lezcano Acked-by: Rob Herring Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Rob Herring Cc: Thomas Gleixner Cc: devicetree@vger.kernel.org Link: http://lkml.kernel.org/r/1515418139-23276-7-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- .../bindings/timer/spreadtrum,sprd-timer.txt | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 Documentation/devicetree/bindings/timer/spreadtrum,sprd-timer.txt diff --git a/Documentation/devicetree/bindings/timer/spreadtrum,sprd-timer.txt b/Documentation/devicetree/bindings/timer/spreadtrum,sprd-timer.txt new file mode 100644 index 0000000..6d97e7d --- /dev/null +++ b/Documentation/devicetree/bindings/timer/spreadtrum,sprd-timer.txt @@ -0,0 +1,20 @@ +Spreadtrum timers + +The Spreadtrum SC9860 platform provides 3 general-purpose timers. +These timers can support 32bit or 64bit counter, as well as supporting +period mode or one-shot mode, and they are can be wakeup source +during deep sleep. + +Required properties: +- compatible: should be "sprd,sc9860-timer" for SC9860 platform. +- reg: The register address of the timer device. +- interrupts: Should contain the interrupt for the timer device. +- clocks: The phandle to the source clock (usually a 32.768 KHz fixed clock). + +Example: + timer@40050000 { + compatible = "sprd,sc9860-timer"; + reg = <0 0x40050000 0 0x20>; + interrupts = ; + clocks = <&ext_32k>; + }; -- cgit v1.1 From 067bc9144766495650e621b79bd2bc199cee0769 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 8 Jan 2018 14:28:47 +0100 Subject: clocksource/drivers/spreadtrum: Add timer driver for the Spreadtrum SC9860 platform The Spreadtrum SC9860 platform will use the architected timers as local clock events, but we also need a broadcast timer device to wake up the CPUs when the CPUs are in sleep mode. The Spreadtrum timer can support 32-bit or 64-bit counters, as well as supporting period mode or one-shot mode. Signed-off-by: Baolin Wang Signed-off-by: Daniel Lezcano Acked-by: Philippe Ombredanne Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-8-git-send-email-daniel.lezcano@linaro.org [ Minor readability edits. ] Signed-off-by: Ingo Molnar --- drivers/clocksource/Kconfig | 7 ++ drivers/clocksource/Makefile | 1 + drivers/clocksource/timer-sprd.c | 159 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 drivers/clocksource/timer-sprd.c diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index c729a88..0359812 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -441,6 +441,13 @@ config MTK_TIMER help Support for Mediatek timer driver. +config SPRD_TIMER + bool "Spreadtrum timer driver" if COMPILE_TEST + depends on HAS_IOMEM + select TIMER_OF + help + Enables support for the Spreadtrum timer driver. + config SYS_SUPPORTS_SH_MTU2 bool diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 72711f1..d6dec44 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -54,6 +54,7 @@ obj-$(CONFIG_CLKSRC_TI_32K) += timer-ti-32k.o obj-$(CONFIG_CLKSRC_NPS) += timer-nps.o obj-$(CONFIG_OXNAS_RPS_TIMER) += timer-oxnas-rps.o obj-$(CONFIG_OWL_TIMER) += owl-timer.o +obj-$(CONFIG_SPRD_TIMER) += timer-sprd.o obj-$(CONFIG_ARC_TIMERS) += arc_timer.o obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o diff --git a/drivers/clocksource/timer-sprd.c b/drivers/clocksource/timer-sprd.c new file mode 100644 index 0000000..ef9ebea --- /dev/null +++ b/drivers/clocksource/timer-sprd.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017 Spreadtrum Communications Inc. + */ + +#include +#include + +#include "timer-of.h" + +#define TIMER_NAME "sprd_timer" + +#define TIMER_LOAD_LO 0x0 +#define TIMER_LOAD_HI 0x4 +#define TIMER_VALUE_LO 0x8 +#define TIMER_VALUE_HI 0xc + +#define TIMER_CTL 0x10 +#define TIMER_CTL_PERIOD_MODE BIT(0) +#define TIMER_CTL_ENABLE BIT(1) +#define TIMER_CTL_64BIT_WIDTH BIT(16) + +#define TIMER_INT 0x14 +#define TIMER_INT_EN BIT(0) +#define TIMER_INT_RAW_STS BIT(1) +#define TIMER_INT_MASK_STS BIT(2) +#define TIMER_INT_CLR BIT(3) + +#define TIMER_VALUE_SHDW_LO 0x18 +#define TIMER_VALUE_SHDW_HI 0x1c + +#define TIMER_VALUE_LO_MASK GENMASK(31, 0) + +static void sprd_timer_enable(void __iomem *base, u32 flag) +{ + u32 val = readl_relaxed(base + TIMER_CTL); + + val |= TIMER_CTL_ENABLE; + if (flag & TIMER_CTL_64BIT_WIDTH) + val |= TIMER_CTL_64BIT_WIDTH; + else + val &= ~TIMER_CTL_64BIT_WIDTH; + + if (flag & TIMER_CTL_PERIOD_MODE) + val |= TIMER_CTL_PERIOD_MODE; + else + val &= ~TIMER_CTL_PERIOD_MODE; + + writel_relaxed(val, base + TIMER_CTL); +} + +static void sprd_timer_disable(void __iomem *base) +{ + u32 val = readl_relaxed(base + TIMER_CTL); + + val &= ~TIMER_CTL_ENABLE; + writel_relaxed(val, base + TIMER_CTL); +} + +static void sprd_timer_update_counter(void __iomem *base, unsigned long cycles) +{ + writel_relaxed(cycles & TIMER_VALUE_LO_MASK, base + TIMER_LOAD_LO); + writel_relaxed(0, base + TIMER_LOAD_HI); +} + +static void sprd_timer_enable_interrupt(void __iomem *base) +{ + writel_relaxed(TIMER_INT_EN, base + TIMER_INT); +} + +static void sprd_timer_clear_interrupt(void __iomem *base) +{ + u32 val = readl_relaxed(base + TIMER_INT); + + val |= TIMER_INT_CLR; + writel_relaxed(val, base + TIMER_INT); +} + +static int sprd_timer_set_next_event(unsigned long cycles, + struct clock_event_device *ce) +{ + struct timer_of *to = to_timer_of(ce); + + sprd_timer_disable(timer_of_base(to)); + sprd_timer_update_counter(timer_of_base(to), cycles); + sprd_timer_enable(timer_of_base(to), 0); + + return 0; +} + +static int sprd_timer_set_periodic(struct clock_event_device *ce) +{ + struct timer_of *to = to_timer_of(ce); + + sprd_timer_disable(timer_of_base(to)); + sprd_timer_update_counter(timer_of_base(to), timer_of_period(to)); + sprd_timer_enable(timer_of_base(to), TIMER_CTL_PERIOD_MODE); + + return 0; +} + +static int sprd_timer_shutdown(struct clock_event_device *ce) +{ + struct timer_of *to = to_timer_of(ce); + + sprd_timer_disable(timer_of_base(to)); + return 0; +} + +static irqreturn_t sprd_timer_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *ce = (struct clock_event_device *)dev_id; + struct timer_of *to = to_timer_of(ce); + + sprd_timer_clear_interrupt(timer_of_base(to)); + + if (clockevent_state_oneshot(ce)) + sprd_timer_disable(timer_of_base(to)); + + ce->event_handler(ce); + return IRQ_HANDLED; +} + +static struct timer_of to = { + .flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK, + + .clkevt = { + .name = TIMER_NAME, + .rating = 300, + .features = CLOCK_EVT_FEAT_DYNIRQ | CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT, + .set_state_shutdown = sprd_timer_shutdown, + .set_state_periodic = sprd_timer_set_periodic, + .set_next_event = sprd_timer_set_next_event, + .cpumask = cpu_possible_mask, + }, + + .of_irq = { + .handler = sprd_timer_interrupt, + .flags = IRQF_TIMER | IRQF_IRQPOLL, + }, +}; + +static int __init sprd_timer_init(struct device_node *np) +{ + int ret; + + ret = timer_of_init(np, &to); + if (ret) + return ret; + + sprd_timer_enable_interrupt(timer_of_base(&to)); + clockevents_config_and_register(&to.clkevt, timer_of_rate(&to), + 1, UINT_MAX); + + return 0; +} + +TIMER_OF_DECLARE(sc9860_timer, "sprd,sc9860-timer", sprd_timer_init); -- cgit v1.1 From 1c63c1c089a48e1b1821a73dc36a3997ced2f82d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:48 +0100 Subject: clocksource/drivers/timer-of: Store the device node pointer in 'struct timer_of' Under certain circumstances, some specific operations must be done with the device node pointer, which forces the timer code to propagate the pointer to the functions which need it. In order to consolidate the function signatures in the different drivers by using the timer-of structure, let's store it in the timer-of structure as a handy pointer when it is needed. Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-9-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-of.c | 3 +++ drivers/clocksource/timer-of.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index c1045b9..25008d2 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -200,6 +200,9 @@ int __init timer_of_init(struct device_node *np, struct timer_of *to) if (!to->clkevt.name) to->clkevt.name = np->name; + + to->np = np; + return ret; out_fail: diff --git a/drivers/clocksource/timer-of.h b/drivers/clocksource/timer-of.h index 3f708f1..a5478f3 100644 --- a/drivers/clocksource/timer-of.h +++ b/drivers/clocksource/timer-of.h @@ -33,6 +33,7 @@ struct of_timer_clk { struct timer_of { unsigned int flags; + struct device_node *np; struct clock_event_device clkevt; struct of_timer_base of_base; struct of_timer_irq of_irq; -- cgit v1.1 From 9aea417afa6bf52f15a5b194944b6a646d61af04 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:49 +0100 Subject: clocksource/drivers/timer-of: Don't request the resource by name When the driver does not specify a name for the resource, don't use of_io_request_and_map() but of_iomap(). That prevents resource name allocation conflicts on some platforms which have the same name as the node. Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-10-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-of.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index 25008d2..06ed88a 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -161,11 +161,11 @@ static __init void timer_of_base_exit(struct of_timer_base *of_base) static __init int timer_of_base_init(struct device_node *np, struct of_timer_base *of_base) { - const char *name = of_base->name ? of_base->name : np->full_name; - - of_base->base = of_io_request_and_map(np, of_base->index, name); + of_base->base = of_base->name ? + of_io_request_and_map(np, of_base->index, of_base->name) : + of_iomap(np, of_base->index); if (IS_ERR(of_base->base)) { - pr_err("Failed to iomap (%s)\n", name); + pr_err("Failed to iomap (%s)\n", of_base->name); return PTR_ERR(of_base->base); } -- cgit v1.1 From e0aeca3d8cbaea514eb98df1149faa918f9ec42d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:50 +0100 Subject: clocksource/drivers/stm32: Fix kernel panic with multiple timers The current code hides a couple of bugs: - The global variable 'clock_event_ddata' is overwritten each time the init function is invoked. This is fixed with a kmemdup() instead of assigning the global variable. That prevents a memory corruption when several timers are defined in the DT. - The clockevent's event_handler is NULL if the time framework does not select the clockevent when registering it, this is fine but the init code generates in any case an interrupt leading to dereference this NULL pointer. The stm32 timer works with shadow registers, a mechanism to cache the registers. When a change is done in one buffered register, we need to artificially generate an event to force the timer to copy the content of the register to the shadowed register. The auto-reload register (ARR) is one of the shadowed register as well as the prescaler register (PSC), so in order to force the copy, we issue an event which in turn leads to an interrupt and the NULL dereference. This is fixed by inverting two lines where we clear the status register before enabling the update event interrupt. As this kernel crash is resulting from the combination of these two bugs, the fixes are grouped into a single patch. Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1515418139-23276-11-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 8f24237..4bfeb99 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -106,6 +106,10 @@ static int __init stm32_clockevent_init(struct device_node *np) unsigned long rate, max_delta; int irq, ret, bits, prescaler = 1; + data = kmemdup(&clock_event_ddata, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + clk = of_clk_get(np, 0); if (IS_ERR(clk)) { ret = PTR_ERR(clk); @@ -156,8 +160,8 @@ static int __init stm32_clockevent_init(struct device_node *np) writel_relaxed(prescaler - 1, data->base + TIM_PSC); writel_relaxed(TIM_EGR_UG, data->base + TIM_EGR); - writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER); writel_relaxed(0, data->base + TIM_SR); + writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER); data->periodic_top = DIV_ROUND_CLOSEST(rate, prescaler * HZ); @@ -184,6 +188,7 @@ err_iomap: err_clk_enable: clk_put(clk); err_clk_get: + kfree(data); return ret; } -- cgit v1.1 From d04af4908a7283bc6ae0dd9475ccf807d094f8ba Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 8 Jan 2018 14:28:51 +0100 Subject: clocksource/drivers/stm32: Convert the driver to timer_of primitives Convert the driver to use the timer_of() helpers. This allows the removal of a custom private structure, factors out and simplifies the code. [Daniel Lezcano]: Respin against the critical fix patch and massaged the changelog. Tested-by: Benjamin Gaignard Signed-off-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-12-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/Kconfig | 1 + drivers/clocksource/timer-stm32.c | 187 +++++++++++++++----------------------- 2 files changed, 74 insertions(+), 114 deletions(-) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 0359812..b3b4ed9 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -269,6 +269,7 @@ config CLKSRC_STM32 bool "Clocksource for STM32 SoCs" if !ARCH_STM32 depends on OF && ARM && (ARCH_STM32 || COMPILE_TEST) select CLKSRC_MMIO + select TIMER_OF config CLKSRC_MPS2 bool "Clocksource for MPS2 SoCs" if COMPILE_TEST diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 4bfeb99..3e4ab07 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -16,6 +16,9 @@ #include #include #include +#include + +#include "timer-of.h" #define TIM_CR1 0x00 #define TIM_DIER 0x0c @@ -34,162 +37,118 @@ #define TIM_EGR_UG BIT(0) -struct stm32_clock_event_ddata { - struct clock_event_device evtdev; - unsigned periodic_top; - void __iomem *base; -}; - -static int stm32_clock_event_shutdown(struct clock_event_device *evtdev) +static int stm32_clock_event_shutdown(struct clock_event_device *clkevt) { - struct stm32_clock_event_ddata *data = - container_of(evtdev, struct stm32_clock_event_ddata, evtdev); - void *base = data->base; + struct timer_of *to = to_timer_of(clkevt); + + writel_relaxed(0, timer_of_base(to) + TIM_CR1); - writel_relaxed(0, base + TIM_CR1); return 0; } -static int stm32_clock_event_set_periodic(struct clock_event_device *evtdev) +static int stm32_clock_event_set_periodic(struct clock_event_device *clkevt) { - struct stm32_clock_event_ddata *data = - container_of(evtdev, struct stm32_clock_event_ddata, evtdev); - void *base = data->base; + struct timer_of *to = to_timer_of(clkevt); + + writel_relaxed(timer_of_period(to), timer_of_base(to) + TIM_ARR); + writel_relaxed(TIM_CR1_ARPE | TIM_CR1_CEN, timer_of_base(to) + TIM_CR1); - writel_relaxed(data->periodic_top, base + TIM_ARR); - writel_relaxed(TIM_CR1_ARPE | TIM_CR1_CEN, base + TIM_CR1); return 0; } static int stm32_clock_event_set_next_event(unsigned long evt, - struct clock_event_device *evtdev) + struct clock_event_device *clkevt) { - struct stm32_clock_event_ddata *data = - container_of(evtdev, struct stm32_clock_event_ddata, evtdev); + struct timer_of *to = to_timer_of(clkevt); - writel_relaxed(evt, data->base + TIM_ARR); + writel_relaxed(evt, timer_of_base(to) + TIM_ARR); writel_relaxed(TIM_CR1_ARPE | TIM_CR1_OPM | TIM_CR1_CEN, - data->base + TIM_CR1); + timer_of_base(to) + TIM_CR1); return 0; } static irqreturn_t stm32_clock_event_handler(int irq, void *dev_id) { - struct stm32_clock_event_ddata *data = dev_id; + struct clock_event_device *clkevt = (struct clock_event_device *)dev_id; + struct timer_of *to = to_timer_of(clkevt); - writel_relaxed(0, data->base + TIM_SR); + writel_relaxed(0, timer_of_base(to) + TIM_SR); - data->evtdev.event_handler(&data->evtdev); + clkevt->event_handler(clkevt); return IRQ_HANDLED; } -static struct stm32_clock_event_ddata clock_event_ddata = { - .evtdev = { - .name = "stm32 clockevent", - .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, - .set_state_shutdown = stm32_clock_event_shutdown, - .set_state_periodic = stm32_clock_event_set_periodic, - .set_state_oneshot = stm32_clock_event_shutdown, - .tick_resume = stm32_clock_event_shutdown, - .set_next_event = stm32_clock_event_set_next_event, - .rating = 200, - }, -}; - -static int __init stm32_clockevent_init(struct device_node *np) +static void __init stm32_clockevent_init(struct timer_of *to) { - struct stm32_clock_event_ddata *data = &clock_event_ddata; - struct clk *clk; - struct reset_control *rstc; - unsigned long rate, max_delta; - int irq, ret, bits, prescaler = 1; - - data = kmemdup(&clock_event_ddata, sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - clk = of_clk_get(np, 0); - if (IS_ERR(clk)) { - ret = PTR_ERR(clk); - pr_err("failed to get clock for clockevent (%d)\n", ret); - goto err_clk_get; - } - - ret = clk_prepare_enable(clk); - if (ret) { - pr_err("failed to enable timer clock for clockevent (%d)\n", - ret); - goto err_clk_enable; - } - - rate = clk_get_rate(clk); - - rstc = of_reset_control_get(np, NULL); - if (!IS_ERR(rstc)) { - reset_control_assert(rstc); - reset_control_deassert(rstc); - } - - data->base = of_iomap(np, 0); - if (!data->base) { - ret = -ENXIO; - pr_err("failed to map registers for clockevent\n"); - goto err_iomap; - } + unsigned long max_delta; + int prescaler; - irq = irq_of_parse_and_map(np, 0); - if (!irq) { - ret = -EINVAL; - pr_err("%pOF: failed to get irq.\n", np); - goto err_get_irq; - } + to->clkevt.name = "stm32_clockevent"; + to->clkevt.features = CLOCK_EVT_FEAT_PERIODIC; + to->clkevt.set_state_shutdown = stm32_clock_event_shutdown; + to->clkevt.set_state_periodic = stm32_clock_event_set_periodic; + to->clkevt.set_state_oneshot = stm32_clock_event_shutdown; + to->clkevt.tick_resume = stm32_clock_event_shutdown; + to->clkevt.set_next_event = stm32_clock_event_set_next_event; /* Detect whether the timer is 16 or 32 bits */ - writel_relaxed(~0U, data->base + TIM_ARR); - max_delta = readl_relaxed(data->base + TIM_ARR); + writel_relaxed(~0U, timer_of_base(to) + TIM_ARR); + max_delta = readl_relaxed(timer_of_base(to) + TIM_ARR); if (max_delta == ~0U) { prescaler = 1; - bits = 32; + to->clkevt.rating = 250; } else { prescaler = 1024; - bits = 16; + to->clkevt.rating = 100; } - writel_relaxed(0, data->base + TIM_ARR); + writel_relaxed(0, timer_of_base(to) + TIM_ARR); - writel_relaxed(prescaler - 1, data->base + TIM_PSC); - writel_relaxed(TIM_EGR_UG, data->base + TIM_EGR); - writel_relaxed(0, data->base + TIM_SR); - writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER); + writel_relaxed(prescaler - 1, timer_of_base(to) + TIM_PSC); + writel_relaxed(TIM_EGR_UG, timer_of_base(to) + TIM_EGR); + writel_relaxed(0, timer_of_base(to) + TIM_SR); + writel_relaxed(TIM_DIER_UIE, timer_of_base(to) + TIM_DIER); - data->periodic_top = DIV_ROUND_CLOSEST(rate, prescaler * HZ); + /* Adjust rate and period given the prescaler value */ + to->of_clk.rate = DIV_ROUND_CLOSEST(to->of_clk.rate, prescaler); + to->of_clk.period = DIV_ROUND_UP(to->of_clk.rate, HZ); - clockevents_config_and_register(&data->evtdev, - DIV_ROUND_CLOSEST(rate, prescaler), - 0x1, max_delta); - - ret = request_irq(irq, stm32_clock_event_handler, IRQF_TIMER, - "stm32 clockevent", data); - if (ret) { - pr_err("%pOF: failed to request irq.\n", np); - goto err_get_irq; - } + clockevents_config_and_register(&to->clkevt, + timer_of_rate(to), 0x1, max_delta); pr_info("%pOF: STM32 clockevent driver initialized (%d bits)\n", - np, bits); + to->np, max_delta == UINT_MAX ? 32 : 16); +} - return ret; +static int __init stm32_timer_init(struct device_node *node) +{ + struct reset_control *rstc; + struct timer_of *to; + int ret; + + to = kzalloc(sizeof(*to), GFP_KERNEL); + if (!to) + return -ENOMEM; + + to->flags = TIMER_OF_IRQ | TIMER_OF_CLOCK | TIMER_OF_BASE; + to->of_irq.handler = stm32_clock_event_handler; + + ret = timer_of_init(node, to); + if (ret) + goto err; -err_get_irq: - iounmap(data->base); -err_iomap: - clk_disable_unprepare(clk); -err_clk_enable: - clk_put(clk); -err_clk_get: - kfree(data); + rstc = of_reset_control_get(node, NULL); + if (!IS_ERR(rstc)) { + reset_control_assert(rstc); + reset_control_deassert(rstc); + } + + stm32_clockevent_init(to); + return 0; +err: + kfree(to); return ret; } -TIMER_OF_DECLARE(stm32, "st,stm32-timer", stm32_clockevent_init); +TIMER_OF_DECLARE(stm32, "st,stm32-timer", stm32_timer_init); -- cgit v1.1 From f2ed8ef1cea41c7e7e5d52199db9c822951ab101 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:52 +0100 Subject: clocksource/drivers/stm32: Use the node name as timer name As there are different timers on the stm32, use the node name for the timer name in order to give the indication of which timer the kernel is using. /proc/timer_list gives all the information with the right name, otherwise we end up digging in the kernel log and /proc/interrupt to do the connection between the used timer. Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-13-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 3e4ab07..14b7a2b 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -85,7 +85,7 @@ static void __init stm32_clockevent_init(struct timer_of *to) unsigned long max_delta; int prescaler; - to->clkevt.name = "stm32_clockevent"; + to->clkevt.name = to->np->full_name; to->clkevt.features = CLOCK_EVT_FEAT_PERIODIC; to->clkevt.set_state_shutdown = stm32_clock_event_shutdown; to->clkevt.set_state_periodic = stm32_clock_event_set_periodic; -- cgit v1.1 From 70c62cf910aeba7cb79f4ebc7e6c8edbb37a77f6 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:53 +0100 Subject: clocksource/drivers/stm32: Factor out the timer width sorting code In order to clarify and encapsulate the code for upcoming changes, move the timer width check into a function and add some documentation. Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-14-git-send-email-daniel.lezcano@linaro.org [ Spelling fixes. ] Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 14b7a2b..33c7c90 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -80,9 +80,27 @@ static irqreturn_t stm32_clock_event_handler(int irq, void *dev_id) return IRQ_HANDLED; } +/** + * stm32_timer_width - Sort out the timer width (32/16) + * @to: a pointer to a timer-of structure + * + * Write the 32-bit max value and read/return the result. If the timer + * is 32 bits wide, the result will be UINT_MAX, otherwise it will + * be truncated by the 16-bit register to USHRT_MAX. + * + * Returns UINT_MAX if the timer is 32 bits wide, USHRT_MAX if it is a + * 16 bits wide. + */ +static u32 __init stm32_timer_width(struct timer_of *to) +{ + writel_relaxed(UINT_MAX, timer_of_base(to) + TIM_ARR); + + return readl_relaxed(timer_of_base(to) + TIM_ARR); +} + static void __init stm32_clockevent_init(struct timer_of *to) { - unsigned long max_delta; + u32 width = 0; int prescaler; to->clkevt.name = to->np->full_name; @@ -93,10 +111,8 @@ static void __init stm32_clockevent_init(struct timer_of *to) to->clkevt.tick_resume = stm32_clock_event_shutdown; to->clkevt.set_next_event = stm32_clock_event_set_next_event; - /* Detect whether the timer is 16 or 32 bits */ - writel_relaxed(~0U, timer_of_base(to) + TIM_ARR); - max_delta = readl_relaxed(timer_of_base(to) + TIM_ARR); - if (max_delta == ~0U) { + width = stm32_timer_width(to); + if (width == UINT_MAX) { prescaler = 1; to->clkevt.rating = 250; } else { @@ -115,10 +131,10 @@ static void __init stm32_clockevent_init(struct timer_of *to) to->of_clk.period = DIV_ROUND_UP(to->of_clk.rate, HZ); clockevents_config_and_register(&to->clkevt, - timer_of_rate(to), 0x1, max_delta); + timer_of_rate(to), 0x1, width); pr_info("%pOF: STM32 clockevent driver initialized (%d bits)\n", - to->np, max_delta == UINT_MAX ? 32 : 16); + to->np, width == UINT_MAX ? 32 : 16); } static int __init stm32_timer_init(struct device_node *node) -- cgit v1.1 From 4744daa10dcd3a1470fbeba4945fbf44dcb1b0d1 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 8 Jan 2018 14:28:54 +0100 Subject: clocksource/drivers/stm32: Compute a prescaler value with a targeted rate The prescaler value is arbitrarily set to 1024 without any regard to the timer frequency. For 32-bit timers, there is no need to set a prescaler value as they wrap in an acceptable interval and give the opportunity to have precise timers on this platform. However, for 16-bit timers a prescaler value is needed if we don't want to wrap too often per second which is inefficient and adds more and more error margin. With a targeted clock of 10MHz, the 16 bits are precise enough whatever the timer frequency is as we will compute the prescaler. Tested-by: Benjamin Gaignard Signed-off-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-15-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 33c7c90..928ac28 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -37,6 +37,9 @@ #define TIM_EGR_UG BIT(0) +#define TIM_PSC_MAX USHRT_MAX +#define TIM_PSC_CLKRATE 10000 + static int stm32_clock_event_shutdown(struct clock_event_device *clkevt) { struct timer_of *to = to_timer_of(clkevt); @@ -116,7 +119,14 @@ static void __init stm32_clockevent_init(struct timer_of *to) prescaler = 1; to->clkevt.rating = 250; } else { - prescaler = 1024; + prescaler = DIV_ROUND_CLOSEST(timer_of_rate(to), + TIM_PSC_CLKRATE); + /* + * The prescaler register is an u16, the variable + * can't be greater than TIM_PSC_MAX, let's cap it in + * this case. + */ + prescaler = prescaler < TIM_PSC_MAX ? prescaler : TIM_PSC_MAX; to->clkevt.rating = 100; } writel_relaxed(0, timer_of_base(to) + TIM_ARR); -- cgit v1.1 From 8e82df381b676ae5f6c93ab4a75f56d8f61babc4 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 8 Jan 2018 14:28:55 +0100 Subject: clocksource/drivers/stm32: Add oneshot mode The stm32 timer block is able to have a counter and a comparator. Instead of using the auto-reload register for periodic events, we switch to oneshot mode by using the comparator register. The timer is able to generate an interrupt when the counter overflows but we don't want that as this counter will be use as a clocksource in the next patches. So it is disabled by the UDIS bit of the control register. Tested-by: Benjamin Gaignard Signed-off-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-16-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 56 ++++++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 928ac28..882037f 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -24,14 +24,18 @@ #define TIM_DIER 0x0c #define TIM_SR 0x10 #define TIM_EGR 0x14 +#define TIM_CNT 0x24 #define TIM_PSC 0x28 #define TIM_ARR 0x2c +#define TIM_CCR1 0x34 #define TIM_CR1_CEN BIT(0) +#define TIM_CR1_UDIS BIT(1) #define TIM_CR1_OPM BIT(3) #define TIM_CR1_ARPE BIT(7) #define TIM_DIER_UIE BIT(0) +#define TIM_DIER_CC1IE BIT(1) #define TIM_SR_UIF BIT(0) @@ -40,33 +44,57 @@ #define TIM_PSC_MAX USHRT_MAX #define TIM_PSC_CLKRATE 10000 +static void stm32_clock_event_disable(struct timer_of *to) +{ + writel_relaxed(0, timer_of_base(to) + TIM_DIER); +} + +static void stm32_clock_event_enable(struct timer_of *to) +{ + writel_relaxed(TIM_CR1_UDIS | TIM_CR1_CEN, timer_of_base(to) + TIM_CR1); +} + static int stm32_clock_event_shutdown(struct clock_event_device *clkevt) { struct timer_of *to = to_timer_of(clkevt); - writel_relaxed(0, timer_of_base(to) + TIM_CR1); + stm32_clock_event_disable(to); return 0; } -static int stm32_clock_event_set_periodic(struct clock_event_device *clkevt) +static int stm32_clock_event_set_next_event(unsigned long evt, + struct clock_event_device *clkevt) { struct timer_of *to = to_timer_of(clkevt); + unsigned long now, next; + + next = readl_relaxed(timer_of_base(to) + TIM_CNT) + evt; + writel_relaxed(next, timer_of_base(to) + TIM_CCR1); + now = readl_relaxed(timer_of_base(to) + TIM_CNT); + + if ((next - now) > evt) + return -ETIME; - writel_relaxed(timer_of_period(to), timer_of_base(to) + TIM_ARR); - writel_relaxed(TIM_CR1_ARPE | TIM_CR1_CEN, timer_of_base(to) + TIM_CR1); + writel_relaxed(TIM_DIER_CC1IE, timer_of_base(to) + TIM_DIER); return 0; } -static int stm32_clock_event_set_next_event(unsigned long evt, - struct clock_event_device *clkevt) +static int stm32_clock_event_set_periodic(struct clock_event_device *clkevt) +{ + struct timer_of *to = to_timer_of(clkevt); + + stm32_clock_event_enable(to); + + return stm32_clock_event_set_next_event(timer_of_period(to), clkevt); +} + +static int stm32_clock_event_set_oneshot(struct clock_event_device *clkevt) { struct timer_of *to = to_timer_of(clkevt); - writel_relaxed(evt, timer_of_base(to) + TIM_ARR); - writel_relaxed(TIM_CR1_ARPE | TIM_CR1_OPM | TIM_CR1_CEN, - timer_of_base(to) + TIM_CR1); + stm32_clock_event_enable(to); return 0; } @@ -78,6 +106,11 @@ static irqreturn_t stm32_clock_event_handler(int irq, void *dev_id) writel_relaxed(0, timer_of_base(to) + TIM_SR); + if (clockevent_state_periodic(clkevt)) + stm32_clock_event_set_periodic(clkevt); + else + stm32_clock_event_shutdown(clkevt); + clkevt->event_handler(clkevt); return IRQ_HANDLED; @@ -108,9 +141,10 @@ static void __init stm32_clockevent_init(struct timer_of *to) to->clkevt.name = to->np->full_name; to->clkevt.features = CLOCK_EVT_FEAT_PERIODIC; + to->clkevt.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; to->clkevt.set_state_shutdown = stm32_clock_event_shutdown; to->clkevt.set_state_periodic = stm32_clock_event_set_periodic; - to->clkevt.set_state_oneshot = stm32_clock_event_shutdown; + to->clkevt.set_state_oneshot = stm32_clock_event_set_oneshot; to->clkevt.tick_resume = stm32_clock_event_shutdown; to->clkevt.set_next_event = stm32_clock_event_set_next_event; @@ -129,12 +163,10 @@ static void __init stm32_clockevent_init(struct timer_of *to) prescaler = prescaler < TIM_PSC_MAX ? prescaler : TIM_PSC_MAX; to->clkevt.rating = 100; } - writel_relaxed(0, timer_of_base(to) + TIM_ARR); writel_relaxed(prescaler - 1, timer_of_base(to) + TIM_PSC); writel_relaxed(TIM_EGR_UG, timer_of_base(to) + TIM_EGR); writel_relaxed(0, timer_of_base(to) + TIM_SR); - writel_relaxed(TIM_DIER_UIE, timer_of_base(to) + TIM_DIER); /* Adjust rate and period given the prescaler value */ to->of_clk.rate = DIV_ROUND_CLOSEST(to->of_clk.rate, prescaler); -- cgit v1.1 From 3c84e75b1e5d8406e12b533d44f54ad84d6e3bd6 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:56 +0100 Subject: clocksource/drivers/stm32: Factor out more of the clockevent code In order to prepare the clocksource code, let's factor out the clockevent code, split the prescaler and timer width code into separate functions. Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-17-git-send-email-daniel.lezcano@linaro.org [ Small edits. ] Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 107 +++++++++++++++++++++++++++++--------- 1 file changed, 82 insertions(+), 25 deletions(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 882037f..0d37f1a 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -44,6 +44,42 @@ #define TIM_PSC_MAX USHRT_MAX #define TIM_PSC_CLKRATE 10000 +struct stm32_timer_private { + int bits; +}; + +/** + * stm32_timer_of_bits_set - set accessor helper + * @to: a timer_of structure pointer + * @bits: the number of bits (16 or 32) + * + * Accessor helper to set the number of bits in the timer-of private + * structure. + * + */ +static void stm32_timer_of_bits_set(struct timer_of *to, int bits) +{ + struct stm32_timer_private *pd = to->private_data; + + pd->bits = bits; +} + +/** + * stm32_timer_of_bits_get - get accessor helper + * @to: a timer_of structure pointer + * + * Accessor helper to get the number of bits in the timer-of private + * structure. + * + * Returns an integer corresponding to the number of bits. + */ +static int stm32_timer_of_bits_get(struct timer_of *to) +{ + struct stm32_timer_private *pd = to->private_data; + + return pd->bits; +} + static void stm32_clock_event_disable(struct timer_of *to) { writel_relaxed(0, timer_of_base(to) + TIM_DIER); @@ -124,35 +160,31 @@ static irqreturn_t stm32_clock_event_handler(int irq, void *dev_id) * is 32 bits wide, the result will be UINT_MAX, otherwise it will * be truncated by the 16-bit register to USHRT_MAX. * - * Returns UINT_MAX if the timer is 32 bits wide, USHRT_MAX if it is a - * 16 bits wide. */ -static u32 __init stm32_timer_width(struct timer_of *to) +static void __init stm32_timer_set_width(struct timer_of *to) { + u32 width; + writel_relaxed(UINT_MAX, timer_of_base(to) + TIM_ARR); - return readl_relaxed(timer_of_base(to) + TIM_ARR); + width = readl_relaxed(timer_of_base(to) + TIM_ARR); + + stm32_timer_of_bits_set(to, width == UINT_MAX ? 32 : 16); } -static void __init stm32_clockevent_init(struct timer_of *to) +/** + * stm32_timer_set_prescaler - Compute and set the prescaler register + * @to: a pointer to a timer-of structure + * + * Depending on the timer width, compute the prescaler to always + * target a 10MHz timer rate for 16 bits. 32-bit timers are + * considered precise and long enough to not use the prescaler. + */ +static void __init stm32_timer_set_prescaler(struct timer_of *to) { - u32 width = 0; - int prescaler; + int prescaler = 1; - to->clkevt.name = to->np->full_name; - to->clkevt.features = CLOCK_EVT_FEAT_PERIODIC; - to->clkevt.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; - to->clkevt.set_state_shutdown = stm32_clock_event_shutdown; - to->clkevt.set_state_periodic = stm32_clock_event_set_periodic; - to->clkevt.set_state_oneshot = stm32_clock_event_set_oneshot; - to->clkevt.tick_resume = stm32_clock_event_shutdown; - to->clkevt.set_next_event = stm32_clock_event_set_next_event; - - width = stm32_timer_width(to); - if (width == UINT_MAX) { - prescaler = 1; - to->clkevt.rating = 250; - } else { + if (stm32_timer_of_bits_get(to) != 32) { prescaler = DIV_ROUND_CLOSEST(timer_of_rate(to), TIM_PSC_CLKRATE); /* @@ -161,7 +193,6 @@ static void __init stm32_clockevent_init(struct timer_of *to) * this case. */ prescaler = prescaler < TIM_PSC_MAX ? prescaler : TIM_PSC_MAX; - to->clkevt.rating = 100; } writel_relaxed(prescaler - 1, timer_of_base(to) + TIM_PSC); @@ -171,12 +202,26 @@ static void __init stm32_clockevent_init(struct timer_of *to) /* Adjust rate and period given the prescaler value */ to->of_clk.rate = DIV_ROUND_CLOSEST(to->of_clk.rate, prescaler); to->of_clk.period = DIV_ROUND_UP(to->of_clk.rate, HZ); +} + +static void __init stm32_clockevent_init(struct timer_of *to) +{ + u32 bits = stm32_timer_of_bits_get(to); - clockevents_config_and_register(&to->clkevt, - timer_of_rate(to), 0x1, width); + to->clkevt.name = to->np->full_name; + to->clkevt.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; + to->clkevt.set_state_shutdown = stm32_clock_event_shutdown; + to->clkevt.set_state_periodic = stm32_clock_event_set_periodic; + to->clkevt.set_state_oneshot = stm32_clock_event_set_oneshot; + to->clkevt.tick_resume = stm32_clock_event_shutdown; + to->clkevt.set_next_event = stm32_clock_event_set_next_event; + to->clkevt.rating = bits == 32 ? 250 : 100; + + clockevents_config_and_register(&to->clkevt, timer_of_rate(to), 0x1, + (1 << bits) - 1); pr_info("%pOF: STM32 clockevent driver initialized (%d bits)\n", - to->np, width == UINT_MAX ? 32 : 16); + to->np, bits); } static int __init stm32_timer_init(struct device_node *node) @@ -196,14 +241,26 @@ static int __init stm32_timer_init(struct device_node *node) if (ret) goto err; + to->private_data = kzalloc(sizeof(struct stm32_timer_private), + GFP_KERNEL); + if (!to->private_data) + goto deinit; + rstc = of_reset_control_get(node, NULL); if (!IS_ERR(rstc)) { reset_control_assert(rstc); reset_control_deassert(rstc); } + stm32_timer_set_width(to); + + stm32_timer_set_prescaler(to); + stm32_clockevent_init(to); return 0; + +deinit: + timer_of_cleanup(to); err: kfree(to); return ret; -- cgit v1.1 From f5ef02bd0e8cf53472cc358a542121366add0c9a Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 8 Jan 2018 14:28:57 +0100 Subject: clocksource/drivers/stm32: Add clocksource functionality The scene is set for the clocksource functionality, let's add it for this driver. Tested-by: Benjamin Gaignard Signed-off-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-18-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 0d37f1a..21b7492 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "timer-of.h" @@ -80,6 +81,13 @@ static int stm32_timer_of_bits_get(struct timer_of *to) return pd->bits; } +static void __iomem *stm32_timer_cnt __read_mostly; + +static u64 notrace stm32_read_sched_clock(void) +{ + return readl_relaxed(stm32_timer_cnt); +} + static void stm32_clock_event_disable(struct timer_of *to) { writel_relaxed(0, timer_of_base(to) + TIM_DIER); @@ -204,6 +212,31 @@ static void __init stm32_timer_set_prescaler(struct timer_of *to) to->of_clk.period = DIV_ROUND_UP(to->of_clk.rate, HZ); } +static int __init stm32_clocksource_init(struct timer_of *to) +{ + u32 bits = stm32_timer_of_bits_get(to); + const char *name = to->np->full_name; + + /* + * This driver allows to register several timers and relies on + * the generic time framework to select the right one. + * However, nothing allows to do the same for the + * sched_clock. We are not interested in a sched_clock for the + * 16-bit timers but only for the 32-bit one, so if no 32-bit + * timer is registered yet, we select this 32-bit timer as a + * sched_clock. + */ + if (bits == 32 && !stm32_timer_cnt) { + stm32_timer_cnt = timer_of_base(to) + TIM_CNT; + sched_clock_register(stm32_read_sched_clock, bits, timer_of_rate(to)); + pr_info("%s: STM32 sched_clock registered\n", name); + } + + return clocksource_mmio_init(timer_of_base(to) + TIM_CNT, name, + timer_of_rate(to), bits == 32 ? 250 : 100, + bits, clocksource_mmio_readl_up); +} + static void __init stm32_clockevent_init(struct timer_of *to) { u32 bits = stm32_timer_of_bits_get(to); @@ -256,6 +289,10 @@ static int __init stm32_timer_init(struct device_node *node) stm32_timer_set_prescaler(to); + ret = stm32_clocksource_init(to); + if (ret) + goto deinit; + stm32_clockevent_init(to); return 0; -- cgit v1.1 From 81abdbbffd69fecdac37fe1d2b44a21227ee23d4 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:58 +0100 Subject: clocksource/drivers/stm32: Add the timer delay callback Add the timer delay callback, that saves us ~90ms of boot time. Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-19-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 21b7492..c7d1dae 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -88,6 +89,13 @@ static u64 notrace stm32_read_sched_clock(void) return readl_relaxed(stm32_timer_cnt); } +static struct delay_timer stm32_timer_delay; + +static unsigned long stm32_read_delay(void) +{ + return readl_relaxed(stm32_timer_cnt); +} + static void stm32_clock_event_disable(struct timer_of *to) { writel_relaxed(0, timer_of_base(to) + TIM_DIER); @@ -230,6 +238,11 @@ static int __init stm32_clocksource_init(struct timer_of *to) stm32_timer_cnt = timer_of_base(to) + TIM_CNT; sched_clock_register(stm32_read_sched_clock, bits, timer_of_rate(to)); pr_info("%s: STM32 sched_clock registered\n", name); + + stm32_timer_delay.read_current_timer = stm32_read_delay; + stm32_timer_delay.freq = timer_of_rate(to); + register_current_timer_delay(&stm32_timer_delay); + pr_info("%s: STM32 delay timer registered\n", name); } return clocksource_mmio_init(timer_of_base(to) + TIM_CNT, name, -- cgit v1.1 From 103bb56a2831bfc7f2d442da9e47f89f37d34952 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:59 +0100 Subject: clocksource/drivers/stm32: Start the timer's counter sooner As we have a lot of timers on this platform, we can have potentially all the timers enabled in the DT, so we don't want to start the timer for every probe otherwise they will be running for nothing as only one will be used. Start the timer only when setting the mode or when the clocksource is enabled. Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-20-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index c7d1dae..e5cdc3a 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -101,7 +101,15 @@ static void stm32_clock_event_disable(struct timer_of *to) writel_relaxed(0, timer_of_base(to) + TIM_DIER); } -static void stm32_clock_event_enable(struct timer_of *to) +/** + * stm32_timer_start - Start the counter without event + * @to: a timer_of structure pointer + * + * Start the timer in order to have the counter reset and start + * incrementing but disable interrupt event when there is a counter + * overflow. By default, the counter direction is used as upcounter. + */ +static void stm32_timer_start(struct timer_of *to) { writel_relaxed(TIM_CR1_UDIS | TIM_CR1_CEN, timer_of_base(to) + TIM_CR1); } @@ -137,7 +145,7 @@ static int stm32_clock_event_set_periodic(struct clock_event_device *clkevt) { struct timer_of *to = to_timer_of(clkevt); - stm32_clock_event_enable(to); + stm32_timer_start(to); return stm32_clock_event_set_next_event(timer_of_period(to), clkevt); } @@ -146,7 +154,7 @@ static int stm32_clock_event_set_oneshot(struct clock_event_device *clkevt) { struct timer_of *to = to_timer_of(clkevt); - stm32_clock_event_enable(to); + stm32_timer_start(to); return 0; } @@ -235,6 +243,13 @@ static int __init stm32_clocksource_init(struct timer_of *to) * sched_clock. */ if (bits == 32 && !stm32_timer_cnt) { + + /* + * Start immediately the counter as we will be using + * it right after. + */ + stm32_timer_start(to); + stm32_timer_cnt = timer_of_base(to) + TIM_CNT; sched_clock_register(stm32_read_sched_clock, bits, timer_of_rate(to)); pr_info("%s: STM32 sched_clock registered\n", name); -- cgit v1.1 From a9445e47d897054876b8f43e46dc5a3eca2b844d Mon Sep 17 00:00:00 2001 From: "Max R. P. Grossmann" Date: Mon, 8 Jan 2018 20:01:57 +0100 Subject: posix-cpu-timers: Make set_process_cpu_timer() more robust Because the return value of cpu_timer_sample_group() is not checked, compilers and static checkers can legitimately warn about a potential use of the uninitialized variable 'now'. This is not a runtime issue as all call sites hand in valid clock ids. Also cpu_timer_sample_group() is invoked unconditionally even when the result is not used because *oldval is NULL. Make the invocation conditional and check the return value. [ tglx: Massage changelog ] Signed-off-by: Max R. P. Grossmann Signed-off-by: Thomas Gleixner Cc: john.stultz@linaro.org Link: https://lkml.kernel.org/r/20180108190157.10048-1-m@max.pm --- kernel/time/posix-cpu-timers.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index cef79ca..ec9f5da 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1189,9 +1189,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, u64 now; WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED); - cpu_timer_sample_group(clock_idx, tsk, &now); - if (oldval) { + if (oldval && cpu_timer_sample_group(clock_idx, tsk, &now) != -EINVAL) { /* * We are setting itimer. The *oldval is absolute and we update * it to be relative, *newval argument is relative and we update -- cgit v1.1 From ae67badaa1643253998cb21d5782e4ea7c231a29 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Jan 2018 23:30:51 +0100 Subject: hrtimer: Optimize the hrtimer code by using static keys for migration_enable/nohz_active The hrtimer_cpu_base::migration_enable and ::nohz_active fields were originally introduced to avoid accessing global variables for these decisions. Still that results in a (cache hot) load and conditional branch, which can be avoided by using static keys. Implement it with static keys and optimize for the most critical case of high performance networking which tends to disable the timer migration functionality. No change in functionality. Signed-off-by: Thomas Gleixner Cc: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: Frederic Weisbecker Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1801142327490.2371@nanos Link: https://lkml.kernel.org/r/20171221104205.7269-2-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 4 --- kernel/time/hrtimer.c | 17 +++------- kernel/time/tick-internal.h | 19 +++++++---- kernel/time/tick-sched.c | 2 +- kernel/time/timer.c | 83 +++++++++++++++++++++++---------------------- 5 files changed, 60 insertions(+), 65 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 012c37f..79b2a8d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -153,8 +153,6 @@ enum hrtimer_base_type { * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events - * @migration_enabled: The migration of hrtimers to other cpus is enabled - * @nohz_active: The nohz functionality is enabled * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @next_timer: Pointer to the first expiring timer @@ -178,8 +176,6 @@ struct hrtimer_cpu_base { unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; - bool migration_enabled; - bool nohz_active; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, hres_active : 1, diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index d325208..1d06d2b 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -178,23 +178,16 @@ hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) #endif } -#ifdef CONFIG_NO_HZ_COMMON -static inline -struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, - int pinned) -{ - if (pinned || !base->migration_enabled) - return base; - return &per_cpu(hrtimer_bases, get_nohz_timer_target()); -} -#else static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned) { +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) + if (static_branch_likely(&timers_migration_enabled) && !pinned) + return &per_cpu(hrtimer_bases, get_nohz_timer_target()); +#endif return base; } -#endif /* * We switch the timer base to a power-optimized selected CPU target, @@ -969,7 +962,7 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * Kick to reschedule the next tick to handle the new timer * on dynticks target. */ - if (new_base->cpu_base->nohz_active) + if (is_timers_nohz_active()) wake_up_nohz_cpu(new_base->cpu_base->cpu); } else { hrtimer_reprogram(timer, new_base); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f8e1845..f690628 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -150,14 +150,19 @@ static inline void tick_nohz_init(void) { } #ifdef CONFIG_NO_HZ_COMMON extern unsigned long tick_nohz_active; -#else +extern void timers_update_nohz(void); +extern struct static_key_false timers_nohz_active; +static inline bool is_timers_nohz_active(void) +{ + return static_branch_likely(&timers_nohz_active); +} +# ifdef CONFIG_SMP +extern struct static_key_false timers_migration_enabled; +# endif +#else /* CONFIG_NO_HZ_COMMON */ +static inline void timers_update_nohz(void) { } #define tick_nohz_active (0) -#endif - -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void timers_update_migration(bool update_nohz); -#else -static inline void timers_update_migration(bool update_nohz) { } +static inline bool is_timers_nohz_active(void) { return false; } #endif DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f7cc7ab..29a5733 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1107,7 +1107,7 @@ static inline void tick_nohz_activate(struct tick_sched *ts, int mode) ts->nohz_mode = mode; /* One update is enough */ if (!test_and_set_bit(0, &tick_nohz_active)) - timers_update_migration(true); + timers_update_nohz(); } /** diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 0bcf00e..d530f72 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -200,8 +200,6 @@ struct timer_base { unsigned long clk; unsigned long next_expiry; unsigned int cpu; - bool migration_enabled; - bool nohz_active; bool is_idle; bool must_forward_clk; DECLARE_BITMAP(pending_map, WHEEL_SIZE); @@ -210,45 +208,57 @@ struct timer_base { static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +#ifdef CONFIG_NO_HZ_COMMON + +DEFINE_STATIC_KEY_FALSE(timers_nohz_active); +static DEFINE_MUTEX(timer_keys_mutex); + +static void timer_update_keys(struct work_struct *work); +static DECLARE_WORK(timer_update_work, timer_update_keys); + +#ifdef CONFIG_SMP unsigned int sysctl_timer_migration = 1; -void timers_update_migration(bool update_nohz) +DEFINE_STATIC_KEY_FALSE(timers_migration_enabled); + +static void timers_update_migration(void) { - bool on = sysctl_timer_migration && tick_nohz_active; - unsigned int cpu; + if (sysctl_timer_migration && tick_nohz_active) + static_branch_enable(&timers_migration_enabled); + else + static_branch_disable(&timers_migration_enabled); +} +#else +static inline void timers_update_migration(void) { } +#endif /* !CONFIG_SMP */ - /* Avoid the loop, if nothing to update */ - if (this_cpu_read(timer_bases[BASE_STD].migration_enabled) == on) - return; +static void timer_update_keys(struct work_struct *work) +{ + mutex_lock(&timer_keys_mutex); + timers_update_migration(); + static_branch_enable(&timers_nohz_active); + mutex_unlock(&timer_keys_mutex); +} - for_each_possible_cpu(cpu) { - per_cpu(timer_bases[BASE_STD].migration_enabled, cpu) = on; - per_cpu(timer_bases[BASE_DEF].migration_enabled, cpu) = on; - per_cpu(hrtimer_bases.migration_enabled, cpu) = on; - if (!update_nohz) - continue; - per_cpu(timer_bases[BASE_STD].nohz_active, cpu) = true; - per_cpu(timer_bases[BASE_DEF].nohz_active, cpu) = true; - per_cpu(hrtimer_bases.nohz_active, cpu) = true; - } +void timers_update_nohz(void) +{ + schedule_work(&timer_update_work); } int timer_migration_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - static DEFINE_MUTEX(mutex); int ret; - mutex_lock(&mutex); + mutex_lock(&timer_keys_mutex); ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!ret && write) - timers_update_migration(false); - mutex_unlock(&mutex); + timers_update_migration(); + mutex_unlock(&timer_keys_mutex); return ret; } -#endif +#endif /* NO_HZ_COMMON */ static unsigned long round_jiffies_common(unsigned long j, int cpu, bool force_up) @@ -534,7 +544,7 @@ __internal_add_timer(struct timer_base *base, struct timer_list *timer) static void trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer) { - if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) + if (!is_timers_nohz_active()) return; /* @@ -849,21 +859,20 @@ static inline struct timer_base *get_timer_base(u32 tflags) return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK); } -#ifdef CONFIG_NO_HZ_COMMON static inline struct timer_base * get_target_base(struct timer_base *base, unsigned tflags) { -#ifdef CONFIG_SMP - if ((tflags & TIMER_PINNED) || !base->migration_enabled) - return get_timer_this_cpu_base(tflags); - return get_timer_cpu_base(tflags, get_nohz_timer_target()); -#else - return get_timer_this_cpu_base(tflags); +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) + if (static_branch_likely(&timers_migration_enabled) && + !(tflags & TIMER_PINNED)) + return get_timer_cpu_base(tflags, get_nohz_timer_target()); #endif + return get_timer_this_cpu_base(tflags); } static inline void forward_timer_base(struct timer_base *base) { +#ifdef CONFIG_NO_HZ_COMMON unsigned long jnow; /* @@ -887,16 +896,8 @@ static inline void forward_timer_base(struct timer_base *base) base->clk = jnow; else base->clk = base->next_expiry; -} -#else -static inline struct timer_base * -get_target_base(struct timer_base *base, unsigned tflags) -{ - return get_timer_this_cpu_base(tflags); -} - -static inline void forward_timer_base(struct timer_base *base) { } #endif +} /* -- cgit v1.1 From d05ca13b8d3f685667b3b1748fa89285466270c5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Dec 2017 11:41:31 +0100 Subject: hrtimer: Correct blatantly incorrect comment The protection of a hrtimer which runs its callback against migration to a different CPU has nothing to do with hard interrupt context. The protection against migration of a hrtimer running the expiry callback is the pointer in the cpu_base which holds a pointer to the currently running timer. This pointer is evaluated in the code which potentially switches the timer base and makes sure it's kept on the CPU on which the callback is running. Reported-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Reviewed-by: Frederic Weisbecker Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-3-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 1d06d2b..7687355 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1195,9 +1195,9 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, timer->is_rel = false; /* - * Because we run timers from hardirq context, there is no chance - * they get migrated to another cpu, therefore its safe to unlock - * the timer base. + * The timer is marked as running in the CPU base, so it is + * protected against migration to a different CPU even if the lock + * is dropped. */ raw_spin_unlock(&cpu_base->lock); trace_hrtimer_expire_entry(timer, now); -- cgit v1.1 From 1fbc78b3c980364d4fc15db83eca4a8e7ad289da Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:32 +0100 Subject: hrtimer: Fix kerneldoc syntax for 'struct hrtimer_cpu_base' The '/**' sequence marks the start of a structure description. Add the missing second asterisk. While at it adapt the ordering of the struct members to the struct definition and document the purpose of expires_next more precisely. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-4-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 79b2a8d..b3a382b 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -144,7 +144,7 @@ enum hrtimer_base_type { HRTIMER_MAX_CLOCK_BASES, }; -/* +/** * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers @@ -153,12 +153,12 @@ enum hrtimer_base_type { * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events - * @expires_next: absolute time of the next event which was scheduled - * via clock_set_next_event() - * @next_timer: Pointer to the first expiring timer * @in_hrtirq: hrtimer_interrupt() is currently executing * @hres_active: State of high resolution mode * @hang_detected: The last hrtimer interrupt detected a hang + * @expires_next: absolute time of the next event, is required for remote + * hrtimer enqueue + * @next_timer: Pointer to the first expiring timer * @nr_events: Total number of hrtimer interrupt events * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs -- cgit v1.1 From 907777136f80d0cc0f714e5a389c4dfa9b4670ee Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:33 +0100 Subject: hrtimer: Clean up the 'int clock' parameter of schedule_hrtimeout_range_clock() schedule_hrtimeout_range_clock() uses an 'int clock' parameter for the clock ID, instead of the customary predefined "clockid_t" type. In hrtimer coding style the canonical variable name for the clock ID is 'clock_id', therefore change the name of the parameter here as well to make it all consistent. While at it, clean up the description for the 'clock_id' and 'mode' function parameters. The clock modes and the clock IDs are not restricted as the comment suggests. Fix the mode description as well for the callers of schedule_hrtimeout_range_clock(). No functional changes intended. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-5-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 2 +- kernel/time/hrtimer.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index b3a382b..931ce9c 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -462,7 +462,7 @@ extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, extern int schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, const enum hrtimer_mode mode, - int clock); + clockid_t clock_id); extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); /* Soft interrupt function to run the hrtimer queues: */ diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 7687355..f2de328 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1662,12 +1662,12 @@ void __init hrtimers_init(void) * schedule_hrtimeout_range_clock - sleep until timeout * @expires: timeout value (ktime_t) * @delta: slack in expires timeout (ktime_t) - * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL - * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME + * @mode: timer mode + * @clock_id: timer clock to be used */ int __sched schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, - const enum hrtimer_mode mode, int clock) + const enum hrtimer_mode mode, clockid_t clock_id) { struct hrtimer_sleeper t; @@ -1688,7 +1688,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, return -EINTR; } - hrtimer_init_on_stack(&t.timer, clock, mode); + hrtimer_init_on_stack(&t.timer, clock_id, mode); hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_init_sleeper(&t, current); @@ -1710,7 +1710,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) * @delta: slack in expires timeout (ktime_t) - * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * @mode: timer mode * * Make the current task sleep until the given expiry time has * elapsed. The routine will return immediately unless @@ -1749,7 +1749,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); /** * schedule_hrtimeout - sleep until timeout * @expires: timeout value (ktime_t) - * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * @mode: timer mode * * Make the current task sleep until the given expiry time has * elapsed. The routine will return immediately unless -- cgit v1.1 From 6de6250c759781daeadca784d0cc34ae73f3b502 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:34 +0100 Subject: hrtimer: Fix hrtimer_start[_range_ns]() function descriptions The hrtimer_start[_range_ns]() functions start a timer reliably on this CPU only when HRTIMER_MODE_PINNED is set. Furthermore the HRTIMER_MODE_PINNED mode is not considered when a hrtimer is initialized. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-6-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 6 +++--- kernel/time/hrtimer.c | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 931ce9c..4e6a884 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -361,11 +361,11 @@ extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 range_ns, const enum hrtimer_mode mode); /** - * hrtimer_start - (re)start an hrtimer on the current CPU + * hrtimer_start - (re)start an hrtimer * @timer: the timer to be added * @tim: expiry time - * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL) + * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) */ static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f2de328..fd08729 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -924,12 +924,12 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, } /** - * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU + * hrtimer_start_range_ns - (re)start an hrtimer * @timer: the timer to be added * @tim: expiry time * @delta_ns: "slack" range for the timer - * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL) + * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) */ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode) @@ -1107,7 +1107,8 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, * hrtimer_init - initialize a timer to the given clock * @timer: the timer to be initialized * @clock_id: the clock to be used - * @mode: timer mode abs/rel + * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or + * relative (HRTIMER_MODE_REL); pinned is not considered here! */ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) -- cgit v1.1 From 48d0c9becc7f3c66874c100c126459a9da0fdced Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:35 +0100 Subject: hrtimer: Ensure POSIX compliance (relative CLOCK_REALTIME hrtimers) The POSIX specification defines that relative CLOCK_REALTIME timers are not affected by clock modifications. Those timers have to use CLOCK_MONOTONIC to ensure POSIX compliance. The introduction of the additional HRTIMER_MODE_PINNED mode broke this requirement for pinned timers. There is no user space visible impact because user space timers are not using pinned mode, but for consistency reasons this needs to be fixed. Check whether the mode has the HRTIMER_MODE_REL bit set instead of comparing with HRTIMER_MODE_ABS. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Fixes: 597d0275736d ("timers: Framework for identifying pinned timers") Link: http://lkml.kernel.org/r/20171221104205.7269-7-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index fd08729..60faade 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1095,7 +1095,12 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, cpu_base = raw_cpu_ptr(&hrtimer_bases); - if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS) + /* + * POSIX magic: Relative CLOCK_REALTIME timers are not affected by + * clock modifications, so they needs to become CLOCK_MONOTONIC to + * ensure POSIX compliance. + */ + if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) clock_id = CLOCK_MONOTONIC; base = hrtimer_clockid_to_base(clock_id); -- cgit v1.1 From 19b51cb5ff6ab7957bcbbec4ff812b83208f7e99 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:36 +0100 Subject: hrtimer: Clean up 'enum hrtimer_mode' It's not obvious that the HRTIMER_MODE variants are bit combinations, because all modes are hard coded constants currently. Change it so the bit meanings are clear; and use the symbols for creating modes which combine bits. While at it get rid of the ugly tail comments as well. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-8-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4e6a884..28f267c 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -28,13 +28,19 @@ struct hrtimer_cpu_base; /* * Mode arguments of xxx_hrtimer functions: + * + * HRTIMER_MODE_ABS - Time value is absolute + * HRTIMER_MODE_REL - Time value is relative to now + * HRTIMER_MODE_PINNED - Timer is bound to CPU (is only considered + * when starting the timer) */ enum hrtimer_mode { - HRTIMER_MODE_ABS = 0x0, /* Time value is absolute */ - HRTIMER_MODE_REL = 0x1, /* Time value is relative to now */ - HRTIMER_MODE_PINNED = 0x02, /* Timer is bound to CPU */ - HRTIMER_MODE_ABS_PINNED = 0x02, - HRTIMER_MODE_REL_PINNED = 0x03, + HRTIMER_MODE_ABS = 0x00, + HRTIMER_MODE_REL = 0x01, + HRTIMER_MODE_PINNED = 0x02, + + HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, + HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, }; /* -- cgit v1.1 From 91633eed73a3ac37aaece5c8c1f93a18bae616a9 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:37 +0100 Subject: tracing/hrtimer: Fix tracing bugs by taking all clock bases and modes into account So far only CLOCK_MONOTONIC and CLOCK_REALTIME were taken into account as well as HRTIMER_MODE_ABS/REL in the hrtimer_init tracepoint. The query for detecting the ABS or REL timer modes is not valid anymore, it got broken by the introduction of HRTIMER_MODE_PINNED. HRTIMER_MODE_PINNED is not evaluated in the hrtimer_init() call, but for the sake of completeness print all given modes. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-9-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/trace/events/timer.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 16e305e..c6f7280 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -136,6 +136,20 @@ DEFINE_EVENT(timer_class, timer_cancel, TP_ARGS(timer) ); +#define decode_clockid(type) \ + __print_symbolic(type, \ + { CLOCK_REALTIME, "CLOCK_REALTIME" }, \ + { CLOCK_MONOTONIC, "CLOCK_MONOTONIC" }, \ + { CLOCK_BOOTTIME, "CLOCK_BOOTTIME" }, \ + { CLOCK_TAI, "CLOCK_TAI" }) + +#define decode_hrtimer_mode(mode) \ + __print_symbolic(mode, \ + { HRTIMER_MODE_ABS, "ABS" }, \ + { HRTIMER_MODE_REL, "REL" }, \ + { HRTIMER_MODE_ABS_PINNED, "ABS|PINNED" }, \ + { HRTIMER_MODE_REL_PINNED, "REL|PINNED" }) + /** * hrtimer_init - called when the hrtimer is initialized * @hrtimer: pointer to struct hrtimer @@ -162,10 +176,8 @@ TRACE_EVENT(hrtimer_init, ), TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer, - __entry->clockid == CLOCK_REALTIME ? - "CLOCK_REALTIME" : "CLOCK_MONOTONIC", - __entry->mode == HRTIMER_MODE_ABS ? - "HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL") + decode_clockid(__entry->clockid), + decode_hrtimer_mode(__entry->mode)) ); /** -- cgit v1.1 From 63e2ed3659752a4850e0ef3a07f809988fcd74a4 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:38 +0100 Subject: tracing/hrtimer: Print the hrtimer mode in the 'hrtimer_start' tracepoint The 'hrtimer_start' tracepoint lacks the mode information. The mode is important because consecutive starts can switch from ABS to REL or from PINNED to non PINNED. Append the mode field. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-10-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/trace/events/timer.h | 13 ++++++++----- kernel/time/hrtimer.c | 16 +++++++++------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index c6f7280..744b431 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -186,15 +186,16 @@ TRACE_EVENT(hrtimer_init, */ TRACE_EVENT(hrtimer_start, - TP_PROTO(struct hrtimer *hrtimer), + TP_PROTO(struct hrtimer *hrtimer, enum hrtimer_mode mode), - TP_ARGS(hrtimer), + TP_ARGS(hrtimer, mode), TP_STRUCT__entry( __field( void *, hrtimer ) __field( void *, function ) __field( s64, expires ) __field( s64, softexpires ) + __field( enum hrtimer_mode, mode ) ), TP_fast_assign( @@ -202,12 +203,14 @@ TRACE_EVENT(hrtimer_start, __entry->function = hrtimer->function; __entry->expires = hrtimer_get_expires(hrtimer); __entry->softexpires = hrtimer_get_softexpires(hrtimer); + __entry->mode = mode; ), - TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu", - __entry->hrtimer, __entry->function, + TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu " + "mode=%s", __entry->hrtimer, __entry->function, (unsigned long long) __entry->expires, - (unsigned long long) __entry->softexpires) + (unsigned long long) __entry->softexpires, + decode_hrtimer_mode(__entry->mode)) ); /** diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 60faade..f4f4658 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -435,10 +435,11 @@ debug_init(struct hrtimer *timer, clockid_t clockid, trace_hrtimer_init(timer, clockid, mode); } -static inline void debug_activate(struct hrtimer *timer) +static inline void debug_activate(struct hrtimer *timer, + enum hrtimer_mode mode) { debug_hrtimer_activate(timer); - trace_hrtimer_start(timer); + trace_hrtimer_start(timer, mode); } static inline void debug_deactivate(struct hrtimer *timer) @@ -828,9 +829,10 @@ EXPORT_SYMBOL_GPL(hrtimer_forward); * Returns 1 when the new timer is the leftmost timer in the tree. */ static int enqueue_hrtimer(struct hrtimer *timer, - struct hrtimer_clock_base *base) + struct hrtimer_clock_base *base, + enum hrtimer_mode mode) { - debug_activate(timer); + debug_activate(timer, mode); base->cpu_base->active_bases |= 1 << base->index; @@ -953,7 +955,7 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, /* Switch the timer base, if necessary: */ new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); - leftmost = enqueue_hrtimer(timer, new_base); + leftmost = enqueue_hrtimer(timer, new_base, mode); if (!leftmost) goto unlock; @@ -1222,7 +1224,7 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, */ if (restart != HRTIMER_NORESTART && !(timer->state & HRTIMER_STATE_ENQUEUED)) - enqueue_hrtimer(timer, base); + enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS); /* * Separate the ->running assignment from the ->state assignment. @@ -1621,7 +1623,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, * sort out already expired timers and reprogram the * event device. */ - enqueue_hrtimer(timer, new_base); + enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS); } } -- cgit v1.1 From c272ca58c3ec5631f4ab507489d9477f74efe645 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:39 +0100 Subject: hrtimer: Switch 'for' loop to _ffs() evaluation Looping over all clock bases to find active bits is suboptimal if not all bases are active. Avoid this by converting it to a __ffs() evaluation. The functionallity is outsourced into its own function and is called via a macro as suggested by Peter Zijlstra. Suggested-by: Peter Zijlstra Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-11-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f4f4658..cfcf8de 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -448,6 +448,23 @@ static inline void debug_deactivate(struct hrtimer *timer) trace_hrtimer_cancel(timer); } +static struct hrtimer_clock_base * +__next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) +{ + unsigned int idx; + + if (!*active) + return NULL; + + idx = __ffs(*active); + *active &= ~(1U << idx); + + return &cpu_base->clock_base[idx]; +} + +#define for_each_active_base(base, cpu_base, active) \ + while ((base = __next_base((cpu_base), &(active)))) + #if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base, struct hrtimer *timer) @@ -459,18 +476,15 @@ static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base, static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) { - struct hrtimer_clock_base *base = cpu_base->clock_base; + struct hrtimer_clock_base *base; unsigned int active = cpu_base->active_bases; ktime_t expires, expires_next = KTIME_MAX; hrtimer_update_next_timer(cpu_base, NULL); - for (; active; base++, active >>= 1) { + for_each_active_base(base, cpu_base, active) { struct timerqueue_node *next; struct hrtimer *timer; - if (!(active & 0x01)) - continue; - next = timerqueue_getnext(&base->active); timer = container_of(next, struct hrtimer, node); expires = ktime_sub(hrtimer_get_expires(timer), base->offset); @@ -1241,16 +1255,13 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) { - struct hrtimer_clock_base *base = cpu_base->clock_base; + struct hrtimer_clock_base *base; unsigned int active = cpu_base->active_bases; - for (; active; base++, active >>= 1) { + for_each_active_base(base, cpu_base, active) { struct timerqueue_node *node; ktime_t basenow; - if (!(active & 0x01)) - continue; - basenow = ktime_add(now, base->offset); while ((node = timerqueue_getnext(&base->active))) { -- cgit v1.1 From 3f0b9e8eec7262648ab9c8321bf931624ee5c10a Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:40 +0100 Subject: hrtimer: Store running timer in hrtimer_clock_base The pointer to the currently running timer is stored in hrtimer_cpu_base before the base lock is dropped and the callback is invoked. This results in two levels of indirections and the upcoming support for softirq based hrtimer requires splitting the "running" storage into soft and hard IRQ context expiry. Storing both in the cpu base would require conditionals in all code paths accessing that information. It's possible to have a per clock base sequence count and running pointer without changing the semantics of the related mechanisms because the timer base pointer cannot be changed while a timer is running the callback. Unfortunately this makes cpu_clock base larger than 32 bytes on 32-bit kernels. Instead of having huge gaps due to alignment, remove the alignment and let the compiler pack CPU base for 32-bit kernels. The resulting cache access patterns are fortunately not really different from the current behaviour. On 64-bit kernels the 64-byte alignment stays and the behaviour is unchanged. This was determined by analyzing the resulting layout and looking at the number of cache lines involved for the frequently used clocks. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-12-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 20 +++++++++----------- kernel/time/hrtimer.c | 28 +++++++++++++--------------- 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 28f267c..1bae7b9 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -118,9 +118,9 @@ struct hrtimer_sleeper { }; #ifdef CONFIG_64BIT -# define HRTIMER_CLOCK_BASE_ALIGN 64 +# define __hrtimer_clock_base_align ____cacheline_aligned #else -# define HRTIMER_CLOCK_BASE_ALIGN 32 +# define __hrtimer_clock_base_align #endif /** @@ -129,18 +129,22 @@ struct hrtimer_sleeper { * @index: clock type index for per_cpu support when moving a * timer to a base on another cpu. * @clockid: clock id for per_cpu support + * @seq: seqcount around __run_hrtimer + * @running: pointer to the currently running hrtimer * @active: red black tree root node for the active timers * @get_time: function to retrieve the current time of the clock * @offset: offset of this clock to the monotonic base */ struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; - int index; + unsigned int index; clockid_t clockid; + seqcount_t seq; + struct hrtimer *running; struct timerqueue_head active; ktime_t (*get_time)(void); ktime_t offset; -} __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); +} __hrtimer_clock_base_align; enum hrtimer_base_type { HRTIMER_BASE_MONOTONIC, @@ -154,8 +158,6 @@ enum hrtimer_base_type { * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers - * @seq: seqcount around __run_hrtimer - * @running: pointer to the currently running hrtimer * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events @@ -177,8 +179,6 @@ enum hrtimer_base_type { */ struct hrtimer_cpu_base { raw_spinlock_t lock; - seqcount_t seq; - struct hrtimer *running; unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; @@ -198,8 +198,6 @@ struct hrtimer_cpu_base { static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { - BUILD_BUG_ON(sizeof(struct hrtimer_clock_base) > HRTIMER_CLOCK_BASE_ALIGN); - timer->node.expires = time; timer->_softexpires = time; } @@ -424,7 +422,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer) */ static inline int hrtimer_callback_running(struct hrtimer *timer) { - return timer->base->cpu_base->running == timer; + return timer->base->running == timer; } /* Forward a hrtimer so it expires after now: */ diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index cfcf8de..e56805f 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -70,7 +70,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = { .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), - .seq = SEQCNT_ZERO(hrtimer_bases.seq), .clock_base = { { @@ -118,7 +117,6 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { * timer->base->cpu_base */ static struct hrtimer_cpu_base migration_cpu_base = { - .seq = SEQCNT_ZERO(migration_cpu_base), .clock_base = { { .cpu_base = &migration_cpu_base, }, }, }; @@ -1148,19 +1146,19 @@ EXPORT_SYMBOL_GPL(hrtimer_init); */ bool hrtimer_active(const struct hrtimer *timer) { - struct hrtimer_cpu_base *cpu_base; + struct hrtimer_clock_base *base; unsigned int seq; do { - cpu_base = READ_ONCE(timer->base->cpu_base); - seq = raw_read_seqcount_begin(&cpu_base->seq); + base = READ_ONCE(timer->base); + seq = raw_read_seqcount_begin(&base->seq); if (timer->state != HRTIMER_STATE_INACTIVE || - cpu_base->running == timer) + base->running == timer) return true; - } while (read_seqcount_retry(&cpu_base->seq, seq) || - cpu_base != READ_ONCE(timer->base->cpu_base)); + } while (read_seqcount_retry(&base->seq, seq) || + base != READ_ONCE(timer->base)); return false; } @@ -1194,16 +1192,16 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, lockdep_assert_held(&cpu_base->lock); debug_deactivate(timer); - cpu_base->running = timer; + base->running = timer; /* * Separate the ->running assignment from the ->state assignment. * * As with a regular write barrier, this ensures the read side in - * hrtimer_active() cannot observe cpu_base->running == NULL && + * hrtimer_active() cannot observe base->running == NULL && * timer->state == INACTIVE. */ - raw_write_seqcount_barrier(&cpu_base->seq); + raw_write_seqcount_barrier(&base->seq); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); fn = timer->function; @@ -1244,13 +1242,13 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, * Separate the ->running assignment from the ->state assignment. * * As with a regular write barrier, this ensures the read side in - * hrtimer_active() cannot observe cpu_base->running == NULL && + * hrtimer_active() cannot observe base->running.timer == NULL && * timer->state == INACTIVE. */ - raw_write_seqcount_barrier(&cpu_base->seq); + raw_write_seqcount_barrier(&base->seq); - WARN_ON_ONCE(cpu_base->running != timer); - cpu_base->running = NULL; + WARN_ON_ONCE(base->running != timer); + base->running = NULL; } static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) -- cgit v1.1 From da21c5a58a7f30db69e04e06dfb6777ccbb1113c Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:41 +0100 Subject: hrtimer: Make room in 'struct hrtimer_cpu_base' The upcoming softirq based hrtimers support requires an additional field in the hrtimer_cpu_base struct, which would grow the struct size beyond a cache line. The hrtimer_cpu_base::nr_retries and ::nr_hangs members are solely used for diagnostic output and have no requirement to be 'unsigned int'. Make them 'unsigned short' to create room for the new struct member. No functional change. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-13-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1bae7b9..56e56bc 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -189,8 +189,8 @@ struct hrtimer_cpu_base { ktime_t expires_next; struct hrtimer *next_timer; unsigned int nr_events; - unsigned int nr_retries; - unsigned int nr_hangs; + unsigned short nr_retries; + unsigned short nr_hangs; unsigned int max_hang_time; #endif struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; -- cgit v1.1 From 28bfd18bf3daa5db8bb3422ea7138c8b7d2444ac Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:42 +0100 Subject: hrtimer: Make the hrtimer_cpu_base::hres_active field unconditional, to simplify the code The hrtimer_cpu_base::hres_active_member field depends on CONFIG_HIGH_RES_TIMERS=y currently, and all related functions to this member are conditional as well. To simplify the code make it unconditional and set it to zero during initialization. (This will also help with the upcoming softirq based hrtimers code.) The conditional code sections can be avoided by adding IS_ENABLED(HIGHRES) conditionals into common functions, which ensures dead code elimination. There is no functional change. Suggested-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-14-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 20 ++++++++------------ kernel/time/hrtimer.c | 31 +++++++++++++++---------------- 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 56e56bc..22627b3 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -161,8 +161,8 @@ enum hrtimer_base_type { * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events - * @in_hrtirq: hrtimer_interrupt() is currently executing * @hres_active: State of high resolution mode + * @in_hrtirq: hrtimer_interrupt() is currently executing * @hang_detected: The last hrtimer interrupt detected a hang * @expires_next: absolute time of the next event, is required for remote * hrtimer enqueue @@ -182,9 +182,9 @@ struct hrtimer_cpu_base { unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; + unsigned int hres_active : 1; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, - hres_active : 1, hang_detected : 1; ktime_t expires_next; struct hrtimer *next_timer; @@ -266,16 +266,17 @@ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) return timer->base->get_time(); } +static inline int hrtimer_is_hres_active(struct hrtimer *timer) +{ + return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? + timer->base->cpu_base->hres_active : 0; +} + #ifdef CONFIG_HIGH_RES_TIMERS struct clock_event_device; extern void hrtimer_interrupt(struct clock_event_device *dev); -static inline int hrtimer_is_hres_active(struct hrtimer *timer) -{ - return timer->base->cpu_base->hres_active; -} - /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an @@ -298,11 +299,6 @@ extern unsigned int hrtimer_resolution; #define hrtimer_resolution (unsigned int)LOW_RES_NSEC -static inline int hrtimer_is_hres_active(struct hrtimer *timer) -{ - return 0; -} - static inline void clock_was_set_delayed(void) { } #endif diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index e56805f..b688090 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -512,6 +512,20 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) offs_real, offs_boot, offs_tai); } +/* + * Is the high resolution mode active ? + */ +static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) +{ + return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? + cpu_base->hres_active : 0; +} + +static inline int hrtimer_hres_active(void) +{ + return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); +} + /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -541,19 +555,6 @@ static inline int hrtimer_is_hres_enabled(void) } /* - * Is the high resolution mode active ? - */ -static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) -{ - return cpu_base->hres_active; -} - -static inline int hrtimer_hres_active(void) -{ - return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); -} - -/* * Reprogram the event source with checking both queues for the * next event * Called with interrupts disabled and base->lock held @@ -661,7 +662,6 @@ static void hrtimer_reprogram(struct hrtimer *timer, static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { base->expires_next = KTIME_MAX; - base->hres_active = 0; } /* @@ -720,8 +720,6 @@ void clock_was_set_delayed(void) #else -static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *b) { return 0; } -static inline int hrtimer_hres_active(void) { return 0; } static inline int hrtimer_is_hres_enabled(void) { return 0; } static inline void hrtimer_switch_to_hres(void) { } static inline void @@ -1600,6 +1598,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) } cpu_base->cpu = cpu; + cpu_base->hres_active = 0; hrtimer_init_hres(cpu_base); return 0; } -- cgit v1.1 From 851cff8caf4d638d001aac6e57a3511abd94f100 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:43 +0100 Subject: hrtimer: Use accesor functions instead of direct access __hrtimer_hres_active() is now available unconditionally, so replace open coded direct accesses to hrtimer_cpu_base.hres_active. No functional change. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-15-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index b688090..5a624f9 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -564,7 +564,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) { ktime_t expires_next; - if (!cpu_base->hres_active) + if (!__hrtimer_hres_active(cpu_base)) return; expires_next = __hrtimer_get_next_event(cpu_base); @@ -673,7 +673,7 @@ static void retrigger_next_event(void *arg) { struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); - if (!base->hres_active) + if (!__hrtimer_hres_active(base)) return; raw_spin_lock(&base->lock); -- cgit v1.1 From 07a9a7eae86abb796468b225586086d7c4cb59fc Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:44 +0100 Subject: hrtimer: Make the remote enqueue check unconditional hrtimer_cpu_base.expires_next is used to cache the next event armed in the timer hardware. The value is used to check whether an hrtimer can be enqueued remotely. If the new hrtimer is expiring before expires_next, then remote enqueue is not possible as the remote hrtimer hardware cannot be accessed for reprogramming to an earlier expiry time. The remote enqueue check is currently conditional on CONFIG_HIGH_RES_TIMERS=y and hrtimer_cpu_base.hres_active. There is no compelling reason to make this conditional. Move hrtimer_cpu_base.expires_next out of the CONFIG_HIGH_RES_TIMERS=y guarded area and remove the conditionals in hrtimer_check_target(). The check is currently a NOOP for the CONFIG_HIGH_RES_TIMERS=n and the !hrtimer_cpu_base.hres_active case because in these cases nothing updates hrtimer_cpu_base.expires_next yet. This will be changed with later patches which further reduce the #ifdef zoo in this code. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-16-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 6 +++--- kernel/time/hrtimer.c | 26 ++++++-------------------- 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 22627b3..bb7270e 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -164,13 +164,13 @@ enum hrtimer_base_type { * @hres_active: State of high resolution mode * @in_hrtirq: hrtimer_interrupt() is currently executing * @hang_detected: The last hrtimer interrupt detected a hang - * @expires_next: absolute time of the next event, is required for remote - * hrtimer enqueue * @next_timer: Pointer to the first expiring timer * @nr_events: Total number of hrtimer interrupt events * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @expires_next: absolute time of the next event, is required for remote + * hrtimer enqueue * @clock_base: array of clock bases for this cpu * * Note: next_timer is just an optimization for __remove_hrtimer(). @@ -186,13 +186,13 @@ struct hrtimer_cpu_base { #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, hang_detected : 1; - ktime_t expires_next; struct hrtimer *next_timer; unsigned int nr_events; unsigned short nr_retries; unsigned short nr_hangs; unsigned int max_hang_time; #endif + ktime_t expires_next; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; } ____cacheline_aligned; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 5a624f9..a9ab67f 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -154,26 +154,21 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, } /* - * With HIGHRES=y we do not migrate the timer when it is expiring - * before the next event on the target cpu because we cannot reprogram - * the target cpu hardware and we would cause it to fire late. + * We do not migrate the timer when it is expiring before the next + * event on the target cpu. When high resolution is enabled, we cannot + * reprogram the target cpu hardware and we would cause it to fire + * late. To keep it simple, we handle the high resolution enabled and + * disabled case similar. * * Called with cpu_base->lock of target cpu held. */ static int hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) { -#ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires; - if (!new_base->cpu_base->hres_active) - return 0; - expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); return expires <= new_base->cpu_base->expires_next; -#else - return 0; -#endif } static inline @@ -657,14 +652,6 @@ static void hrtimer_reprogram(struct hrtimer *timer, } /* - * Initialize the high resolution related parts of cpu_base - */ -static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) -{ - base->expires_next = KTIME_MAX; -} - -/* * Retrigger next event is called after clock was set * * Called with interrupts disabled via on_each_cpu() @@ -729,7 +716,6 @@ static inline int hrtimer_reprogram(struct hrtimer *timer, { return 0; } -static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } static inline void retrigger_next_event(void *arg) { } #endif /* CONFIG_HIGH_RES_TIMERS */ @@ -1599,7 +1585,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) cpu_base->cpu = cpu; cpu_base->hres_active = 0; - hrtimer_init_hres(cpu_base); + cpu_base->expires_next = KTIME_MAX; return 0; } -- cgit v1.1 From eb27926ba05233dc4f2052cc9d4f19359ec3cd2c Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:45 +0100 Subject: hrtimer: Make hrtimer_cpu_base.next_timer handling unconditional hrtimer_cpu_base.next_timer stores the pointer to the next expiring timer in a CPU base. This pointer cannot be dereferenced and is solely used to check whether a hrtimer which is removed is the hrtimer which is the first to expire in the CPU base. If this is the case, then the timer hardware needs to be reprogrammed to avoid an extra interrupt for nothing. Again, this is conditional functionality, but there is no compelling reason to make this conditional. As a preparation, hrtimer_cpu_base.next_timer needs to be available unconditonally. Aside of that the upcoming support for softirq based hrtimers requires access to this pointer unconditionally as well, so our motivation is not entirely simplicity based. Make the update of hrtimer_cpu_base.next_timer unconditional and remove the #ifdef cruft. The impact on CONFIG_HIGH_RES_TIMERS=n && CONFIG_NOHZ=n is marginal as it's just a store on an already dirtied cacheline. No functional change. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-17-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 4 ++-- kernel/time/hrtimer.c | 12 ++---------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index bb7270e..2d3e1d6 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -164,13 +164,13 @@ enum hrtimer_base_type { * @hres_active: State of high resolution mode * @in_hrtirq: hrtimer_interrupt() is currently executing * @hang_detected: The last hrtimer interrupt detected a hang - * @next_timer: Pointer to the first expiring timer * @nr_events: Total number of hrtimer interrupt events * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt * @expires_next: absolute time of the next event, is required for remote * hrtimer enqueue + * @next_timer: Pointer to the first expiring timer * @clock_base: array of clock bases for this cpu * * Note: next_timer is just an optimization for __remove_hrtimer(). @@ -186,13 +186,13 @@ struct hrtimer_cpu_base { #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, hang_detected : 1; - struct hrtimer *next_timer; unsigned int nr_events; unsigned short nr_retries; unsigned short nr_hangs; unsigned int max_hang_time; #endif ktime_t expires_next; + struct hrtimer *next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; } ____cacheline_aligned; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index a9ab67f..26abaa7 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -459,21 +459,13 @@ __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) while ((base = __next_base((cpu_base), &(active)))) #if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) -static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base, - struct hrtimer *timer) -{ -#ifdef CONFIG_HIGH_RES_TIMERS - cpu_base->next_timer = timer; -#endif -} - static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) { struct hrtimer_clock_base *base; unsigned int active = cpu_base->active_bases; ktime_t expires, expires_next = KTIME_MAX; - hrtimer_update_next_timer(cpu_base, NULL); + cpu_base->next_timer = NULL; for_each_active_base(base, cpu_base, active) { struct timerqueue_node *next; struct hrtimer *timer; @@ -483,7 +475,7 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) expires = ktime_sub(hrtimer_get_expires(timer), base->offset); if (expires < expires_next) { expires_next = expires; - hrtimer_update_next_timer(cpu_base, timer); + cpu_base->next_timer = timer; } } /* -- cgit v1.1 From 11a9fe069e341ac53bddb8fe1a85ea986cff1a42 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:46 +0100 Subject: hrtimer: Make hrtimer_reprogramm() unconditional hrtimer_reprogram() needs to be available unconditionally for softirq based hrtimers. Move the function and all required struct members out of the CONFIG_HIGH_RES_TIMERS #ifdef. There is no functional change because hrtimer_reprogram() is only invoked when hrtimer_cpu_base.hres_active is true. Making it unconditional increases the text size for the CONFIG_HIGH_RES_TIMERS=n case, but avoids replication of that code for the upcoming softirq based hrtimers support. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-18-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 6 +-- kernel/time/hrtimer.c | 129 +++++++++++++++++++++++------------------------- 2 files changed, 65 insertions(+), 70 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2d3e1d6..98ed357 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -182,10 +182,10 @@ struct hrtimer_cpu_base { unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; - unsigned int hres_active : 1; -#ifdef CONFIG_HIGH_RES_TIMERS - unsigned int in_hrtirq : 1, + unsigned int hres_active : 1, + in_hrtirq : 1, hang_detected : 1; +#ifdef CONFIG_HIGH_RES_TIMERS unsigned int nr_events; unsigned short nr_retries; unsigned short nr_hangs; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 26abaa7..63d804a 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -582,68 +582,6 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) } /* - * When a timer is enqueued and expires earlier than the already enqueued - * timers, we have to check, whether it expires earlier than the timer for - * which the clock event device was armed. - * - * Called with interrupts disabled and base->cpu_base.lock held - */ -static void hrtimer_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) -{ - struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); - ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); - - WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); - - /* - * If the timer is not on the current cpu, we cannot reprogram - * the other cpus clock event device. - */ - if (base->cpu_base != cpu_base) - return; - - /* - * If the hrtimer interrupt is running, then it will - * reevaluate the clock bases and reprogram the clock event - * device. The callbacks are always executed in hard interrupt - * context so we don't need an extra check for a running - * callback. - */ - if (cpu_base->in_hrtirq) - return; - - /* - * CLOCK_REALTIME timer might be requested with an absolute - * expiry time which is less than base->offset. Set it to 0. - */ - if (expires < 0) - expires = 0; - - if (expires >= cpu_base->expires_next) - return; - - /* Update the pointer to the next expiring timer */ - cpu_base->next_timer = timer; - - /* - * If a hang was detected in the last timer interrupt then we - * do not schedule a timer which is earlier than the expiry - * which we enforced in the hang detection. We want the system - * to make progress. - */ - if (cpu_base->hang_detected) - return; - - /* - * Program the timer hardware. We enforce the expiry for - * events which are already in the past. - */ - cpu_base->expires_next = expires; - tick_program_event(expires, 1); -} - -/* * Retrigger next event is called after clock was set * * Called with interrupts disabled via on_each_cpu() @@ -703,16 +641,73 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; } static inline void hrtimer_switch_to_hres(void) { } static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } -static inline int hrtimer_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) -{ - return 0; -} static inline void retrigger_next_event(void *arg) { } #endif /* CONFIG_HIGH_RES_TIMERS */ /* + * When a timer is enqueued and expires earlier than the already enqueued + * timers, we have to check, whether it expires earlier than the timer for + * which the clock event device was armed. + * + * Called with interrupts disabled and base->cpu_base.lock held + */ +static void hrtimer_reprogram(struct hrtimer *timer, + struct hrtimer_clock_base *base) +{ + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); + + WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); + + /* + * If the timer is not on the current cpu, we cannot reprogram + * the other cpus clock event device. + */ + if (base->cpu_base != cpu_base) + return; + + /* + * If the hrtimer interrupt is running, then it will + * reevaluate the clock bases and reprogram the clock event + * device. The callbacks are always executed in hard interrupt + * context so we don't need an extra check for a running + * callback. + */ + if (cpu_base->in_hrtirq) + return; + + /* + * CLOCK_REALTIME timer might be requested with an absolute + * expiry time which is less than base->offset. Set it to 0. + */ + if (expires < 0) + expires = 0; + + if (expires >= cpu_base->expires_next) + return; + + /* Update the pointer to the next expiring timer */ + cpu_base->next_timer = timer; + + /* + * If a hang was detected in the last timer interrupt then we + * do not schedule a timer which is earlier than the expiry + * which we enforced in the hang detection. We want the system + * to make progress. + */ + if (cpu_base->hang_detected) + return; + + /* + * Program the timer hardware. We enforce the expiry for + * events which are already in the past. + */ + cpu_base->expires_next = expires; + tick_program_event(expires, 1); +} + +/* * Clock realtime was set * * Change the offset of the realtime clock vs. the monotonic -- cgit v1.1 From ebba2c723f38a766546b2eaf828c522576c791d4 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:47 +0100 Subject: hrtimer: Make hrtimer_force_reprogramm() unconditionally available hrtimer_force_reprogram() needs to be available unconditionally for softirq based hrtimers. Move the function and all required struct members out of the CONFIG_HIGH_RES_TIMERS #ifdef. There is no functional change because hrtimer_force_reprogram() is only invoked when hrtimer_cpu_base.hres_active is true and CONFIG_HIGH_RES_TIMERS=y. Making it unconditional increases the text size for the CONFIG_HIGH_RES_TIMERS=n case slightly, but avoids replication of that code for the upcoming softirq based hrtimers support. Most of the code gets eliminated in the CONFIG_HIGH_RES_TIMERS=n case by the compiler. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-19-anna-maria@linutronix.de [ Made it build on !CONFIG_HIGH_RES_TIMERS ] Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 60 ++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 63d804a..2b3222e 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -458,7 +458,6 @@ __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) #define for_each_active_base(base, cpu_base, active) \ while ((base = __next_base((cpu_base), &(active)))) -#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) { struct hrtimer_clock_base *base; @@ -487,7 +486,6 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) expires_next = 0; return expires_next; } -#endif static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) { @@ -513,34 +511,6 @@ static inline int hrtimer_hres_active(void) return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); } -/* High resolution timer related functions */ -#ifdef CONFIG_HIGH_RES_TIMERS - -/* - * High resolution timer enabled ? - */ -static bool hrtimer_hres_enabled __read_mostly = true; -unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; -EXPORT_SYMBOL_GPL(hrtimer_resolution); - -/* - * Enable / Disable high resolution mode - */ -static int __init setup_hrtimer_hres(char *str) -{ - return (kstrtobool(str, &hrtimer_hres_enabled) == 0); -} - -__setup("highres=", setup_hrtimer_hres); - -/* - * hrtimer_high_res_enabled - query, if the highres mode is enabled - */ -static inline int hrtimer_is_hres_enabled(void) -{ - return hrtimer_hres_enabled; -} - /* * Reprogram the event source with checking both queues for the * next event @@ -581,6 +551,34 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) tick_program_event(cpu_base->expires_next, 1); } +/* High resolution timer related functions */ +#ifdef CONFIG_HIGH_RES_TIMERS + +/* + * High resolution timer enabled ? + */ +static bool hrtimer_hres_enabled __read_mostly = true; +unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; +EXPORT_SYMBOL_GPL(hrtimer_resolution); + +/* + * Enable / Disable high resolution mode + */ +static int __init setup_hrtimer_hres(char *str) +{ + return (kstrtobool(str, &hrtimer_hres_enabled) == 0); +} + +__setup("highres=", setup_hrtimer_hres); + +/* + * hrtimer_high_res_enabled - query, if the highres mode is enabled + */ +static inline int hrtimer_is_hres_enabled(void) +{ + return hrtimer_hres_enabled; +} + /* * Retrigger next event is called after clock was set * @@ -639,8 +637,6 @@ void clock_was_set_delayed(void) static inline int hrtimer_is_hres_enabled(void) { return 0; } static inline void hrtimer_switch_to_hres(void) { } -static inline void -hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } static inline void retrigger_next_event(void *arg) { } #endif /* CONFIG_HIGH_RES_TIMERS */ -- cgit v1.1 From 61bb4bcb79c7afcd0bf0d20aef4704977172fd60 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:48 +0100 Subject: hrtimer: Unify hrtimer removal handling When the first hrtimer on the current CPU is removed, hrtimer_force_reprogram() is invoked but only when CONFIG_HIGH_RES_TIMERS=y and hrtimer_cpu_base.hres_active is set. hrtimer_force_reprogram() updates hrtimer_cpu_base.expires_next and reprograms the clock event device. When CONFIG_HIGH_RES_TIMERS=y and hrtimer_cpu_base.hres_active is set, a pointless hrtimer interrupt can be prevented. hrtimer_check_target() makes the 'can remote enqueue' decision. As soon as hrtimer_check_target() is unconditionally available and hrtimer_cpu_base.expires_next is updated by hrtimer_reprogram(), hrtimer_force_reprogram() needs to be available unconditionally as well to prevent the following scenario with CONFIG_HIGH_RES_TIMERS=n: - the first hrtimer on this CPU is removed and hrtimer_force_reprogram() is not executed - CPU goes idle (next timer is calculated and hrtimers are taken into account) - a hrtimer is enqueued remote on the idle CPU: hrtimer_check_target() compares expiry value and hrtimer_cpu_base.expires_next. The expiry value is after expires_next, so the hrtimer is enqueued. This timer will fire late, if it expires before the effective first hrtimer on this CPU and the comparison was with an outdated expires_next value. To prevent this scenario, make hrtimer_force_reprogram() unconditional except the effective reprogramming part, which gets eliminated by the compiler in the CONFIG_HIGH_RES_TIMERS=n case. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-20-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 2b3222e..e6a78ae 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -521,9 +521,6 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) { ktime_t expires_next; - if (!__hrtimer_hres_active(cpu_base)) - return; - expires_next = __hrtimer_get_next_event(cpu_base); if (skip_equal && expires_next == cpu_base->expires_next) @@ -532,6 +529,9 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) cpu_base->expires_next = expires_next; /* + * If hres is not active, hardware does not have to be + * reprogrammed yet. + * * If a hang was detected in the last timer interrupt then we * leave the hang delay active in the hardware. We want the * system to make progress. That also prevents the following @@ -545,7 +545,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) * set. So we'd effectivly block all timers until the T2 event * fires. */ - if (cpu_base->hang_detected) + if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) return; tick_program_event(cpu_base->expires_next, 1); @@ -844,7 +844,6 @@ static void __remove_hrtimer(struct hrtimer *timer, if (!timerqueue_del(&base->active, &timer->node)) cpu_base->active_bases &= ~(1 << base->index); -#ifdef CONFIG_HIGH_RES_TIMERS /* * Note: If reprogram is false we do not update * cpu_base->next_timer. This happens when we remove the first @@ -855,7 +854,6 @@ static void __remove_hrtimer(struct hrtimer *timer, */ if (reprogram && timer == cpu_base->next_timer) hrtimer_force_reprogram(cpu_base, 1); -#endif } /* -- cgit v1.1 From 14c803419de6acba08e143d51813ac5e0f3443b8 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:49 +0100 Subject: hrtimer: Unify remote enqueue handling hrtimer_reprogram() is conditionally invoked from hrtimer_start_range_ns() when hrtimer_cpu_base.hres_active is true. In the !hres_active case there is a special condition for the nohz_active case: If the newly enqueued timer expires before the first expiring timer on a remote CPU then the remote CPU needs to be notified and woken up from a NOHZ idle sleep to take the new first expiring timer into account. Previous changes have already established the prerequisites to make the remote enqueue behaviour the same whether high resolution mode is active or not: If the to be enqueued timer expires before the first expiring timer on a remote CPU, then it cannot be enqueued there. This was done for the high resolution mode because there is no way to access the remote CPU timer hardware. The same is true for NOHZ, but was handled differently by unconditionally enqueuing the timer and waking up the remote CPU so it can reprogram its timer. Again there is no compelling reason for this difference. hrtimer_check_target(), which makes the 'can remote enqueue' decision is already unconditional, but not yet functional because nothing updates hrtimer_cpu_base.expires_next in the !hres_active case. To unify this the following changes are required: 1) Make the store of the new first expiry time unconditonal in hrtimer_reprogram() and check __hrtimer_hres_active() before proceeding to the actual hardware access. This check also lets the compiler eliminate the rest of the function in case of CONFIG_HIGH_RES_TIMERS=n. 2) Invoke hrtimer_reprogram() unconditionally from hrtimer_start_range_ns() 3) Remove the remote wakeup special case for the !high_res && nohz_active case. Confine the timers_nohz_active static key to timer.c which is the only user now. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-21-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 18 ++++++------------ kernel/time/tick-internal.h | 6 ------ kernel/time/timer.c | 9 ++++++++- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index e6a78ae..1c68bf2 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -685,21 +685,24 @@ static void hrtimer_reprogram(struct hrtimer *timer, /* Update the pointer to the next expiring timer */ cpu_base->next_timer = timer; + cpu_base->expires_next = expires; /* + * If hres is not active, hardware does not have to be + * programmed yet. + * * If a hang was detected in the last timer interrupt then we * do not schedule a timer which is earlier than the expiry * which we enforced in the hang detection. We want the system * to make progress. */ - if (cpu_base->hang_detected) + if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) return; /* * Program the timer hardware. We enforce the expiry for * events which are already in the past. */ - cpu_base->expires_next = expires; tick_program_event(expires, 1); } @@ -936,16 +939,7 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, if (!leftmost) goto unlock; - if (!hrtimer_is_hres_active(timer)) { - /* - * Kick to reschedule the next tick to handle the new timer - * on dynticks target. - */ - if (is_timers_nohz_active()) - wake_up_nohz_cpu(new_base->cpu_base->cpu); - } else { - hrtimer_reprogram(timer, new_base); - } + hrtimer_reprogram(timer, new_base); unlock: unlock_hrtimer_base(timer, &flags); } diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f690628..e277284 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -151,18 +151,12 @@ static inline void tick_nohz_init(void) { } #ifdef CONFIG_NO_HZ_COMMON extern unsigned long tick_nohz_active; extern void timers_update_nohz(void); -extern struct static_key_false timers_nohz_active; -static inline bool is_timers_nohz_active(void) -{ - return static_branch_likely(&timers_nohz_active); -} # ifdef CONFIG_SMP extern struct static_key_false timers_migration_enabled; # endif #else /* CONFIG_NO_HZ_COMMON */ static inline void timers_update_nohz(void) { } #define tick_nohz_active (0) -static inline bool is_timers_nohz_active(void) { return false; } #endif DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index d530f72..48150ab 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -210,7 +210,7 @@ static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); #ifdef CONFIG_NO_HZ_COMMON -DEFINE_STATIC_KEY_FALSE(timers_nohz_active); +static DEFINE_STATIC_KEY_FALSE(timers_nohz_active); static DEFINE_MUTEX(timer_keys_mutex); static void timer_update_keys(struct work_struct *work); @@ -258,6 +258,13 @@ int timer_migration_handler(struct ctl_table *table, int write, mutex_unlock(&timer_keys_mutex); return ret; } + +static inline bool is_timers_nohz_active(void) +{ + return static_branch_unlikely(&timers_nohz_active); +} +#else +static inline bool is_timers_nohz_active(void) { return false; } #endif /* NO_HZ_COMMON */ static unsigned long round_jiffies_common(unsigned long j, int cpu, -- cgit v1.1 From 2ac2dccce9d16a7b1a8fddf69a955d249375bce4 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:50 +0100 Subject: hrtimer: Make remote enqueue decision less restrictive The current decision whether a timer can be queued on a remote CPU checks for timer->expiry <= remote_cpu_base.expires_next. This is too restrictive because a timer with the same expiry time as an existing timer will be enqueued on right-hand size of the existing timer inside the rbtree, i.e. behind the first expiring timer. So its safe to allow enqueuing timers with the same expiry time as the first expiring timer on a remote CPU base. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-22-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 1c68bf2..f4a56fb 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -168,7 +168,7 @@ hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) ktime_t expires; expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); - return expires <= new_base->cpu_base->expires_next; + return expires < new_base->cpu_base->expires_next; } static inline -- cgit v1.1 From 3ec7a3ee9f15f6dcac1591902d85b94c2a4b520d Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:51 +0100 Subject: hrtimer: Remove the 'base' parameter from hrtimer_reprogram() hrtimer_reprogram() must have access to the hrtimer_clock_base of the new first expiring timer to access hrtimer_clock_base.offset for adjusting the expiry time to CLOCK_MONOTONIC. This is required to evaluate whether the new left most timer in the hrtimer_clock_base is the first expiring timer of all clock bases in a hrtimer_cpu_base. The only user of hrtimer_reprogram() is hrtimer_start_range_ns(), which has a pointer to hrtimer_clock_base() already and hands it in as a parameter. But hrtimer_start_range_ns() will be split for the upcoming support for softirq based hrtimers to avoid code duplication and will lose the direct access to the clock base pointer. Instead of handing in timer and timer->base as a parameter remove the base parameter from hrtimer_reprogram() instead and retrieve the clock base internally. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-23-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f4a56fb..33a6c99 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -648,10 +648,10 @@ static inline void retrigger_next_event(void *arg) { } * * Called with interrupts disabled and base->cpu_base.lock held */ -static void hrtimer_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) +static void hrtimer_reprogram(struct hrtimer *timer) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + struct hrtimer_clock_base *base = timer->base; ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); @@ -939,7 +939,7 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, if (!leftmost) goto unlock; - hrtimer_reprogram(timer, new_base); + hrtimer_reprogram(timer); unlock: unlock_hrtimer_base(timer, &flags); } -- cgit v1.1 From 138a6b7ae4dedde5513678f57b275eee19c41b6a Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:52 +0100 Subject: hrtimer: Factor out __hrtimer_start_range_ns() Preparatory patch for softirq based hrtimers to avoid code duplication, factor out the __hrtimer_start_range_ns() function from hrtimer_start_range_ns(). No functional change. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-24-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 33a6c99..4142e6f 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -905,22 +905,11 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, return tim; } -/** - * hrtimer_start_range_ns - (re)start an hrtimer - * @timer: the timer to be added - * @tim: expiry time - * @delta_ns: "slack" range for the timer - * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) - */ -void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, - u64 delta_ns, const enum hrtimer_mode mode) +static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + u64 delta_ns, const enum hrtimer_mode mode, + struct hrtimer_clock_base *base) { - struct hrtimer_clock_base *base, *new_base; - unsigned long flags; - int leftmost; - - base = lock_hrtimer_base(timer, &flags); + struct hrtimer_clock_base *new_base; /* Remove an active timer from the queue: */ remove_hrtimer(timer, base, true); @@ -935,12 +924,27 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, /* Switch the timer base, if necessary: */ new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); - leftmost = enqueue_hrtimer(timer, new_base, mode); - if (!leftmost) - goto unlock; + return enqueue_hrtimer(timer, new_base, mode); +} +/** + * hrtimer_start_range_ns - (re)start an hrtimer + * @timer: the timer to be added + * @tim: expiry time + * @delta_ns: "slack" range for the timer + * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) + */ +void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + u64 delta_ns, const enum hrtimer_mode mode) +{ + struct hrtimer_clock_base *base; + unsigned long flags; + + base = lock_hrtimer_base(timer, &flags); + + if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) + hrtimer_reprogram(timer); - hrtimer_reprogram(timer); -unlock: unlock_hrtimer_base(timer, &flags); } EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); -- cgit v1.1 From ad38f596d8e4babc19be8b21a7a49debffb4a7f5 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:53 +0100 Subject: hrtimer: Factor out __hrtimer_next_event_base() Preparatory patch for softirq based hrtimers to avoid code duplication. No functional change. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-25-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 4142e6f..5d9b81d 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -458,13 +458,13 @@ __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) #define for_each_active_base(base, cpu_base, active) \ while ((base = __next_base((cpu_base), &(active)))) -static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) +static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, + unsigned int active, + ktime_t expires_next) { struct hrtimer_clock_base *base; - unsigned int active = cpu_base->active_bases; - ktime_t expires, expires_next = KTIME_MAX; + ktime_t expires; - cpu_base->next_timer = NULL; for_each_active_base(base, cpu_base, active) { struct timerqueue_node *next; struct hrtimer *timer; @@ -487,6 +487,18 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) return expires_next; } +static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) +{ + unsigned int active = cpu_base->active_bases; + ktime_t expires_next = KTIME_MAX; + + cpu_base->next_timer = NULL; + + expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next); + + return expires_next; +} + static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) { ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; -- cgit v1.1 From dd934aa8ad1fbaab3d916125c7fe42fff75aa7ff Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:54 +0100 Subject: hrtimer: Use irqsave/irqrestore around __run_hrtimer() __run_hrtimer() is called with the hrtimer_cpu_base.lock held and interrupts disabled. Before invoking the timer callback the base lock is dropped, but interrupts stay disabled. The upcoming support for softirq based hrtimers requires that interrupts are enabled before the timer callback is invoked. To avoid code duplication, take hrtimer_cpu_base.lock with raw_spin_lock_irqsave(flags) at the call site and hand in the flags as a parameter. So raw_spin_unlock_irqrestore() before the callback invocation will either keep interrupts disabled in interrupt context or restore to interrupt enabled state when called from softirq context. Suggested-by: Peter Zijlstra Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-26-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 5d9b81d..31ccd86 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1159,7 +1159,8 @@ EXPORT_SYMBOL_GPL(hrtimer_active); static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, struct hrtimer_clock_base *base, - struct hrtimer *timer, ktime_t *now) + struct hrtimer *timer, ktime_t *now, + unsigned long flags) { enum hrtimer_restart (*fn)(struct hrtimer *); int restart; @@ -1194,11 +1195,11 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, * protected against migration to a different CPU even if the lock * is dropped. */ - raw_spin_unlock(&cpu_base->lock); + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); trace_hrtimer_expire_entry(timer, now); restart = fn(timer); trace_hrtimer_expire_exit(timer); - raw_spin_lock(&cpu_base->lock); + raw_spin_lock_irq(&cpu_base->lock); /* * Note: We clear the running state after enqueue_hrtimer and @@ -1226,7 +1227,8 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, base->running = NULL; } -static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) +static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, + unsigned long flags) { struct hrtimer_clock_base *base; unsigned int active = cpu_base->active_bases; @@ -1257,7 +1259,7 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) if (basenow < hrtimer_get_softexpires_tv64(timer)) break; - __run_hrtimer(cpu_base, base, timer, &basenow); + __run_hrtimer(cpu_base, base, timer, &basenow, flags); } } } @@ -1272,13 +1274,14 @@ void hrtimer_interrupt(struct clock_event_device *dev) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); ktime_t expires_next, now, entry_time, delta; + unsigned long flags; int retries = 0; BUG_ON(!cpu_base->hres_active); cpu_base->nr_events++; dev->next_event = KTIME_MAX; - raw_spin_lock(&cpu_base->lock); + raw_spin_lock_irqsave(&cpu_base->lock, flags); entry_time = now = hrtimer_update_base(cpu_base); retry: cpu_base->in_hrtirq = 1; @@ -1291,7 +1294,7 @@ retry: */ cpu_base->expires_next = KTIME_MAX; - __hrtimer_run_queues(cpu_base, now); + __hrtimer_run_queues(cpu_base, now, flags); /* Reevaluate the clock bases for the next expiry */ expires_next = __hrtimer_get_next_event(cpu_base); @@ -1301,7 +1304,7 @@ retry: */ cpu_base->expires_next = expires_next; cpu_base->in_hrtirq = 0; - raw_spin_unlock(&cpu_base->lock); + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); /* Reprogramming necessary ? */ if (!tick_program_event(expires_next, 0)) { @@ -1322,7 +1325,7 @@ retry: * Acquire base lock for updating the offsets and retrieving * the current time. */ - raw_spin_lock(&cpu_base->lock); + raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); cpu_base->nr_retries++; if (++retries < 3) @@ -1335,7 +1338,8 @@ retry: */ cpu_base->nr_hangs++; cpu_base->hang_detected = 1; - raw_spin_unlock(&cpu_base->lock); + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); + delta = ktime_sub(now, entry_time); if ((unsigned int)delta > cpu_base->max_hang_time) cpu_base->max_hang_time = (unsigned int) delta; @@ -1377,6 +1381,7 @@ static inline void __hrtimer_peek_ahead_timers(void) { } void hrtimer_run_queues(void) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + unsigned long flags; ktime_t now; if (__hrtimer_hres_active(cpu_base)) @@ -1394,10 +1399,10 @@ void hrtimer_run_queues(void) return; } - raw_spin_lock(&cpu_base->lock); + raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); - __hrtimer_run_queues(cpu_base, now); - raw_spin_unlock(&cpu_base->lock); + __hrtimer_run_queues(cpu_base, now, flags); + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); } /* -- cgit v1.1 From 98ecadd4305d8677ba77162152485798d47dcc85 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:55 +0100 Subject: hrtimer: Add clock bases and hrtimer mode for softirq context Currently hrtimer callback functions are always executed in hard interrupt context. Users of hrtimers, which need their timer function to be executed in soft interrupt context, make use of tasklets to get the proper context. Add additional hrtimer clock bases for timers which must expire in softirq context, so the detour via the tasklet can be avoided. This is also required for RT, where the majority of hrtimer is moved into softirq hrtimer context. The selection of the expiry mode happens via a mode bit. Introduce HRTIMER_MODE_SOFT and the matching combinations with the ABS/REL/PINNED bits and update the decoding of hrtimer_mode in tracepoints. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-27-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 14 ++++++++++++++ include/trace/events/timer.h | 6 +++++- kernel/time/hrtimer.c | 20 ++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 98ed357..26ae8a8 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -33,14 +33,24 @@ struct hrtimer_cpu_base; * HRTIMER_MODE_REL - Time value is relative to now * HRTIMER_MODE_PINNED - Timer is bound to CPU (is only considered * when starting the timer) + * HRTIMER_MODE_SOFT - Timer callback function will be executed in + * soft irq context */ enum hrtimer_mode { HRTIMER_MODE_ABS = 0x00, HRTIMER_MODE_REL = 0x01, HRTIMER_MODE_PINNED = 0x02, + HRTIMER_MODE_SOFT = 0x04, HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, + + HRTIMER_MODE_ABS_SOFT = HRTIMER_MODE_ABS | HRTIMER_MODE_SOFT, + HRTIMER_MODE_REL_SOFT = HRTIMER_MODE_REL | HRTIMER_MODE_SOFT, + + HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT, + HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, + }; /* @@ -151,6 +161,10 @@ enum hrtimer_base_type { HRTIMER_BASE_REALTIME, HRTIMER_BASE_BOOTTIME, HRTIMER_BASE_TAI, + HRTIMER_BASE_MONOTONIC_SOFT, + HRTIMER_BASE_REALTIME_SOFT, + HRTIMER_BASE_BOOTTIME_SOFT, + HRTIMER_BASE_TAI_SOFT, HRTIMER_MAX_CLOCK_BASES, }; diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 744b431..a57e4ee 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -148,7 +148,11 @@ DEFINE_EVENT(timer_class, timer_cancel, { HRTIMER_MODE_ABS, "ABS" }, \ { HRTIMER_MODE_REL, "REL" }, \ { HRTIMER_MODE_ABS_PINNED, "ABS|PINNED" }, \ - { HRTIMER_MODE_REL_PINNED, "REL|PINNED" }) + { HRTIMER_MODE_REL_PINNED, "REL|PINNED" }, \ + { HRTIMER_MODE_ABS_SOFT, "ABS|SOFT" }, \ + { HRTIMER_MODE_REL_SOFT, "REL|SOFT" }, \ + { HRTIMER_MODE_ABS_PINNED_SOFT, "ABS|PINNED|SOFT" }, \ + { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }) /** * hrtimer_init - called when the hrtimer is initialized diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 31ccd86..e2353f5 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -92,6 +92,26 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clockid = CLOCK_TAI, .get_time = &ktime_get_clocktai, }, + { + .index = HRTIMER_BASE_MONOTONIC_SOFT, + .clockid = CLOCK_MONOTONIC, + .get_time = &ktime_get, + }, + { + .index = HRTIMER_BASE_REALTIME_SOFT, + .clockid = CLOCK_REALTIME, + .get_time = &ktime_get_real, + }, + { + .index = HRTIMER_BASE_BOOTTIME_SOFT, + .clockid = CLOCK_BOOTTIME, + .get_time = &ktime_get_boottime, + }, + { + .index = HRTIMER_BASE_TAI_SOFT, + .clockid = CLOCK_TAI, + .get_time = &ktime_get_clocktai, + }, } }; -- cgit v1.1 From c458b1d102036eaa2c70e03000c959bd491c2037 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:56 +0100 Subject: hrtimer: Prepare handling of hard and softirq based hrtimers The softirq based hrtimer can utilize most of the existing hrtimers functions, but need to operate on a different data set. Add an 'active_mask' parameter to various functions so the hard and soft bases can be selected. Fixup the existing callers and hand in the ACTIVE_HARD mask. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-28-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index e2353f5..ba4674e 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -60,6 +60,15 @@ #include "tick-internal.h" /* + * Masks for selecting the soft and hard context timers from + * cpu_base->active + */ +#define MASK_SHIFT (HRTIMER_BASE_MONOTONIC_SOFT) +#define HRTIMER_ACTIVE_HARD ((1U << MASK_SHIFT) - 1) +#define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) +#define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) + +/* * The timer bases: * * There are more clockids than hrtimer bases. Thus, we index @@ -507,13 +516,24 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, return expires_next; } -static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) +/* + * Recomputes cpu_base::*next_timer and returns the earliest expires_next but + * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram. + * + * @active_mask must be one of: + * - HRTIMER_ACTIVE, + * - HRTIMER_ACTIVE_SOFT, or + * - HRTIMER_ACTIVE_HARD. + */ +static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, + unsigned int active_mask) { - unsigned int active = cpu_base->active_bases; + unsigned int active; ktime_t expires_next = KTIME_MAX; cpu_base->next_timer = NULL; + active = cpu_base->active_bases & active_mask; expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next); return expires_next; @@ -553,7 +573,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) { ktime_t expires_next; - expires_next = __hrtimer_get_next_event(cpu_base); + expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); if (skip_equal && expires_next == cpu_base->expires_next) return; @@ -1074,7 +1094,7 @@ u64 hrtimer_get_next_event(void) raw_spin_lock_irqsave(&cpu_base->lock, flags); if (!__hrtimer_hres_active(cpu_base)) - expires = __hrtimer_get_next_event(cpu_base); + expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); @@ -1248,10 +1268,10 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, } static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, - unsigned long flags) + unsigned long flags, unsigned int active_mask) { struct hrtimer_clock_base *base; - unsigned int active = cpu_base->active_bases; + unsigned int active = cpu_base->active_bases & active_mask; for_each_active_base(base, cpu_base, active) { struct timerqueue_node *node; @@ -1314,10 +1334,10 @@ retry: */ cpu_base->expires_next = KTIME_MAX; - __hrtimer_run_queues(cpu_base, now, flags); + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); /* Reevaluate the clock bases for the next expiry */ - expires_next = __hrtimer_get_next_event(cpu_base); + expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); /* * Store the new expiry value so the migration code can verify * against it. @@ -1421,7 +1441,7 @@ void hrtimer_run_queues(void) raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); - __hrtimer_run_queues(cpu_base, now, flags); + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); } -- cgit v1.1 From 5da70160462e80b0ab8a6960cdd0cdd476907523 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:57 +0100 Subject: hrtimer: Implement support for softirq based hrtimers hrtimer callbacks are always invoked in hard interrupt context. Several users in tree require soft interrupt context for their callbacks and achieve this by combining a hrtimer with a tasklet. The hrtimer schedules the tasklet in hard interrupt context and the tasklet callback gets invoked in softirq context later. That's suboptimal and aside of that the real-time patch moves most of the hrtimers into softirq context. So adding native support for hrtimers expiring in softirq context is a valuable extension for both mainline and the RT patch set. Each valid hrtimer clock id has two associated hrtimer clock bases: one for timers expiring in hardirq context and one for timers expiring in softirq context. Implement the functionality to associate a hrtimer with the hard or softirq related clock bases and update the relevant functions to take them into account when the next expiry time needs to be evaluated. Add a check into the hard interrupt context handler functions to check whether the first expiring softirq based timer has expired. If it's expired the softirq is raised and the accounting of softirq based timers to evaluate the next expiry time for programming the timer hardware is skipped until the softirq processing has finished. At the end of the softirq processing the regular processing is resumed. Suggested-by: Thomas Gleixner Suggested-by: Peter Zijlstra Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-29-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 21 ++++-- kernel/time/hrtimer.c | 196 ++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 188 insertions(+), 29 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 26ae8a8..c7902ca 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -103,6 +103,7 @@ enum hrtimer_restart { * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) * @is_rel: Set if the timer was armed relative + * @is_soft: Set if hrtimer will be expired in soft interrupt context. * * The hrtimer structure must be initialized by hrtimer_init() */ @@ -113,6 +114,7 @@ struct hrtimer { struct hrtimer_clock_base *base; u8 state; u8 is_rel; + u8 is_soft; }; /** @@ -178,13 +180,18 @@ enum hrtimer_base_type { * @hres_active: State of high resolution mode * @in_hrtirq: hrtimer_interrupt() is currently executing * @hang_detected: The last hrtimer interrupt detected a hang + * @softirq_activated: displays, if the softirq is raised - update of softirq + * related settings is not required then. * @nr_events: Total number of hrtimer interrupt events * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt * @expires_next: absolute time of the next event, is required for remote - * hrtimer enqueue + * hrtimer enqueue; it is the total first expiry time (hard + * and soft hrtimer are taken into account) * @next_timer: Pointer to the first expiring timer + * @softirq_expires_next: Time to check, if soft queues needs also to be expired + * @softirq_next_timer: Pointer to the first expiring softirq based timer * @clock_base: array of clock bases for this cpu * * Note: next_timer is just an optimization for __remove_hrtimer(). @@ -196,9 +203,10 @@ struct hrtimer_cpu_base { unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; - unsigned int hres_active : 1, - in_hrtirq : 1, - hang_detected : 1; + unsigned int hres_active : 1, + in_hrtirq : 1, + hang_detected : 1, + softirq_activated : 1; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int nr_events; unsigned short nr_retries; @@ -207,6 +215,8 @@ struct hrtimer_cpu_base { #endif ktime_t expires_next; struct hrtimer *next_timer; + ktime_t softirq_expires_next; + struct hrtimer *softirq_next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; } ____cacheline_aligned; @@ -379,7 +389,8 @@ extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * @timer: the timer to be added * @tim: expiry time * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); + * softirq based mode is considered for debug purpose only! */ static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index ba4674e..d93e3e7 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -411,7 +411,8 @@ static inline void debug_hrtimer_init(struct hrtimer *timer) debug_object_init(timer, &hrtimer_debug_descr); } -static inline void debug_hrtimer_activate(struct hrtimer *timer) +static inline void debug_hrtimer_activate(struct hrtimer *timer, + enum hrtimer_mode mode) { debug_object_activate(timer, &hrtimer_debug_descr); } @@ -444,8 +445,10 @@ void destroy_hrtimer_on_stack(struct hrtimer *timer) EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack); #else + static inline void debug_hrtimer_init(struct hrtimer *timer) { } -static inline void debug_hrtimer_activate(struct hrtimer *timer) { } +static inline void debug_hrtimer_activate(struct hrtimer *timer, + enum hrtimer_mode mode) { } static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } #endif @@ -460,7 +463,7 @@ debug_init(struct hrtimer *timer, clockid_t clockid, static inline void debug_activate(struct hrtimer *timer, enum hrtimer_mode mode) { - debug_hrtimer_activate(timer); + debug_hrtimer_activate(timer, mode); trace_hrtimer_start(timer, mode); } @@ -503,7 +506,10 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, expires = ktime_sub(hrtimer_get_expires(timer), base->offset); if (expires < expires_next) { expires_next = expires; - cpu_base->next_timer = timer; + if (timer->is_soft) + cpu_base->softirq_next_timer = timer; + else + cpu_base->next_timer = timer; } } /* @@ -520,21 +526,39 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, * Recomputes cpu_base::*next_timer and returns the earliest expires_next but * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram. * + * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases, + * those timers will get run whenever the softirq gets handled, at the end of + * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases. + * + * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases. + * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual + * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD. + * * @active_mask must be one of: - * - HRTIMER_ACTIVE, + * - HRTIMER_ACTIVE_ALL, * - HRTIMER_ACTIVE_SOFT, or * - HRTIMER_ACTIVE_HARD. */ -static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, - unsigned int active_mask) +static ktime_t +__hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask) { unsigned int active; + struct hrtimer *next_timer = NULL; ktime_t expires_next = KTIME_MAX; - cpu_base->next_timer = NULL; + if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) { + active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; + cpu_base->softirq_next_timer = NULL; + expires_next = __hrtimer_next_event_base(cpu_base, active, KTIME_MAX); + + next_timer = cpu_base->softirq_next_timer; + } - active = cpu_base->active_bases & active_mask; - expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next); + if (active_mask & HRTIMER_ACTIVE_HARD) { + active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; + cpu_base->next_timer = next_timer; + expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next); + } return expires_next; } @@ -545,8 +569,14 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; - return ktime_get_update_offsets_now(&base->clock_was_set_seq, + ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, offs_real, offs_boot, offs_tai); + + base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real; + base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot; + base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai; + + return now; } /* @@ -573,7 +603,23 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) { ktime_t expires_next; - expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); + /* + * Find the current next expiration time. + */ + expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); + + if (cpu_base->next_timer && cpu_base->next_timer->is_soft) { + /* + * When the softirq is activated, hrtimer has to be + * programmed with the first hard hrtimer because soft + * timer interrupt could occur too late. + */ + if (cpu_base->softirq_activated) + expires_next = __hrtimer_get_next_event(cpu_base, + HRTIMER_ACTIVE_HARD); + else + cpu_base->softirq_expires_next = expires_next; + } if (skip_equal && expires_next == cpu_base->expires_next) return; @@ -700,7 +746,7 @@ static inline void retrigger_next_event(void *arg) { } * * Called with interrupts disabled and base->cpu_base.lock held */ -static void hrtimer_reprogram(struct hrtimer *timer) +static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); struct hrtimer_clock_base *base = timer->base; @@ -709,6 +755,37 @@ static void hrtimer_reprogram(struct hrtimer *timer) WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); /* + * CLOCK_REALTIME timer might be requested with an absolute + * expiry time which is less than base->offset. Set it to 0. + */ + if (expires < 0) + expires = 0; + + if (timer->is_soft) { + /* + * soft hrtimer could be started on a remote CPU. In this + * case softirq_expires_next needs to be updated on the + * remote CPU. The soft hrtimer will not expire before the + * first hard hrtimer on the remote CPU - + * hrtimer_check_target() prevents this case. + */ + struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base; + + if (timer_cpu_base->softirq_activated) + return; + + if (!ktime_before(expires, timer_cpu_base->softirq_expires_next)) + return; + + timer_cpu_base->softirq_next_timer = timer; + timer_cpu_base->softirq_expires_next = expires; + + if (!ktime_before(expires, timer_cpu_base->expires_next) || + !reprogram) + return; + } + + /* * If the timer is not on the current cpu, we cannot reprogram * the other cpus clock event device. */ @@ -725,13 +802,6 @@ static void hrtimer_reprogram(struct hrtimer *timer) if (cpu_base->in_hrtirq) return; - /* - * CLOCK_REALTIME timer might be requested with an absolute - * expiry time which is less than base->offset. Set it to 0. - */ - if (expires < 0) - expires = 0; - if (expires >= cpu_base->expires_next) return; @@ -957,6 +1027,31 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, return tim; } +static void +hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram) +{ + ktime_t expires; + + /* + * Find the next SOFT expiration. + */ + expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); + + /* + * reprogramming needs to be triggered, even if the next soft + * hrtimer expires at the same time than the next hard + * hrtimer. cpu_base->softirq_expires_next needs to be updated! + */ + if (expires == KTIME_MAX) + return; + + /* + * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event() + * cpu_base->*expires_next is only set by hrtimer_reprogram() + */ + hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram); +} + static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode, struct hrtimer_clock_base *base) @@ -978,13 +1073,15 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, return enqueue_hrtimer(timer, new_base, mode); } + /** * hrtimer_start_range_ns - (re)start an hrtimer * @timer: the timer to be added * @tim: expiry time * @delta_ns: "slack" range for the timer * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); + * softirq based mode is considered for debug purpose only! */ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode) @@ -992,10 +1089,16 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, struct hrtimer_clock_base *base; unsigned long flags; + /* + * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft + * match. + */ + WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); + base = lock_hrtimer_base(timer, &flags); if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) - hrtimer_reprogram(timer); + hrtimer_reprogram(timer, true); unlock_hrtimer_base(timer, &flags); } @@ -1094,7 +1197,7 @@ u64 hrtimer_get_next_event(void) raw_spin_lock_irqsave(&cpu_base->lock, flags); if (!__hrtimer_hres_active(cpu_base)) - expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); + expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); @@ -1304,6 +1407,23 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, } } +static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h) +{ + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + unsigned long flags; + ktime_t now; + + raw_spin_lock_irqsave(&cpu_base->lock, flags); + + now = hrtimer_update_base(cpu_base); + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT); + + cpu_base->softirq_activated = 0; + hrtimer_update_softirq_timer(cpu_base, true); + + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); +} + #ifdef CONFIG_HIGH_RES_TIMERS /* @@ -1334,10 +1454,16 @@ retry: */ cpu_base->expires_next = KTIME_MAX; + if (!ktime_before(now, cpu_base->softirq_expires_next)) { + cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->softirq_activated = 1; + raise_softirq_irqoff(HRTIMER_SOFTIRQ); + } + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); /* Reevaluate the clock bases for the next expiry */ - expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); + expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); /* * Store the new expiry value so the migration code can verify * against it. @@ -1441,6 +1567,13 @@ void hrtimer_run_queues(void) raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); + + if (!ktime_before(now, cpu_base->softirq_expires_next)) { + cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->softirq_activated = 1; + raise_softirq_irqoff(HRTIMER_SOFTIRQ); + } + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); } @@ -1622,6 +1755,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) cpu_base->cpu = cpu; cpu_base->hres_active = 0; cpu_base->expires_next = KTIME_MAX; + cpu_base->softirq_expires_next = KTIME_MAX; return 0; } @@ -1665,6 +1799,12 @@ int hrtimers_dead_cpu(unsigned int scpu) BUG_ON(cpu_online(scpu)); tick_cancel_sched_timer(scpu); + /* + * this BH disable ensures that raise_softirq_irqoff() does + * not wakeup ksoftirqd (and acquire the pi-lock) while + * holding the cpu_base lock + */ + local_bh_disable(); local_irq_disable(); old_base = &per_cpu(hrtimer_bases, scpu); new_base = this_cpu_ptr(&hrtimer_bases); @@ -1680,12 +1820,19 @@ int hrtimers_dead_cpu(unsigned int scpu) &new_base->clock_base[i]); } + /* + * The migration might have changed the first expiring softirq + * timer on this CPU. Update it. + */ + hrtimer_update_softirq_timer(new_base, false); + raw_spin_unlock(&old_base->lock); raw_spin_unlock(&new_base->lock); /* Check, if we got expired work to do */ __hrtimer_peek_ahead_timers(); local_irq_enable(); + local_bh_enable(); return 0; } @@ -1694,6 +1841,7 @@ int hrtimers_dead_cpu(unsigned int scpu) void __init hrtimers_init(void) { hrtimers_prepare_cpu(smp_processor_id()); + open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq); } /** -- cgit v1.1 From 42f42da41b54c191ae6a775e84a86c100d66c5e8 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:58 +0100 Subject: hrtimer: Implement SOFT/HARD clock base selection All prerequisites to handle hrtimers for expiry in either hard or soft interrupt context are in place. Add the missing bit in hrtimer_init() which associates the timer to the hard or the softirq clock base. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-30-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index d93e3e7..3d20158 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1220,8 +1220,9 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id) static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { + bool softtimer = !!(mode & HRTIMER_MODE_SOFT); + int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; struct hrtimer_cpu_base *cpu_base; - int base; memset(timer, 0, sizeof(struct hrtimer)); @@ -1235,7 +1236,8 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) clock_id = CLOCK_MONOTONIC; - base = hrtimer_clockid_to_base(clock_id); + base += hrtimer_clockid_to_base(clock_id); + timer->is_soft = softtimer; timer->base = &cpu_base->clock_base[base]; timerqueue_init(&timer->node); } @@ -1244,8 +1246,13 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, * hrtimer_init - initialize a timer to the given clock * @timer: the timer to be initialized * @clock_id: the clock to be used - * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL); pinned is not considered here! + * @mode: The modes which are relevant for intitialization: + * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT, + * HRTIMER_MODE_REL_SOFT + * + * The PINNED variants of the above can be handed in, + * but the PINNED bit is ignored as pinning happens + * when the hrtimer is started */ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) -- cgit v1.1 From b03bbbe08ff04d80136b6aac152954ef308a4909 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Dec 2017 11:42:03 +0100 Subject: ALSA/dummy: Replace tasklet with softirq hrtimer The tasklet is used to defer the execution of snd_pcm_period_elapsed() to the softirq context. Using the HRTIMER_MODE_SOFT mode invokes the timer callback in softirq context as well which renders the tasklet useless. [o-takashi: avoid stall due to a call of hrtimer_cancel() on a callback of hrtimer] Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Reviewed-by: Takashi Iwai Cc: Christoph Hellwig Cc: Jaroslav Kysela Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Takashi Iwai Cc: Takashi Sakamoto Cc: alsa-devel@alsa-project.org Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-35-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- sound/drivers/dummy.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index 7b2b1f7..6ad2ff5 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -375,17 +375,9 @@ struct dummy_hrtimer_pcm { ktime_t period_time; atomic_t running; struct hrtimer timer; - struct tasklet_struct tasklet; struct snd_pcm_substream *substream; }; -static void dummy_hrtimer_pcm_elapsed(unsigned long priv) -{ - struct dummy_hrtimer_pcm *dpcm = (struct dummy_hrtimer_pcm *)priv; - if (atomic_read(&dpcm->running)) - snd_pcm_period_elapsed(dpcm->substream); -} - static enum hrtimer_restart dummy_hrtimer_callback(struct hrtimer *timer) { struct dummy_hrtimer_pcm *dpcm; @@ -393,7 +385,14 @@ static enum hrtimer_restart dummy_hrtimer_callback(struct hrtimer *timer) dpcm = container_of(timer, struct dummy_hrtimer_pcm, timer); if (!atomic_read(&dpcm->running)) return HRTIMER_NORESTART; - tasklet_schedule(&dpcm->tasklet); + /* + * In cases of XRUN and draining, this calls .trigger to stop PCM + * substream. + */ + snd_pcm_period_elapsed(dpcm->substream); + if (!atomic_read(&dpcm->running)) + return HRTIMER_NORESTART; + hrtimer_forward_now(timer, dpcm->period_time); return HRTIMER_RESTART; } @@ -403,7 +402,7 @@ static int dummy_hrtimer_start(struct snd_pcm_substream *substream) struct dummy_hrtimer_pcm *dpcm = substream->runtime->private_data; dpcm->base_time = hrtimer_cb_get_time(&dpcm->timer); - hrtimer_start(&dpcm->timer, dpcm->period_time, HRTIMER_MODE_REL); + hrtimer_start(&dpcm->timer, dpcm->period_time, HRTIMER_MODE_REL_SOFT); atomic_set(&dpcm->running, 1); return 0; } @@ -413,14 +412,14 @@ static int dummy_hrtimer_stop(struct snd_pcm_substream *substream) struct dummy_hrtimer_pcm *dpcm = substream->runtime->private_data; atomic_set(&dpcm->running, 0); - hrtimer_cancel(&dpcm->timer); + if (!hrtimer_callback_running(&dpcm->timer)) + hrtimer_cancel(&dpcm->timer); return 0; } static inline void dummy_hrtimer_sync(struct dummy_hrtimer_pcm *dpcm) { hrtimer_cancel(&dpcm->timer); - tasklet_kill(&dpcm->tasklet); } static snd_pcm_uframes_t @@ -465,12 +464,10 @@ static int dummy_hrtimer_create(struct snd_pcm_substream *substream) if (!dpcm) return -ENOMEM; substream->runtime->private_data = dpcm; - hrtimer_init(&dpcm->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&dpcm->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); dpcm->timer.function = dummy_hrtimer_callback; dpcm->substream = substream; atomic_set(&dpcm->running, 0); - tasklet_init(&dpcm->tasklet, dummy_hrtimer_pcm_elapsed, - (unsigned long)dpcm); return 0; } -- cgit v1.1 From b1a31a5f5f27ff8aba42b545a1c721941f735107 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Dec 2017 11:42:04 +0100 Subject: usb/gadget/NCM: Replace tasklet with softirq hrtimer The tx_tasklet tasklet is used in invoke the hrtimer (task_timer) in softirq context. This can be also achieved without the tasklet but with HRTIMER_MODE_SOFT as hrtimer mode. Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Acked-by: Felipe Balbi Cc: Christoph Hellwig Cc: Felipe Balbi Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: keescook@chromium.org Cc: linux-usb@vger.kernel.org Link: http://lkml.kernel.org/r/20171221104205.7269-36-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- drivers/usb/gadget/function/f_ncm.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index c5bce8e..5780fba 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -73,9 +73,7 @@ struct f_ncm { struct sk_buff *skb_tx_ndp; u16 ndp_dgram_count; bool timer_force_tx; - struct tasklet_struct tx_tasklet; struct hrtimer task_timer; - bool timer_stopping; }; @@ -1104,7 +1102,7 @@ static struct sk_buff *ncm_wrap_ntb(struct gether *port, /* Delay the timer. */ hrtimer_start(&ncm->task_timer, TX_TIMEOUT_NSECS, - HRTIMER_MODE_REL); + HRTIMER_MODE_REL_SOFT); /* Add the datagram position entries */ ntb_ndp = skb_put_zero(ncm->skb_tx_ndp, dgram_idx_len); @@ -1148,17 +1146,15 @@ err: } /* - * This transmits the NTB if there are frames waiting. + * The transmit should only be run if no skb data has been sent + * for a certain duration. */ -static void ncm_tx_tasklet(unsigned long data) +static enum hrtimer_restart ncm_tx_timeout(struct hrtimer *data) { - struct f_ncm *ncm = (void *)data; - - if (ncm->timer_stopping) - return; + struct f_ncm *ncm = container_of(data, struct f_ncm, task_timer); /* Only send if data is available. */ - if (ncm->skb_tx_data) { + if (!ncm->timer_stopping && ncm->skb_tx_data) { ncm->timer_force_tx = true; /* XXX This allowance of a NULL skb argument to ndo_start_xmit @@ -1171,16 +1167,6 @@ static void ncm_tx_tasklet(unsigned long data) ncm->timer_force_tx = false; } -} - -/* - * The transmit should only be run if no skb data has been sent - * for a certain duration. - */ -static enum hrtimer_restart ncm_tx_timeout(struct hrtimer *data) -{ - struct f_ncm *ncm = container_of(data, struct f_ncm, task_timer); - tasklet_schedule(&ncm->tx_tasklet); return HRTIMER_NORESTART; } @@ -1513,8 +1499,7 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f) ncm->port.open = ncm_open; ncm->port.close = ncm_close; - tasklet_init(&ncm->tx_tasklet, ncm_tx_tasklet, (unsigned long) ncm); - hrtimer_init(&ncm->task_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&ncm->task_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); ncm->task_timer.function = ncm_tx_timeout; DBG(cdev, "CDC Network: %s speed IN/%s OUT/%s NOTIFY/%s\n", @@ -1623,7 +1608,6 @@ static void ncm_unbind(struct usb_configuration *c, struct usb_function *f) DBG(c->cdev, "ncm unbind\n"); hrtimer_cancel(&ncm->task_timer); - tasklet_kill(&ncm->tx_tasklet); ncm_string_defs[0].id = 0; usb_free_all_descriptors(f); -- cgit v1.1