From 84e478c6f1eb9c4bfa1fff2f8108e9a061b46428 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 5 Feb 2010 21:47:05 -0500 Subject: nmi_watchdog: Config option to enable new nmi_watchdog These are the bits that enable the new nmi_watchdog and safely isolate the old nmi_watchdog. Only one or the other can run, not both at the same time. Signed-off-by: Don Zickus Cc: Linus Torvalds Cc: Andrew Morton Cc: gorcunov@gmail.com Cc: aris@redhat.com Cc: peterz@infradead.org LKML-Reference: <1265424425-31562-4-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 25c3ed5..f80b67e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -170,6 +170,19 @@ config DETECT_SOFTLOCKUP can be detected via the NMI-watchdog, on platforms that support it.) +config NMI_WATCHDOG + bool "Detect Hard Lockups with an NMI Watchdog" + depends on DEBUG_KERNEL && PERF_EVENTS + default y + help + Say Y here to enable the kernel to use the NMI as a watchdog + to detect hard lockups. This is useful when a cpu hangs for no + reason but can still respond to NMIs. A backtrace is displayed + for reviewing and reporting. + + The overhead should be minimal, just an extra NMI every few + seconds. + config BOOTPARAM_SOFTLOCKUP_PANIC bool "Panic (Reboot) On Soft Lockups" depends on DETECT_SOFTLOCKUP -- cgit v1.1 From 8e7672cdb413af859086ffceaed68f7e1e8ea4c2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 9 Feb 2010 06:11:00 +0100 Subject: nmi_watchdog: Only enable on x86 for now It wont even build on other platforms just yet - so restrict it to x86 for now. Cc: Don Zickus Cc: gorcunov@gmail.com Cc: aris@redhat.com Cc: peterz@infradead.org LKML-Reference: <1265424425-31562-4-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index f80b67e..acef882 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -173,6 +173,7 @@ config DETECT_SOFTLOCKUP config NMI_WATCHDOG bool "Detect Hard Lockups with an NMI Watchdog" depends on DEBUG_KERNEL && PERF_EVENTS + depends on X86 default y help Say Y here to enable the kernel to use the NMI as a watchdog -- cgit v1.1 From c3128fb6ad39b0edda6675d20585a64846cf89ea Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 12 Feb 2010 17:19:18 -0500 Subject: nmi_watchdog: Use a boolean config flag for compiling Determines if an arch has setup arch specific perf_events and nmi_watchdog code. This should restrict compiles to only those arches ready. Signed-off-by: Don Zickus Cc: peterz@infradead.org Cc: gorcunov@gmail.com Cc: aris@redhat.com LKML-Reference: <1266013161-31197-1-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index acef882..01a4d85 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -172,8 +172,7 @@ config DETECT_SOFTLOCKUP config NMI_WATCHDOG bool "Detect Hard Lockups with an NMI Watchdog" - depends on DEBUG_KERNEL && PERF_EVENTS - depends on X86 + depends on DEBUG_KERNEL && PERF_EVENTS && PERF_EVENTS_NMI default y help Say Y here to enable the kernel to use the NMI as a watchdog -- cgit v1.1 From c99c30feadf664ccd8590b96259cb9f3954bd246 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 28 Feb 2010 20:49:00 +0100 Subject: nmi_watchdog: Turn it off by default It was nice to enable it by default for testing - but before we push it upstream we want it to be off - so that people can opt-in gradually. Cc: Don Zickus Cc: peterz@infradead.org Cc: gorcunov@gmail.com Cc: aris@redhat.com LKML-Reference: <1266880143-24943-1-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 01a4d85..e2e73cc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -173,7 +173,6 @@ config DETECT_SOFTLOCKUP config NMI_WATCHDOG bool "Detect Hard Lockups with an NMI Watchdog" depends on DEBUG_KERNEL && PERF_EVENTS && PERF_EVENTS_NMI - default y help Say Y here to enable the kernel to use the NMI as a watchdog to detect hard lockups. This is useful when a cpu hangs for no -- cgit v1.1 From 58687acba59266735adb8ccd9b5b9aa2c7cd205b Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 7 May 2010 17:11:44 -0400 Subject: lockup_detector: Combine nmi_watchdog and softlockup detector The new nmi_watchdog (which uses the perf event subsystem) is very similar in structure to the softlockup detector. Using Ingo's suggestion, I combined the two functionalities into one file: kernel/watchdog.c. Now both the nmi_watchdog (or hardlockup detector) and softlockup detector sit on top of the perf event subsystem, which is run every 60 seconds or so to see if there are any lockups. To detect hardlockups, cpus not responding to interrupts, I implemented an hrtimer that runs 5 times for every perf event overflow event. If that stops counting on a cpu, then the cpu is most likely in trouble. To detect softlockups, tasks not yielding to the scheduler, I used the previous kthread idea that now gets kicked every time the hrtimer fires. If the kthread isn't being scheduled neither is anyone else and the warning is printed to the console. I tested this on x86_64 and both the softlockup and hardlockup paths work. V2: - cleaned up the Kconfig and softlockup combination - surrounded hardlockup cases with #ifdef CONFIG_PERF_EVENTS_NMI - seperated out the softlockup case from perf event subsystem - re-arranged the enabling/disabling nmi watchdog from proc space - added cpumasks for hardlockup failure cases - removed fallback to soft events if no PMU exists for hard events V3: - comment cleanups - drop support for older softlockup code - per_cpu cleanups - completely remove software clock base hardlockup detector - use per_cpu masking on hard/soft lockup detection - #ifdef cleanups - rename config option NMI_WATCHDOG to LOCKUP_DETECTOR - documentation additions V4: - documentation fixes - convert per_cpu to __get_cpu_var - powerpc compile fixes V5: - split apart warn flags for hard and soft lockups TODO: - figure out how to make an arch-agnostic clock2cycles call (if possible) to feed into perf events as a sample period [fweisbec: merged conflict patch] Signed-off-by: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap LKML-Reference: <1273266711-18706-2-git-send-email-dzickus@redhat.com> Signed-off-by: Frederic Weisbecker --- lib/Kconfig.debug | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 220ae60..49e285d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -153,7 +153,7 @@ config DEBUG_SHIRQ points; some don't and need to be caught. config DETECT_SOFTLOCKUP - bool "Detect Soft Lockups" + bool depends on DEBUG_KERNEL && !S390 default y help @@ -171,17 +171,27 @@ config DETECT_SOFTLOCKUP can be detected via the NMI-watchdog, on platforms that support it.) -config NMI_WATCHDOG - bool "Detect Hard Lockups with an NMI Watchdog" - depends on DEBUG_KERNEL && PERF_EVENTS && PERF_EVENTS_NMI +config LOCKUP_DETECTOR + bool "Detect Hard and Soft Lockups" + depends on DEBUG_KERNEL + default DETECT_SOFTLOCKUP help - Say Y here to enable the kernel to use the NMI as a watchdog - to detect hard lockups. This is useful when a cpu hangs for no - reason but can still respond to NMIs. A backtrace is displayed - for reviewing and reporting. + Say Y here to enable the kernel to act as a watchdog to detect + hard and soft lockups. + + Softlockups are bugs that cause the kernel to loop in kernel + mode for more than 60 seconds, without giving other tasks a + chance to run. The current stack trace is displayed upon + detection and the system will stay locked up. + + Hardlockups are bugs that cause the CPU to loop in kernel mode + for more than 60 seconds, without letting other interrupts have a + chance to run. The current stack trace is displayed upon detection + and the system will stay locked up. - The overhead should be minimal, just an extra NMI every few - seconds. + The overhead should be minimal. A periodic hrtimer runs to + generate interrupts and kick the watchdog task every 10-12 seconds. + An NMI is generated every 60 seconds or so to check for hardlockups. config BOOTPARAM_SOFTLOCKUP_PANIC bool "Panic (Reboot) On Soft Lockups" -- cgit v1.1 From 89d7ce2a2178e7f562f608b466a18c8c2ece87af Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 13 May 2010 00:27:20 +0200 Subject: lockup_detector: Make BOOTPARAM_SOFTLOCKUP_PANIC depend on LOCKUP_DETECTOR Panic on softlockups was still depending on the softlockup detector. But the latter has been merged into the lockup detector now. Let's update this config dependency. Signed-off-by: Frederic Weisbecker Cc: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 49e285d..755713a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -195,7 +195,7 @@ config LOCKUP_DETECTOR config BOOTPARAM_SOFTLOCKUP_PANIC bool "Panic (Reboot) On Soft Lockups" - depends on DETECT_SOFTLOCKUP + depends on LOCKUP_DETECTOR help Say Y here to enable the kernel to panic on "soft lockups", which are bugs that cause the kernel to loop in kernel -- cgit v1.1 From e16bb1d7fe07609bc8b0e4c043eff2f47ada78d8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 15 May 2010 22:30:22 +0200 Subject: lockup_detector: Update some config We kept CONFIG_DETECT_SOFTLOCKUP around for compatibility with older configs. But it was enabled by default if CONFIG_DEBUG_KERNEL. So if we want to enable CONFIG_LOCKUP_DETECTOR on configs that had CONFIG_DETECT_SOFTLOCKUP, all we need is to have the same enabling by default if CONFIG_DEBUG_KERNEL. We can then remove CONFIG_DETECT_SOFTLOCKUP directly. So tag CONFIG_LOCKUP_DETECTOR as default y. This is what we want for most serious kernel debugging anyway. And also forbid the lockup detector in S390 as it was for the previous softlockup detector, event though the true reason for that is not outlined. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Don Zickus Cc: Peter Zijlstra Cc: Cyrill Gorcunov --- lib/Kconfig.debug | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 755713a..3a18b0b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -152,29 +152,10 @@ config DEBUG_SHIRQ Drivers ought to be able to handle interrupts coming in at those points; some don't and need to be caught. -config DETECT_SOFTLOCKUP - bool - depends on DEBUG_KERNEL && !S390 - default y - help - Say Y here to enable the kernel to detect "soft lockups", - which are bugs that cause the kernel to loop in kernel - mode for more than 60 seconds, without giving other tasks a - chance to run. - - When a soft-lockup is detected, the kernel will print the - current stack trace (which you should report), but the - system will stay locked up. This feature has negligible - overhead. - - (Note that "hard lockups" are separate type of bugs that - can be detected via the NMI-watchdog, on platforms that - support it.) - config LOCKUP_DETECTOR bool "Detect Hard and Soft Lockups" - depends on DEBUG_KERNEL - default DETECT_SOFTLOCKUP + depends on DEBUG_KERNEL && !S390 + default y help Say Y here to enable the kernel to act as a watchdog to detect hard and soft lockups. @@ -212,7 +193,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE int - depends on DETECT_SOFTLOCKUP + depends on LOCKUP_DETECTOR range 0 1 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC default 1 if BOOTPARAM_SOFTLOCKUP_PANIC -- cgit v1.1 From 23637d477c1f53acbb176a02c241d60a25888fae Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 15 May 2010 23:15:20 +0200 Subject: lockup_detector: Introduce CONFIG_HARDLOCKUP_DETECTOR This new config is deemed to simplify even more the lockup detector dependencies and can make it easier to bring a smooth sorting between archs that support the new generic lockup detector and those that still have their own, especially for those that are in the middle of this migration. Instead of checking whether we have CONFIG_LOCKUP_DETECTOR + CONFIG_PERF_EVENTS_NMI each time an arch wants to know if it needs to build its own lockup detector, take a shortcut with this new config. It is enabled only if the hardlockup detection part of the whole lockup detector is on. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Don Zickus Cc: Cyrill Gorcunov --- lib/Kconfig.debug | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 3a18b0b..e65e47d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -174,6 +174,9 @@ config LOCKUP_DETECTOR generate interrupts and kick the watchdog task every 10-12 seconds. An NMI is generated every 60 seconds or so to check for hardlockups. +config HARDLOCKUP_DETECTOR + def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI + config BOOTPARAM_SOFTLOCKUP_PANIC bool "Panic (Reboot) On Soft Lockups" depends on LOCKUP_DETECTOR -- cgit v1.1 From e35e7fb0e9ea557f7504ced6fe4ccf69e44b7f07 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 19 May 2010 11:36:49 +0200 Subject: lockup_detector: Don't enable the lockup detector by default The lockup detector is a new feature that now involves the nmi watchdog. Drop the default y and let the user choose. Signed-off-by: Frederic Weisbecker Cc: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov --- lib/Kconfig.debug | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e65e47d..63968a9 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -155,7 +155,6 @@ config DEBUG_SHIRQ config LOCKUP_DETECTOR bool "Detect Hard and Soft Lockups" depends on DEBUG_KERNEL && !S390 - default y help Say Y here to enable the kernel to act as a watchdog to detect hard and soft lockups. -- cgit v1.1