diff options
-rw-r--r-- | Documentation/kernel-parameters.txt | 3 | ||||
-rw-r--r-- | include/linux/sched.h | 3 | ||||
-rw-r--r-- | kernel/softlockup.c | 48 | ||||
-rw-r--r-- | kernel/sysctl.c | 20 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 4 | ||||
-rw-r--r-- | lib/Kconfig.debug | 26 |
6 files changed, 89 insertions, 15 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e07c432..042588f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1971,6 +1971,9 @@ and is between 256 and 4096 characters. It is defined in the file snd-ymfpci= [HW,ALSA] + softlockup_panic= + [KNL] Should the soft-lockup detector generate panics. + sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/sonypi.txt diff --git a/include/linux/sched.h b/include/linux/sched.h index c5d3f84..69760a3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -294,10 +294,11 @@ extern void softlockup_tick(void); extern void spawn_softlockup_task(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); -extern unsigned long softlockup_thresh; +extern unsigned int softlockup_panic; extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; +extern int softlockup_thresh; #else static inline void softlockup_tick(void) { diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 01b6522..6b682d8 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -25,7 +25,22 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp); static DEFINE_PER_CPU(struct task_struct *, watchdog_task); static int __read_mostly did_panic; -unsigned long __read_mostly softlockup_thresh = 60; +int __read_mostly softlockup_thresh = 60; + +/* + * Should we panic (and reboot, if panic_timeout= is set) when a + * soft-lockup occurs: + */ +unsigned int __read_mostly softlockup_panic = + CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; + +static int __init softlockup_panic_setup(char *str) +{ + softlockup_panic = simple_strtoul(str, NULL, 0); + + return 1; +} +__setup("softlockup_panic=", softlockup_panic_setup); static int softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) @@ -49,12 +64,17 @@ static unsigned long get_timestamp(int this_cpu) return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ } -void touch_softlockup_watchdog(void) +static void __touch_softlockup_watchdog(void) { int this_cpu = raw_smp_processor_id(); __raw_get_cpu_var(touch_timestamp) = get_timestamp(this_cpu); } + +void touch_softlockup_watchdog(void) +{ + __raw_get_cpu_var(touch_timestamp) = 0; +} EXPORT_SYMBOL(touch_softlockup_watchdog); void touch_all_softlockup_watchdogs(void) @@ -79,8 +99,16 @@ void softlockup_tick(void) struct pt_regs *regs = get_irq_regs(); unsigned long now; + /* Is detection switched off? */ + if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) { + /* Be sure we don't false trigger if switched back on */ + if (touch_timestamp) + per_cpu(touch_timestamp, this_cpu) = 0; + return; + } + if (touch_timestamp == 0) { - touch_softlockup_watchdog(); + __touch_softlockup_watchdog(); return; } @@ -89,13 +117,13 @@ void softlockup_tick(void) /* report at most once a second */ if ((print_timestamp >= touch_timestamp && print_timestamp < (touch_timestamp + 1)) || - did_panic || !per_cpu(watchdog_task, this_cpu)) { + did_panic) { return; } /* do not print during early bootup: */ if (unlikely(system_state != SYSTEM_RUNNING)) { - touch_softlockup_watchdog(); + __touch_softlockup_watchdog(); return; } @@ -120,6 +148,9 @@ void softlockup_tick(void) else dump_stack(); spin_unlock(&print_lock); + + if (softlockup_panic) + panic("softlockup: hung tasks"); } /* @@ -172,6 +203,9 @@ static void check_hung_task(struct task_struct *t, unsigned long now) t->last_switch_timestamp = now; touch_nmi_watchdog(); + + if (softlockup_panic) + panic("softlockup: blocked tasks"); } /* @@ -214,7 +248,7 @@ static int watchdog(void *__bind_cpu) sched_setscheduler(current, SCHED_FIFO, ¶m); /* initialize timestamp */ - touch_softlockup_watchdog(); + __touch_softlockup_watchdog(); set_current_state(TASK_INTERRUPTIBLE); /* @@ -223,7 +257,7 @@ static int watchdog(void *__bind_cpu) * debug-printout triggers in softlockup_tick(). */ while (!kthread_should_stop()) { - touch_softlockup_watchdog(); + __touch_softlockup_watchdog(); schedule(); if (kthread_should_stop()) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2911665..a829dc8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -84,12 +84,13 @@ extern int latencytop_enabled; extern int sysctl_nr_open_min, sysctl_nr_open_max; /* Constants used for minimum and maximum */ -#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) +#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP) static int one = 1; #endif #ifdef CONFIG_DETECT_SOFTLOCKUP static int sixty = 60; +static int neg_one = -1; #endif #ifdef CONFIG_MMU @@ -729,13 +730,24 @@ static struct ctl_table kern_table[] = { #ifdef CONFIG_DETECT_SOFTLOCKUP { .ctl_name = CTL_UNNUMBERED, + .procname = "softlockup_panic", + .data = &softlockup_panic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one, + }, + { + .ctl_name = CTL_UNNUMBERED, .procname = "softlockup_thresh", .data = &softlockup_thresh, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = &proc_dointvec_minmax, .strategy = &sysctl_intvec, - .extra1 = &one, + .extra1 = &neg_one, .extra2 = &sixty, }, { diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b854a89..28abad6 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -133,8 +133,6 @@ void tick_nohz_update_jiffies(void) if (!ts->tick_stopped) return; - touch_softlockup_watchdog(); - cpu_clear(cpu, nohz_cpu_mask); now = ktime_get(); ts->idle_waketime = now; @@ -142,6 +140,8 @@ void tick_nohz_update_jiffies(void) local_irq_save(flags); tick_do_update_jiffies64(now); local_irq_restore(flags); + + touch_softlockup_watchdog(); } void tick_nohz_stop_idle(int cpu) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d2099f4..509ae35 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -147,7 +147,7 @@ config DETECT_SOFTLOCKUP help Say Y here to enable the kernel to detect "soft lockups", which are bugs that cause the kernel to loop in kernel - mode for more than 10 seconds, without giving other tasks a + mode for more than 60 seconds, without giving other tasks a chance to run. When a soft-lockup is detected, the kernel will print the @@ -159,6 +159,30 @@ config DETECT_SOFTLOCKUP can be detected via the NMI-watchdog, on platforms that support it.) +config BOOTPARAM_SOFTLOCKUP_PANIC + bool "Panic (Reboot) On Soft Lockups" + depends on DETECT_SOFTLOCKUP + help + Say Y here to enable the kernel to panic on "soft lockups", + which are bugs that cause the kernel to loop in kernel + mode for more than 60 seconds, without giving other tasks a + chance to run. + + The panic can be used in combination with panic_timeout, + to cause the system to reboot automatically after a + lockup has been detected. This feature is useful for + high-availability systems that have uptime guarantees and + where a lockup must be resolved ASAP. + + Say N if unsure. + +config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE + int + depends on DETECT_SOFTLOCKUP + range 0 1 + default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC + default 1 if BOOTPARAM_SOFTLOCKUP_PANIC + config SCHED_DEBUG bool "Collect scheduler debugging info" depends on DEBUG_KERNEL && PROC_FS |