summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMike Travis <travis@sgi.com>2015-09-12 21:51:10 -0500
committerIngo Molnar <mingo@kernel.org>2015-09-14 15:31:30 +0200
commitd0a9964e98731c708500a2e712f28f9d39183647 (patch)
treec90e87a7238301eb143e7d17becbbdf422069071
parent7c52198b9c36001d47e11d50f7f1556805c5c3e3 (diff)
downloadop-kernel-dev-d0a9964e98731c708500a2e712f28f9d39183647.zip
op-kernel-dev-d0a9964e98731c708500a2e712f28f9d39183647.tar.gz
x86/platform/uv: Implement simple dump failover if kdump fails
The ability to trigger a kdump using the system NMI command was added by commit 12ba6c990fab ("x86/UV: Add kdump to UV NMI handler") Author: Mike Travis <travis@sgi.com> Date: Mon Sep 23 16:25:03 2013 -0500 This is useful because when kdump is working the information gathered is more informative than the original per CPU stack traces or "dump" option. However a number of things can go wrong with kdump and then the stack traces are more useful than nothing. The two most common reasons for kdump to not be available are: 1) if a problem occurs during boot before the kdump service is started, or 2) the kdump daemon failed to start. In either case the call to crash_kexec() returns unexpectedly. When this happens uv_nmi_kdump() also sets the uv_nmi_kexec_failed flag which causes the slave CPU's to also return to the NMI handler. Upon this unexpected return to the NMI handler, the NMI handler will revert to the "dump" action which uses show_regs() to obtain a process trace dump for all the CPU's. Other minor changes: The "dump" action now generates both the show_regs() stack trace and show instruction pointer information. Whereas the "ips" action only shows instruction pointers for non-idle CPU's. This is more like an abbreviated "ps" display. Change printk(KERN_DEFAULT...) --> pr_info() Signed-off-by: Mike Travis <travis@sgi.com> Signed-off-by: George Beshers <gbeshers@sgi.com> Cc: Alex Thorlton <athorlton@sgi.com> Cc: Dimitri Sivanich <sivanich@sgi.com> Cc: Hedi Berriche <hedi@sgi.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Russ Anderson <rja@sgi.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/platform/uv/uv_nmi.c53
1 files changed, 30 insertions, 23 deletions
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 5c9f63f..327f21c 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -376,38 +376,42 @@ static void uv_nmi_wait(int master)
atomic_read(&uv_nmi_cpus_in_nmi), num_online_cpus());
}
+/* Dump Instruction Pointer header */
static void uv_nmi_dump_cpu_ip_hdr(void)
{
- printk(KERN_DEFAULT
- "\nUV: %4s %6s %-32s %s (Note: PID 0 not listed)\n",
+ pr_info("\nUV: %4s %6s %-32s %s (Note: PID 0 not listed)\n",
"CPU", "PID", "COMMAND", "IP");
}
+/* Dump Instruction Pointer info */
static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs)
{
- printk(KERN_DEFAULT "UV: %4d %6d %-32.32s ",
- cpu, current->pid, current->comm);
-
+ pr_info("UV: %4d %6d %-32.32s ", cpu, current->pid, current->comm);
printk_address(regs->ip);
}
-/* Dump this cpu's state */
+/*
+ * Dump this CPU's state. If action was set to "kdump" and the crash_kexec
+ * failed, then we provide "dump" as an alternate action. Action "dump" now
+ * also includes the show "ips" (instruction pointers) action whereas the
+ * action "ips" only displays instruction pointers for the non-idle CPU's.
+ * This is an abbreviated form of the "ps" command.
+ */
static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs)
{
const char *dots = " ................................. ";
- if (uv_nmi_action_is("ips")) {
- if (cpu == 0)
- uv_nmi_dump_cpu_ip_hdr();
+ if (cpu == 0)
+ uv_nmi_dump_cpu_ip_hdr();
- if (current->pid != 0)
- uv_nmi_dump_cpu_ip(cpu, regs);
+ if (current->pid != 0 || !uv_nmi_action_is("ips"))
+ uv_nmi_dump_cpu_ip(cpu, regs);
- } else if (uv_nmi_action_is("dump")) {
- printk(KERN_DEFAULT
- "UV:%sNMI process trace for CPU %d\n", dots, cpu);
+ if (uv_nmi_action_is("dump")) {
+ pr_info("UV:%sNMI process trace for CPU %d\n", dots, cpu);
show_regs(regs);
}
+
this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE);
}
@@ -469,8 +473,7 @@ static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master)
uv_nmi_trigger_dump(tcpu);
}
if (ignored)
- printk(KERN_DEFAULT "UV: %d CPUs ignored NMI\n",
- ignored);
+ pr_alert("UV: %d CPUs ignored NMI\n", ignored);
console_loglevel = saved_console_loglevel;
pr_alert("UV: process trace complete\n");
@@ -492,8 +495,9 @@ static void uv_nmi_touch_watchdogs(void)
touch_nmi_watchdog();
}
-#if defined(CONFIG_KEXEC_CORE)
static atomic_t uv_nmi_kexec_failed;
+
+#if defined(CONFIG_KEXEC_CORE)
static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
{
/* Call crash to dump system state */
@@ -502,10 +506,9 @@ static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
crash_kexec(regs);
pr_emerg("UV: crash_kexec unexpectedly returned, ");
+ atomic_set(&uv_nmi_kexec_failed, 1);
if (!kexec_crash_image) {
pr_cont("crash kernel not loaded\n");
- atomic_set(&uv_nmi_kexec_failed, 1);
- uv_nmi_sync_exit(1);
return;
}
pr_cont("kexec busy, stalling cpus while waiting\n");
@@ -514,9 +517,6 @@ static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
/* If crash exec fails the slaves should return, otherwise stall */
while (atomic_read(&uv_nmi_kexec_failed) == 0)
mdelay(10);
-
- /* Crash kernel most likely not loaded, return in an orderly fashion */
- uv_nmi_sync_exit(0);
}
#else /* !CONFIG_KEXEC_CORE */
@@ -524,6 +524,7 @@ static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
{
if (master)
pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n");
+ atomic_set(&uv_nmi_kexec_failed, 1);
}
#endif /* !CONFIG_KEXEC_CORE */
@@ -613,9 +614,14 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
master = (atomic_read(&uv_nmi_cpu) == cpu);
/* If NMI action is "kdump", then attempt to do it */
- if (uv_nmi_action_is("kdump"))
+ if (uv_nmi_action_is("kdump")) {
uv_nmi_kdump(cpu, master, regs);
+ /* Unexpected return, revert action to "dump" */
+ if (master)
+ strncpy(uv_nmi_action, "dump", strlen(uv_nmi_action));
+ }
+
/* Pause as all cpus enter the NMI handler */
uv_nmi_wait(master);
@@ -640,6 +646,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
atomic_set(&uv_nmi_cpus_in_nmi, -1);
atomic_set(&uv_nmi_cpu, -1);
atomic_set(&uv_in_nmi, 0);
+ atomic_set(&uv_nmi_kexec_failed, 0);
}
uv_nmi_touch_watchdogs();
OpenPOWER on IntegriCloud