summaryrefslogtreecommitdiffstats
path: root/sys/kern/kern_shutdown.c
diff options
context:
space:
mode:
authoravg <avg@FreeBSD.org>2011-12-11 21:02:01 +0000
committeravg <avg@FreeBSD.org>2011-12-11 21:02:01 +0000
commit75ddaeae802cb467ed6abb2bd33cdb33dfd34068 (patch)
tree56c7a4a1cbf236bc73c2bcadee83d2a80f84aa18 /sys/kern/kern_shutdown.c
parent966190317663bb888ac1921039bc6ab70ae57358 (diff)
downloadFreeBSD-src-75ddaeae802cb467ed6abb2bd33cdb33dfd34068.zip
FreeBSD-src-75ddaeae802cb467ed6abb2bd33cdb33dfd34068.tar.gz
panic: add a switch and infrastructure for stopping other CPUs in SMP case
Historical behavior of letting other CPUs merily go on is a default for time being. The new behavior can be switched on via kern.stop_scheduler_on_panic tunable and sysctl. Stopping of the CPUs has (at least) the following benefits: - more of the system state at panic time is preserved intact - threads and interrupts do not interfere with dumping of the system state Only one thread runs uninterrupted after panic if stop_scheduler_on_panic is set. That thread might call code that is also used in normal context and that code might use locks to prevent concurrent execution of certain parts. Those locks might be held by the stopped threads and would never be released. To work around this issue, it was decided that instead of explicit checks for panic context, we would rather put those checks inside the locking primitives. This change has substantial portions written and re-written by attilio and kib at various times. Other changes are heavily based on the ideas and patches submitted by jhb and mdf. bde has provided many insights into the details and history of the current code. The new behavior may cause problems for systems that use a USB keyboard for interfacing with system console. This is because of some unusual locking patterns in the ukbd code which have to be used because on one hand ukbd is below syscons, but on the other hand it has to interface with other usb code that uses regular mutexes/Giant for its concurrency protection. Dumping to USB-connected disks may also be affected. PR: amd64/139614 (at least) In cooperation with: attilio, jhb, kib, mdf Discussed with: arch@, bde Tested by: Eugene Grosbein <eugen@grosbein.net>, gnn, Steven Hartland <killing@multiplay.co.uk>, glebius, Andrew Boyer <aboyer@averesystems.com> (various versions of the patch) MFC after: 3 months (or never)
Diffstat (limited to 'sys/kern/kern_shutdown.c')
-rw-r--r--sys/kern/kern_shutdown.c42
1 files changed, 36 insertions, 6 deletions
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index 2b5a5ae..518fe4b 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -121,6 +121,11 @@ SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RW | CTLFLAG_TUN,
&sync_on_panic, 0, "Do a sync before rebooting from a panic");
TUNABLE_INT("kern.sync_on_panic", &sync_on_panic);
+static int stop_scheduler_on_panic = 0;
+SYSCTL_INT(_kern, OID_AUTO, stop_scheduler_on_panic, CTLFLAG_RW | CTLFLAG_TUN,
+ &stop_scheduler_on_panic, 0, "stop scheduler upon entering panic");
+TUNABLE_INT("kern.stop_scheduler_on_panic", &stop_scheduler_on_panic);
+
static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0,
"Shutdown environment");
@@ -138,6 +143,7 @@ SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW,
*/
const char *panicstr;
+int stop_scheduler; /* system stopped CPUs for panic */
int dumping; /* system is dumping */
int rebooting; /* system is rebooting */
static struct dumperinfo dumper; /* our selected dumper */
@@ -294,10 +300,12 @@ kern_reboot(int howto)
* systems don't shutdown properly (i.e., ACPI power off) if we
* run on another processor.
*/
- thread_lock(curthread);
- sched_bind(curthread, 0);
- thread_unlock(curthread);
- KASSERT(PCPU_GET(cpuid) == 0, ("%s: not running on cpu 0", __func__));
+ if (!SCHEDULER_STOPPED()) {
+ thread_lock(curthread);
+ sched_bind(curthread, 0);
+ thread_unlock(curthread);
+ KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0"));
+ }
#endif
/* We're in the process of rebooting. */
rebooting = 1;
@@ -547,13 +555,18 @@ panic(const char *fmt, ...)
{
#ifdef SMP
static volatile u_int panic_cpu = NOCPU;
+ cpuset_t other_cpus;
#endif
struct thread *td = curthread;
int bootopt, newpanic;
va_list ap;
static char buf[256];
- critical_enter();
+ if (stop_scheduler_on_panic)
+ spinlock_enter();
+ else
+ critical_enter();
+
#ifdef SMP
/*
* We don't want multiple CPU's to panic at the same time, so we
@@ -566,6 +579,22 @@ panic(const char *fmt, ...)
PCPU_GET(cpuid)) == 0)
while (panic_cpu != NOCPU)
; /* nothing */
+
+ if (stop_scheduler_on_panic) {
+ if (panicstr == NULL && !kdb_active) {
+ other_cpus = all_cpus;
+ CPU_CLR(PCPU_GET(cpuid), &other_cpus);
+ stop_cpus_hard(other_cpus);
+ }
+
+ /*
+ * We set stop_scheduler here and not in the block above,
+ * because we want to ensure that if panic has been called and
+ * stop_scheduler_on_panic is true, then stop_scheduler will
+ * always be set. Even if panic has been entered from kdb.
+ */
+ stop_scheduler = 1;
+ }
#endif
bootopt = RB_AUTOBOOT;
@@ -604,7 +633,8 @@ panic(const char *fmt, ...)
/* thread_unlock(td); */
if (!sync_on_panic)
bootopt |= RB_NOSYNC;
- critical_exit();
+ if (!stop_scheduler_on_panic)
+ critical_exit();
kern_reboot(bootopt);
}
OpenPOWER on IntegriCloud