summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authorattilio <attilio@FreeBSD.org>2009-08-13 17:09:45 +0000
committerattilio <attilio@FreeBSD.org>2009-08-13 17:09:45 +0000
commite85ca71aadcc42a3f7cc90e6cff96c28f1c54290 (patch)
treefa01b2cb8792e972d05922a5507e082efa4d4c1d /sys
parentb1786d80d2f6e85141431d158b4cf3a47a2f71ec (diff)
downloadFreeBSD-src-e85ca71aadcc42a3f7cc90e6cff96c28f1c54290.zip
FreeBSD-src-e85ca71aadcc42a3f7cc90e6cff96c28f1c54290.tar.gz
* Completely remove the STOP_NMI option from the kernel. This option
has proven to have a good effect when entering KDB by using an NMI, but in other situations it completely violates the rules about keeping interrupts disabled while holding a spinlock. This can cause deadlocks on events where a normal IPI_STOP is expected. * Add a new IPI called IPI_STOP_HARD on all the supported architectures. This IPI is responsible for sending a stop message among CPUs using a privileged channel when one is available. In other cases it simply behaves like a normal IPI_STOP. Right now the IPI_STOP_HARD functionality uses an NMI on the ia32 and amd64 architectures, while on the others it has the effect of a normal IPI_STOP. It is the responsibility of the maintainers to eventually implement a hard stop where necessary and possible. * Use the new IPI facility to implement a new consumer-facing SMP kernel function called stop_cpus_hard(). It mirrors stop_cpus(), but it uses the privileged channel for the stopping facility. * Let KDB use the newly introduced function stop_cpus_hard() and leave stop_cpus() for all the other cases. * Disable interrupts on CPU0 when starting the process of suspending the APs. * Style cleanup and additional comments. This patch should fix the reboot/shutdown deadlocks many users are constantly reporting on the mailing lists. Please don't forget to update your config file to remove the STOP_NMI option. Reviewed by: jhb Tested by: pho, bz, rink Approved by: re (kib)
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/amd64/local_apic.c13
-rw-r--r--sys/amd64/amd64/mp_machdep.c100
-rw-r--r--sys/amd64/amd64/trap.c2
-rw-r--r--sys/amd64/conf/GENERIC1
-rw-r--r--sys/amd64/conf/NOTES5
-rw-r--r--sys/amd64/conf/XENHVM1
-rw-r--r--sys/amd64/include/apicvar.h6
-rw-r--r--sys/amd64/include/smp.h5
-rw-r--r--sys/conf/options.amd641
-rw-r--r--sys/conf/options.i3861
-rw-r--r--sys/conf/options.pc981
-rw-r--r--sys/i386/conf/GENERIC1
-rw-r--r--sys/i386/conf/NOTES1
-rw-r--r--sys/i386/i386/local_apic.c13
-rw-r--r--sys/i386/i386/mp_machdep.c99
-rw-r--r--sys/i386/i386/trap.c2
-rw-r--r--sys/i386/include/apicvar.h7
-rw-r--r--sys/i386/include/smp.h6
-rw-r--r--sys/i386/xen/mp_machdep.c2
-rw-r--r--sys/ia64/ia64/interrupt.c2
-rw-r--r--sys/ia64/include/smp.h1
-rw-r--r--sys/kern/kern_shutdown.c10
-rw-r--r--sys/kern/subr_kdb.c7
-rw-r--r--sys/kern/subr_smp.c25
-rw-r--r--sys/mips/include/smp.h1
-rw-r--r--sys/mips/mips/mp_machdep.c7
-rw-r--r--sys/pc98/conf/NOTES4
-rw-r--r--sys/powerpc/include/smp.h1
-rw-r--r--sys/powerpc/powerpc/mp_machdep.c8
-rw-r--r--sys/sparc64/include/smp.h1
-rw-r--r--sys/sun4v/include/smp.h1
-rw-r--r--sys/sys/smp.h1
32 files changed, 142 insertions, 194 deletions
diff --git a/sys/amd64/amd64/local_apic.c b/sys/amd64/amd64/local_apic.c
index 14559f3..cd3073c 100644
--- a/sys/amd64/amd64/local_apic.c
+++ b/sys/amd64/amd64/local_apic.c
@@ -1238,8 +1238,17 @@ lapic_ipi_vectored(u_int vector, int dest)
KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
("%s: invalid vector %d", __func__, vector));
- icrlo = vector | APIC_DELMODE_FIXED | APIC_DESTMODE_PHY |
- APIC_LEVEL_DEASSERT | APIC_TRIGMOD_EDGE;
+ icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE;
+
+ /*
+ * IPI_STOP_HARD is just a "fake" vector used to send a NMI.
+ * Use special rules regard NMI if passed, otherwise specify
+ * the vector.
+ */
+ if (vector == IPI_STOP_HARD)
+ icrlo |= APIC_DELMODE_NMI | APIC_LEVEL_ASSERT;
+ else
+ icrlo |= vector | APIC_DELMODE_FIXED | APIC_LEVEL_DEASSERT;
destfield = 0;
switch (dest) {
case APIC_IPI_DEST_SELF:
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 52c209c..0ef8017 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -114,31 +114,12 @@ volatile int smp_tlb_wait;
extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
-#ifdef STOP_NMI
-static volatile cpumask_t ipi_nmi_pending;
-
-static void ipi_nmi_selected(cpumask_t cpus);
-#endif
-
/*
* Local data and functions.
*/
-#ifdef STOP_NMI
-/*
- * Provide an alternate method of stopping other CPUs. If another CPU has
- * disabled interrupts the conventional STOP IPI will be blocked. This
- * NMI-based stop should get through in that case.
- */
-static int stop_cpus_with_nmi = 1;
-SYSCTL_INT(_debug, OID_AUTO, stop_cpus_with_nmi, CTLTYPE_INT | CTLFLAG_RW,
- &stop_cpus_with_nmi, 0, "");
-TUNABLE_INT("debug.stop_cpus_with_nmi", &stop_cpus_with_nmi);
-#else
-#define stop_cpus_with_nmi 0
-#endif
-
static u_int logical_cpus;
+static volatile cpumask_t ipi_nmi_pending;
/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;
@@ -1158,12 +1139,14 @@ ipi_selected(cpumask_t cpus, u_int ipi)
ipi = IPI_BITMAP_VECTOR;
}
-#ifdef STOP_NMI
- if (ipi == IPI_STOP && stop_cpus_with_nmi) {
- ipi_nmi_selected(cpus);
- return;
- }
-#endif
+ /*
+ * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
+ * of help in order to understand what is the source.
+ * Set the mask of receiving CPUs for this purpose.
+ */
+ if (ipi == IPI_STOP_HARD)
+ atomic_set_int(&ipi_nmi_pending, cpus);
+
CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
while ((cpu = ffs(cpus)) != 0) {
cpu--;
@@ -1194,64 +1177,43 @@ void
ipi_all_but_self(u_int ipi)
{
- if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
+ if (IPI_IS_BITMAPED(ipi)) {
ipi_selected(PCPU_GET(other_cpus), ipi);
return;
}
- CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
- lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
-}
-#ifdef STOP_NMI
-/*
- * send NMI IPI to selected CPUs
- */
-
-#define BEFORE_SPIN 1000000
-
-static void
-ipi_nmi_selected(cpumask_t cpus)
-{
- int cpu;
- register_t icrlo;
-
- icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT
- | APIC_TRIGMOD_EDGE;
-
- CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus);
-
- atomic_set_int(&ipi_nmi_pending, cpus);
-
- while ((cpu = ffs(cpus)) != 0) {
- cpu--;
- cpus &= ~(1 << cpu);
-
- KASSERT(cpu_apic_ids[cpu] != -1,
- ("IPI NMI to non-existent CPU %d", cpu));
-
- /* Wait for an earlier IPI to finish. */
- if (!lapic_ipi_wait(BEFORE_SPIN))
- panic("ipi_nmi_selected: previous IPI has not cleared");
+ /*
+ * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
+ * of help in order to understand what is the source.
+ * Set the mask of receiving CPUs for this purpose.
+ */
+ if (ipi == IPI_STOP_HARD)
+ atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus));
- lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]);
- }
+ CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
+ lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
}
int
-ipi_nmi_handler(void)
+ipi_nmi_handler()
{
- int cpumask = PCPU_GET(cpumask);
+ cpumask_t cpumask;
- if (!(ipi_nmi_pending & cpumask))
- return 1;
+ /*
+ * As long as there is not a simple way to know about a NMI's
+ * source, if the bitmask for the current CPU is present in
+ * the global pending bitword an IPI_STOP_HARD has been issued
+ * and should be handled.
+ */
+ cpumask = PCPU_GET(cpumask);
+ if ((ipi_nmi_pending & cpumask) == 0)
+ return (1);
atomic_clear_int(&ipi_nmi_pending, cpumask);
cpustop_handler();
- return 0;
+ return (0);
}
-#endif /* STOP_NMI */
-
/*
* Handle an IPI_STOP by saving our current context and spinning until we
* are resumed.
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index fee3caf..323e8d1 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -239,13 +239,11 @@ trap(struct trapframe *frame)
type = frame->tf_trapno;
#ifdef SMP
-#ifdef STOP_NMI
/* Handler for NMI IPIs used for stopping CPUs. */
if (type == T_NMI) {
if (ipi_nmi_handler() == 0)
goto out;
}
-#endif /* STOP_NMI */
#endif /* SMP */
#ifdef KDB
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index 73a4fb6..a49f7bc 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -69,7 +69,6 @@ options P1003_1B_SEMAPHORES # POSIX-style semaphores
options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed.
options KBD_INSTALL_CDEV # install a CDEV entry in /dev
-options STOP_NMI # Stop CPUS using NMI instead of IPI
options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4)
options AUDIT # Security event auditing
options MAC # TrustedBSD MAC Framework
diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES
index 088a381..27fe068 100644
--- a/sys/amd64/conf/NOTES
+++ b/sys/amd64/conf/NOTES
@@ -30,11 +30,6 @@ device mptable # Optional MPSPEC mptable support
#
options MP_WATCHDOG
-#
-# Debugging options.
-#
-options STOP_NMI # Stop CPUS using NMI instead of IPI
-
#####################################################################
diff --git a/sys/amd64/conf/XENHVM b/sys/amd64/conf/XENHVM
index 5e108d5..1536e3c 100644
--- a/sys/amd64/conf/XENHVM
+++ b/sys/amd64/conf/XENHVM
@@ -68,7 +68,6 @@ options SYSVMSG # SYSV-style message queues
options SYSVSEM # SYSV-style semaphores
options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
options KBD_INSTALL_CDEV # install a CDEV entry in /dev
-options STOP_NMI # Stop CPUS using NMI instead of IPI
options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4)
options AUDIT # Security event auditing
#options KDTRACE_FRAME # Ensure frames are compiled in
diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h
index 84ba3b8..73fff6c 100644
--- a/sys/amd64/include/apicvar.h
+++ b/sys/amd64/include/apicvar.h
@@ -102,11 +102,6 @@
* smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user
* at a time) The second group uses a single interrupt and a bitmap to avoid
* redundant IPI interrupts.
- *
- * Right now IPI_STOP used by kdb shares the interrupt priority class with
- * the two IPI groups mentioned above. As such IPI_STOP may cause a deadlock.
- * Eventually IPI_STOP should use NMI IPIs - this would eliminate this and
- * other deadlocks caused by IPI_STOP.
*/
/* Interrupts for local APIC LVT entries other than the timer. */
@@ -134,6 +129,7 @@
#define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */
#define IPI_SUSPEND (APIC_IPI_INTS + 8) /* Suspend CPU until restarted. */
+#define IPI_STOP_HARD (APIC_IPI_INTS + 9) /* Stop CPU with a NMI. */
/*
* The spurious interrupt can share the priority class with the IPIs since
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index d295715..1cc21a4 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -52,6 +52,7 @@ void cpu_add(u_int apic_id, char boot_cpu);
void cpustop_handler(void);
void cpususpend_handler(void);
void init_secondary(void);
+int ipi_nmi_handler(void);
void ipi_selected(cpumask_t cpus, u_int ipi);
void ipi_all_but_self(u_int ipi);
void ipi_bitmap_handler(struct trapframe frame);
@@ -66,10 +67,6 @@ void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva,
void smp_invltlb(void);
void smp_masked_invltlb(cpumask_t mask);
-#ifdef STOP_NMI
-int ipi_nmi_handler(void);
-#endif
-
#endif /* !LOCORE */
#endif /* SMP */
diff --git a/sys/conf/options.amd64 b/sys/conf/options.amd64
index 5247921..beb97ed 100644
--- a/sys/conf/options.amd64
+++ b/sys/conf/options.amd64
@@ -52,7 +52,6 @@ PSM_DEBUG opt_psm.h
DEV_ATPIC opt_atpic.h
# Debugging
-STOP_NMI opt_cpu.h
KDTRACE_FRAME opt_kdtrace.h
# BPF just-in-time compiler
diff --git a/sys/conf/options.i386 b/sys/conf/options.i386
index 45a1637..cd2ab98 100644
--- a/sys/conf/options.i386
+++ b/sys/conf/options.i386
@@ -110,7 +110,6 @@ ASR_COMPAT opt_asr.h
# Debugging
NPX_DEBUG opt_npx.h
-STOP_NMI opt_cpu.h
# BPF just-in-time compiler
BPF_JITTER opt_bpf.h
diff --git a/sys/conf/options.pc98 b/sys/conf/options.pc98
index 837169b..dca3d69 100644
--- a/sys/conf/options.pc98
+++ b/sys/conf/options.pc98
@@ -95,7 +95,6 @@ DEV_NPX opt_npx.h
# Debugging
NPX_DEBUG opt_npx.h
-STOP_NMI opt_cpu.h
AGP_DEBUG opt_agp.h
# BPF just-in-time compiler
diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC
index 02f5a36..ef958af 100644
--- a/sys/i386/conf/GENERIC
+++ b/sys/i386/conf/GENERIC
@@ -70,7 +70,6 @@ options P1003_1B_SEMAPHORES # POSIX-style semaphores
options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed.
options KBD_INSTALL_CDEV # install a CDEV entry in /dev
-options STOP_NMI # Stop CPUS using NMI instead of IPI
options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4)
options AUDIT # Security event auditing
options MAC # TrustedBSD MAC Framework
diff --git a/sys/i386/conf/NOTES b/sys/i386/conf/NOTES
index f442e24..f772b25 100644
--- a/sys/i386/conf/NOTES
+++ b/sys/i386/conf/NOTES
@@ -49,7 +49,6 @@ options MP_WATCHDOG
# Debugging options.
#
-options STOP_NMI # Stop CPUS using NMI instead of IPI
options COUNT_XINVLTLB_HITS # Counters for TLB events
options COUNT_IPIS # Per-CPU IPI interrupt counters
diff --git a/sys/i386/i386/local_apic.c b/sys/i386/i386/local_apic.c
index 6b350e2..2cc6a45 100644
--- a/sys/i386/i386/local_apic.c
+++ b/sys/i386/i386/local_apic.c
@@ -1248,8 +1248,17 @@ lapic_ipi_vectored(u_int vector, int dest)
KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
("%s: invalid vector %d", __func__, vector));
- icrlo = vector | APIC_DELMODE_FIXED | APIC_DESTMODE_PHY |
- APIC_LEVEL_DEASSERT | APIC_TRIGMOD_EDGE;
+ icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE;
+
+ /*
+ * IPI_STOP_HARD is just a "fake" vector used to send a NMI.
+ * Use special rules regard NMI if passed, otherwise specify
+ * the vector.
+ */
+ if (vector == IPI_STOP_HARD)
+ icrlo |= APIC_DELMODE_NMI | APIC_LEVEL_ASSERT;
+ else
+ icrlo |= vector | APIC_DELMODE_FIXED | APIC_LEVEL_DEASSERT;
destfield = 0;
switch (dest) {
case APIC_IPI_DEST_SELF:
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index 0bfe91d..6729288 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -155,12 +155,6 @@ vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;
-#ifdef STOP_NMI
-static volatile cpumask_t ipi_nmi_pending;
-
-static void ipi_nmi_selected(cpumask_t cpus);
-#endif
-
#ifdef COUNT_IPIS
/* Interrupt counts. */
static u_long *ipi_preempt_counts[MAXCPU];
@@ -177,21 +171,8 @@ u_long *ipi_lazypmap_counts[MAXCPU];
* Local data and functions.
*/
-#ifdef STOP_NMI
-/*
- * Provide an alternate method of stopping other CPUs. If another CPU has
- * disabled interrupts the conventional STOP IPI will be blocked. This
- * NMI-based stop should get through in that case.
- */
-static int stop_cpus_with_nmi = 1;
-SYSCTL_INT(_debug, OID_AUTO, stop_cpus_with_nmi, CTLTYPE_INT | CTLFLAG_RW,
- &stop_cpus_with_nmi, 0, "");
-TUNABLE_INT("debug.stop_cpus_with_nmi", &stop_cpus_with_nmi);
-#else
-#define stop_cpus_with_nmi 0
-#endif
-
static u_int logical_cpus;
+static volatile cpumask_t ipi_nmi_pending;
/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;
@@ -1318,12 +1299,14 @@ ipi_selected(cpumask_t cpus, u_int ipi)
ipi = IPI_BITMAP_VECTOR;
}
-#ifdef STOP_NMI
- if (ipi == IPI_STOP && stop_cpus_with_nmi) {
- ipi_nmi_selected(cpus);
- return;
- }
-#endif
+ /*
+ * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
+ * of help in order to understand what is the source.
+ * Set the mask of receiving CPUs for this purpose.
+ */
+ if (ipi == IPI_STOP_HARD)
+ atomic_set_int(&ipi_nmi_pending, cpus);
+
CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
while ((cpu = ffs(cpus)) != 0) {
cpu--;
@@ -1354,64 +1337,42 @@ void
ipi_all_but_self(u_int ipi)
{
- if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
+ if (IPI_IS_BITMAPED(ipi)) {
ipi_selected(PCPU_GET(other_cpus), ipi);
return;
}
+
+ /*
+ * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
+ * of help in order to understand what is the source.
+ * Set the mask of receiving CPUs for this purpose.
+ */
+ if (ipi == IPI_STOP_HARD)
+ atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus));
CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
}
-#ifdef STOP_NMI
-/*
- * send NMI IPI to selected CPUs
- */
-
-#define BEFORE_SPIN 1000000
-
-void
-ipi_nmi_selected(cpumask_t cpus)
-{
- int cpu;
- register_t icrlo;
-
- icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT
- | APIC_TRIGMOD_EDGE;
-
- CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus);
-
- atomic_set_int(&ipi_nmi_pending, cpus);
-
- while ((cpu = ffs(cpus)) != 0) {
- cpu--;
- cpus &= ~(1 << cpu);
-
- KASSERT(cpu_apic_ids[cpu] != -1,
- ("IPI NMI to non-existent CPU %d", cpu));
-
- /* Wait for an earlier IPI to finish. */
- if (!lapic_ipi_wait(BEFORE_SPIN))
- panic("ipi_nmi_selected: previous IPI has not cleared");
-
- lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]);
- }
-}
-
int
-ipi_nmi_handler(void)
+ipi_nmi_handler()
{
- int cpumask = PCPU_GET(cpumask);
+ cpumask_t cpumask;
- if (!(ipi_nmi_pending & cpumask))
- return 1;
+ /*
+ * As long as there is not a simple way to know about a NMI's
+ * source, if the bitmask for the current CPU is present in
+ * the global pending bitword an IPI_STOP_HARD has been issued
+ * and should be handled.
+ */
+ cpumask = PCPU_GET(cpumask);
+ if ((ipi_nmi_pending & cpumask) == 0)
+ return (1);
atomic_clear_int(&ipi_nmi_pending, cpumask);
cpustop_handler();
- return 0;
+ return (0);
}
-#endif /* STOP_NMI */
-
/*
* Handle an IPI_STOP by saving our current context and spinning until we
* are resumed.
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index e967104..354d791 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -211,13 +211,11 @@ trap(struct trapframe *frame)
type = frame->tf_trapno;
#ifdef SMP
-#ifdef STOP_NMI
/* Handler for NMI IPIs used for stopping CPUs. */
if (type == T_NMI) {
if (ipi_nmi_handler() == 0)
goto out;
}
-#endif /* STOP_NMI */
#endif /* SMP */
#ifdef KDB
diff --git a/sys/i386/include/apicvar.h b/sys/i386/include/apicvar.h
index a03c083..a13766f 100644
--- a/sys/i386/include/apicvar.h
+++ b/sys/i386/include/apicvar.h
@@ -100,11 +100,6 @@
* smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user
* at a time) The second group uses a single interrupt and a bitmap to avoid
* redundant IPI interrupts.
- *
- * Right now IPI_STOP used by kdb shares the interrupt priority class with
- * the two IPI groups mentioned above. As such IPI_STOP may cause a deadlock.
- * Eventually IPI_STOP should use NMI IPIs - this would eliminate this and
- * other deadlocks caused by IPI_STOP.
*/
/* Interrupts for local APIC LVT entries other than the timer. */
@@ -134,6 +129,7 @@
#define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
#define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */
+#define IPI_STOP_HARD (APIC_IPI_INTS + 8) /* Stop CPU with a NMI. */
#else /* XEN */
/* These are the normal i386 APIC definitions */
@@ -161,6 +157,7 @@
#define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
#define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */
+#define IPI_STOP_HARD (APIC_IPI_INTS + 8) /* Stop CPU with a NMI. */
#endif /* XEN */
/*
diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h
index 917c285..968cdb4 100644
--- a/sys/i386/include/smp.h
+++ b/sys/i386/include/smp.h
@@ -60,7 +60,8 @@ inthand_t
void cpu_add(u_int apic_id, char boot_cpu);
void cpustop_handler(void);
void init_secondary(void);
-void ipi_selected(u_int cpus, u_int ipi);
+int ipi_nmi_handler(void);
+void ipi_selected(cpumask_t cpus, u_int ipi);
void ipi_all_but_self(u_int ipi);
#ifndef XEN
void ipi_bitmap_handler(struct trapframe frame);
@@ -76,9 +77,6 @@ void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva,
void smp_invltlb(void);
void smp_masked_invltlb(cpumask_t mask);
-#ifdef STOP_NMI
-int ipi_nmi_handler(void);
-#endif
#ifdef XEN
void ipi_to_irq_init(void);
diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c
index 3aa03ce..bae07d4 100644
--- a/sys/i386/xen/mp_machdep.c
+++ b/sys/i386/xen/mp_machdep.c
@@ -90,8 +90,6 @@ __FBSDID("$FreeBSD$");
#include <xen/hypervisor.h>
#include <xen/interface/vcpu.h>
-#define stop_cpus_with_nmi 0
-
int mp_naps; /* # of Applications processors */
int boot_cpu_id = -1; /* designated BSP */
diff --git a/sys/ia64/ia64/interrupt.c b/sys/ia64/ia64/interrupt.c
index 0c50b48..b70a807 100644
--- a/sys/ia64/ia64/interrupt.c
+++ b/sys/ia64/ia64/interrupt.c
@@ -145,6 +145,8 @@ interrupt(struct trapframe *tf)
/*
* Handle ExtINT interrupts by generating an INTA cycle to
* read the vector.
+ * IPI_STOP_HARD is mapped to IPI_STOP so it is not necessary
+ * to add it to this switch-like construct.
*/
if (vector == 0) {
inta = ib->ib_inta;
diff --git a/sys/ia64/include/smp.h b/sys/ia64/include/smp.h
index c6d98f7..4eddf74 100644
--- a/sys/ia64/include/smp.h
+++ b/sys/ia64/include/smp.h
@@ -21,6 +21,7 @@
#define IPI_AST 4
#define IPI_RENDEZVOUS 5
#define IPI_STOP 6
+#define IPI_STOP_HARD 6
#define IPI_PREEMPT 7
#define IPI_COUNT 8
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index 80dda97..0f3a672 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -412,9 +412,6 @@ boot(int howto)
*/
EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
- /* XXX This doesn't disable interrupts any more. Reconsider? */
- splhigh();
-
if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping)
doadump();
@@ -488,6 +485,13 @@ static void
shutdown_reset(void *junk, int howto)
{
+ /*
+ * Disable interrupts on CPU0 in order to avoid fast handlers
+ * to preempt the stopping process and to deadlock against other
+ * CPUs.
+ */
+ spinlock_enter();
+
printf("Rebooting...\n");
DELAY(1000000); /* wait 1 sec for printf's to complete and be read */
/* cpu_boot(howto); */ /* doesn't do anything at the moment */
diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c
index e6af53e..3e77db7 100644
--- a/sys/kern/subr_kdb.c
+++ b/sys/kern/subr_kdb.c
@@ -88,7 +88,8 @@ SYSCTL_PROC(_debug_kdb, OID_AUTO, trap_code, CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
* Flag indicating whether or not to IPI the other CPUs to stop them on
* entering the debugger. Sometimes, this will result in a deadlock as
* stop_cpus() waits for the other cpus to stop, so we allow it to be
- * disabled.
+ * disabled. In order to maximize the chances of success, use a hard
+ * stop for that.
*/
#ifdef SMP
static int kdb_stop_cpus = 1;
@@ -226,7 +227,7 @@ kdb_panic(const char *msg)
{
#ifdef SMP
- stop_cpus(PCPU_GET(other_cpus));
+ stop_cpus_hard(PCPU_GET(other_cpus));
#endif
printf("KDB: panic\n");
panic(msg);
@@ -518,7 +519,7 @@ kdb_trap(int type, int code, struct trapframe *tf)
#ifdef SMP
if ((did_stop_cpus = kdb_stop_cpus) != 0)
- stop_cpus(PCPU_GET(other_cpus));
+ stop_cpus_hard(PCPU_GET(other_cpus));
#endif
kdb_active++;
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index d64e806..d28001f 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -233,18 +233,21 @@ forward_roundrobin(void)
* XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
* from executing at same time.
*/
-int
-stop_cpus(cpumask_t map)
+static int
+generic_stop_cpus(cpumask_t map, u_int type)
{
int i;
+ KASSERT(type == IPI_STOP || type == IPI_STOP_HARD,
+ ("%s: invalid stop type", __func__));
+
if (!smp_started)
return 0;
- CTR1(KTR_SMP, "stop_cpus(%x)", map);
+ CTR2(KTR_SMP, "stop_cpus(%x) with %u type", map, type);
/* send the stop IPI to all CPUs in map */
- ipi_selected(map, IPI_STOP);
+ ipi_selected(map, type);
i = 0;
while ((stopped_cpus & map) != map) {
@@ -262,6 +265,20 @@ stop_cpus(cpumask_t map)
return 1;
}
+int
+stop_cpus(cpumask_t map)
+{
+
+ return (generic_stop_cpus(map, IPI_STOP));
+}
+
+int
+stop_cpus_hard(cpumask_t map)
+{
+
+ return (generic_stop_cpus(map, IPI_STOP_HARD));
+}
+
#if defined(__amd64__)
/*
* When called the executing CPU will send an IPI to all other CPUs
diff --git a/sys/mips/include/smp.h b/sys/mips/include/smp.h
index 798beed..d614dd3 100644
--- a/sys/mips/include/smp.h
+++ b/sys/mips/include/smp.h
@@ -24,6 +24,7 @@
#define IPI_RENDEZVOUS 0x0002
#define IPI_AST 0x0004
#define IPI_STOP 0x0008
+#define IPI_STOP_HARD 0x0008
#ifndef LOCORE
diff --git a/sys/mips/mips/mp_machdep.c b/sys/mips/mips/mp_machdep.c
index d688a52..bf32392 100644
--- a/sys/mips/mips/mp_machdep.c
+++ b/sys/mips/mips/mp_machdep.c
@@ -129,7 +129,12 @@ smp_handle_ipi(struct trapframe *frame)
break;
case IPI_STOP:
- CTR0(KTR_SMP, "IPI_STOP");
+
+ /*
+ * IPI_STOP_HARD is mapped to IPI_STOP so it is not
+ * necessary to add it in the switch.
+ */
+ CTR0(KTR_SMP, "IPI_STOP or IPI_STOP_HARD");
atomic_set_int(&stopped_cpus, cpumask);
while ((started_cpus & cpumask) == 0)
diff --git a/sys/pc98/conf/NOTES b/sys/pc98/conf/NOTES
index 02f8d07..9ab70b9 100644
--- a/sys/pc98/conf/NOTES
+++ b/sys/pc98/conf/NOTES
@@ -29,10 +29,6 @@ device apic # I/O apic
#
options MP_WATCHDOG
-# Debugging options.
-#
-options STOP_NMI # Stop CPUS using NMI instead of IPI
-
#####################################################################
diff --git a/sys/powerpc/include/smp.h b/sys/powerpc/include/smp.h
index 3929b8c..0e5ec16 100644
--- a/sys/powerpc/include/smp.h
+++ b/sys/powerpc/include/smp.h
@@ -35,6 +35,7 @@
#define IPI_PREEMPT 1
#define IPI_RENDEZVOUS 2
#define IPI_STOP 3
+#define IPI_STOP_HARD 3
#ifndef LOCORE
diff --git a/sys/powerpc/powerpc/mp_machdep.c b/sys/powerpc/powerpc/mp_machdep.c
index 2c6d11b..1ae7d6d 100644
--- a/sys/powerpc/powerpc/mp_machdep.c
+++ b/sys/powerpc/powerpc/mp_machdep.c
@@ -281,7 +281,13 @@ powerpc_ipi_handler(void *arg)
smp_rendezvous_action();
break;
case IPI_STOP:
- CTR1(KTR_SMP, "%s: IPI_STOP (stop)", __func__);
+
+ /*
+ * IPI_STOP_HARD is mapped to IPI_STOP so it is not
+ * necessary to add such case in the switch.
+ */
+ CTR1(KTR_SMP, "%s: IPI_STOP or IPI_STOP_HARD (stop)",
+ __func__);
self = PCPU_GET(cpumask);
savectx(PCPU_GET(curpcb));
atomic_set_int(&stopped_cpus, self);
diff --git a/sys/sparc64/include/smp.h b/sys/sparc64/include/smp.h
index 8eb5636..8735543 100644
--- a/sys/sparc64/include/smp.h
+++ b/sys/sparc64/include/smp.h
@@ -56,6 +56,7 @@
#define IPI_RENDEZVOUS PIL_RENDEZVOUS
#define IPI_PREEMPT PIL_PREEMPT
#define IPI_STOP PIL_STOP
+#define IPI_STOP_HARD PIL_STOP
#define IPI_RETRIES 5000
diff --git a/sys/sun4v/include/smp.h b/sys/sun4v/include/smp.h
index 4f5adc5..63a8e01 100644
--- a/sys/sun4v/include/smp.h
+++ b/sys/sun4v/include/smp.h
@@ -44,6 +44,7 @@
#define IPI_AST PIL_AST
#define IPI_RENDEZVOUS PIL_RENDEZVOUS
#define IPI_STOP PIL_STOP
+#define IPI_STOP_HARD PIL_STOP
#define IPI_PREEMPT PIL_PREEMPT
diff --git a/sys/sys/smp.h b/sys/sys/smp.h
index 05c8642..d80b9e4 100644
--- a/sys/sys/smp.h
+++ b/sys/sys/smp.h
@@ -123,6 +123,7 @@ void forward_signal(struct thread *);
void forward_roundrobin(void);
int restart_cpus(cpumask_t);
int stop_cpus(cpumask_t);
+int stop_cpus_hard(cpumask_t);
#if defined(__amd64__)
int suspend_cpus(cpumask_t);
#endif
OpenPOWER on IntegriCloud