From e85ca71aadcc42a3f7cc90e6cff96c28f1c54290 Mon Sep 17 00:00:00 2001 From: attilio Date: Thu, 13 Aug 2009 17:09:45 +0000 Subject: * Completely Remove the option STOP_NMI from the kernel. This option has proven to have a good effect when entering KDB by using a NMI, but it completely violates all the good rules about interrupts disabled while holding a spinlock in other occasions. This can be the cause of deadlocks on events where a normal IPI_STOP is expected. * Adds an new IPI called IPI_STOP_HARD on all the supported architectures. This IPI is responsible for sending a stop message among CPUs using a privileged channel when disponible. In other cases it just does match a normal IPI_STOP. Right now the IPI_STOP_HARD functionality uses a NMI on ia32 and amd64 architectures, while on the other has a normal IPI_STOP effect. It is responsibility of maintainers to eventually implement an hard stop when necessary and possible. * Use the new IPI facility in order to implement a new userend SMP kernel function called stop_cpus_hard(). That is specular to stop_cpu() but it does use the privileged channel for the stopping facility. * Let KDB use the newly introduced function stop_cpus_hard() and leave stop_cpus() for all the other cases * Disable interrupts on CPU0 when starting the process of APs suspension. * Style cleanup and comments adding This patch should fix the reboot/shutdown deadlocks many users are constantly reporting on mailing lists. Please don't forget to update your config file with the STOP_NMI option removal Reviewed by: jhb Tested by: pho, bz, rink Approved by: re (kib) --- sys/amd64/amd64/local_apic.c | 13 ++++- sys/amd64/amd64/mp_machdep.c | 100 ++++++++++++--------------------------- sys/amd64/amd64/trap.c | 2 - sys/amd64/conf/GENERIC | 1 - sys/amd64/conf/NOTES | 5 -- sys/amd64/conf/XENHVM | 1 - sys/amd64/include/apicvar.h | 6 +-- sys/amd64/include/smp.h | 5 +- sys/conf/options.amd64 | 1 - sys/conf/options.i386 | 1 - sys/conf/options.pc98 | 1 - sys/i386/conf/GENERIC | 1 - sys/i386/conf/NOTES | 1 - sys/i386/i386/local_apic.c | 13 ++++- sys/i386/i386/mp_machdep.c | 99 ++++++++++++-------------------------- sys/i386/i386/trap.c | 2 - sys/i386/include/apicvar.h | 7 +-- sys/i386/include/smp.h | 6 +-- sys/i386/xen/mp_machdep.c | 2 - sys/ia64/ia64/interrupt.c | 2 + sys/ia64/include/smp.h | 1 + sys/kern/kern_shutdown.c | 10 ++-- sys/kern/subr_kdb.c | 7 +-- sys/kern/subr_smp.c | 25 ++++++++-- sys/mips/include/smp.h | 1 + sys/mips/mips/mp_machdep.c | 7 ++- sys/pc98/conf/NOTES | 4 -- sys/powerpc/include/smp.h | 1 + sys/powerpc/powerpc/mp_machdep.c | 8 +++- sys/sparc64/include/smp.h | 1 + sys/sun4v/include/smp.h | 1 + sys/sys/smp.h | 1 + 32 files changed, 142 insertions(+), 194 deletions(-) (limited to 'sys') diff --git a/sys/amd64/amd64/local_apic.c b/sys/amd64/amd64/local_apic.c index 14559f3..cd3073c 100644 --- a/sys/amd64/amd64/local_apic.c +++ b/sys/amd64/amd64/local_apic.c @@ -1238,8 +1238,17 @@ lapic_ipi_vectored(u_int vector, int dest) KASSERT((vector & ~APIC_VECTOR_MASK) == 0, ("%s: invalid vector %d", __func__, vector)); - icrlo = vector | APIC_DELMODE_FIXED | APIC_DESTMODE_PHY | - APIC_LEVEL_DEASSERT | APIC_TRIGMOD_EDGE; + icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE; + + /* + * IPI_STOP_HARD is just a "fake" vector used to send a NMI. + * Use special rules regard NMI if passed, otherwise specify + * the vector. + */ + if (vector == IPI_STOP_HARD) + icrlo |= APIC_DELMODE_NMI | APIC_LEVEL_ASSERT; + else + icrlo |= vector | APIC_DELMODE_FIXED | APIC_LEVEL_DEASSERT; destfield = 0; switch (dest) { case APIC_IPI_DEST_SELF: diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 52c209c..0ef8017 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -114,31 +114,12 @@ volatile int smp_tlb_wait; extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); -#ifdef STOP_NMI -static volatile cpumask_t ipi_nmi_pending; - -static void ipi_nmi_selected(cpumask_t cpus); -#endif - /* * Local data and functions. */ -#ifdef STOP_NMI -/* - * Provide an alternate method of stopping other CPUs. If another CPU has - * disabled interrupts the conventional STOP IPI will be blocked. This - * NMI-based stop should get through in that case. - */ -static int stop_cpus_with_nmi = 1; -SYSCTL_INT(_debug, OID_AUTO, stop_cpus_with_nmi, CTLTYPE_INT | CTLFLAG_RW, - &stop_cpus_with_nmi, 0, ""); -TUNABLE_INT("debug.stop_cpus_with_nmi", &stop_cpus_with_nmi); -#else -#define stop_cpus_with_nmi 0 -#endif - static u_int logical_cpus; +static volatile cpumask_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -1158,12 +1139,14 @@ ipi_selected(cpumask_t cpus, u_int ipi) ipi = IPI_BITMAP_VECTOR; } -#ifdef STOP_NMI - if (ipi == IPI_STOP && stop_cpus_with_nmi) { - ipi_nmi_selected(cpus); - return; - } -#endif + /* + * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit + * of help in order to understand what is the source. + * Set the mask of receiving CPUs for this purpose. + */ + if (ipi == IPI_STOP_HARD) + atomic_set_int(&ipi_nmi_pending, cpus); + CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); while ((cpu = ffs(cpus)) != 0) { cpu--; @@ -1194,64 +1177,43 @@ void ipi_all_but_self(u_int ipi) { - if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) { + if (IPI_IS_BITMAPED(ipi)) { ipi_selected(PCPU_GET(other_cpus), ipi); return; } - CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); - lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); -} -#ifdef STOP_NMI -/* - * send NMI IPI to selected CPUs - */ - -#define BEFORE_SPIN 1000000 - -static void -ipi_nmi_selected(cpumask_t cpus) -{ - int cpu; - register_t icrlo; - - icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT - | APIC_TRIGMOD_EDGE; - - CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus); - - atomic_set_int(&ipi_nmi_pending, cpus); - - while ((cpu = ffs(cpus)) != 0) { - cpu--; - cpus &= ~(1 << cpu); - - KASSERT(cpu_apic_ids[cpu] != -1, - ("IPI NMI to non-existent CPU %d", cpu)); - - /* Wait for an earlier IPI to finish. */ - if (!lapic_ipi_wait(BEFORE_SPIN)) - panic("ipi_nmi_selected: previous IPI has not cleared"); + /* + * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit + * of help in order to understand what is the source. + * Set the mask of receiving CPUs for this purpose. + */ + if (ipi == IPI_STOP_HARD) + atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); - lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]); - } + CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); + lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); } int -ipi_nmi_handler(void) +ipi_nmi_handler() { - int cpumask = PCPU_GET(cpumask); + cpumask_t cpumask; - if (!(ipi_nmi_pending & cpumask)) - return 1; + /* + * As long as there is not a simple way to know about a NMI's + * source, if the bitmask for the current CPU is present in + * the global pending bitword an IPI_STOP_HARD has been issued + * and should be handled. + */ + cpumask = PCPU_GET(cpumask); + if ((ipi_nmi_pending & cpumask) == 0) + return (1); atomic_clear_int(&ipi_nmi_pending, cpumask); cpustop_handler(); - return 0; + return (0); } -#endif /* STOP_NMI */ - /* * Handle an IPI_STOP by saving our current context and spinning until we * are resumed. diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index fee3caf..323e8d1 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -239,13 +239,11 @@ trap(struct trapframe *frame) type = frame->tf_trapno; #ifdef SMP -#ifdef STOP_NMI /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI) { if (ipi_nmi_handler() == 0) goto out; } -#endif /* STOP_NMI */ #endif /* SMP */ #ifdef KDB diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index 73a4fb6..a49f7bc 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -69,7 +69,6 @@ options P1003_1B_SEMAPHORES # POSIX-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options KBD_INSTALL_CDEV # install a CDEV entry in /dev -options STOP_NMI # Stop CPUS using NMI instead of IPI options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options MAC # TrustedBSD MAC Framework diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES index 088a381..27fe068 100644 --- a/sys/amd64/conf/NOTES +++ b/sys/amd64/conf/NOTES @@ -30,11 +30,6 @@ device mptable # Optional MPSPEC mptable support # options MP_WATCHDOG -# -# Debugging options. -# -options STOP_NMI # Stop CPUS using NMI instead of IPI - ##################################################################### diff --git a/sys/amd64/conf/XENHVM b/sys/amd64/conf/XENHVM index 5e108d5..1536e3c 100644 --- a/sys/amd64/conf/XENHVM +++ b/sys/amd64/conf/XENHVM @@ -68,7 +68,6 @@ options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options KBD_INSTALL_CDEV # install a CDEV entry in /dev -options STOP_NMI # Stop CPUS using NMI instead of IPI options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing #options KDTRACE_FRAME # Ensure frames are compiled in diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h index 84ba3b8..73fff6c 100644 --- a/sys/amd64/include/apicvar.h +++ b/sys/amd64/include/apicvar.h @@ -102,11 +102,6 @@ * smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user * at a time) The second group uses a single interrupt and a bitmap to avoid * redundant IPI interrupts. - * - * Right now IPI_STOP used by kdb shares the interrupt priority class with - * the two IPI groups mentioned above. As such IPI_STOP may cause a deadlock. - * Eventually IPI_STOP should use NMI IPIs - this would eliminate this and - * other deadlocks caused by IPI_STOP. */ /* Interrupts for local APIC LVT entries other than the timer. */ @@ -134,6 +129,7 @@ #define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */ #define IPI_SUSPEND (APIC_IPI_INTS + 8) /* Suspend CPU until restarted. */ +#define IPI_STOP_HARD (APIC_IPI_INTS + 9) /* Stop CPU with a NMI. */ /* * The spurious interrupt can share the priority class with the IPIs since diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index d295715..1cc21a4 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -52,6 +52,7 @@ void cpu_add(u_int apic_id, char boot_cpu); void cpustop_handler(void); void cpususpend_handler(void); void init_secondary(void); +int ipi_nmi_handler(void); void ipi_selected(cpumask_t cpus, u_int ipi); void ipi_all_but_self(u_int ipi); void ipi_bitmap_handler(struct trapframe frame); @@ -66,10 +67,6 @@ void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva, void smp_invltlb(void); void smp_masked_invltlb(cpumask_t mask); -#ifdef STOP_NMI -int ipi_nmi_handler(void); -#endif - #endif /* !LOCORE */ #endif /* SMP */ diff --git a/sys/conf/options.amd64 b/sys/conf/options.amd64 index 5247921..beb97ed 100644 --- a/sys/conf/options.amd64 +++ b/sys/conf/options.amd64 @@ -52,7 +52,6 @@ PSM_DEBUG opt_psm.h DEV_ATPIC opt_atpic.h # Debugging -STOP_NMI opt_cpu.h KDTRACE_FRAME opt_kdtrace.h # BPF just-in-time compiler diff --git a/sys/conf/options.i386 b/sys/conf/options.i386 index 45a1637..cd2ab98 100644 --- a/sys/conf/options.i386 +++ b/sys/conf/options.i386 @@ -110,7 +110,6 @@ ASR_COMPAT opt_asr.h # Debugging NPX_DEBUG opt_npx.h -STOP_NMI opt_cpu.h # BPF just-in-time compiler BPF_JITTER opt_bpf.h diff --git a/sys/conf/options.pc98 b/sys/conf/options.pc98 index 837169b..dca3d69 100644 --- a/sys/conf/options.pc98 +++ b/sys/conf/options.pc98 @@ -95,7 +95,6 @@ DEV_NPX opt_npx.h # Debugging NPX_DEBUG opt_npx.h -STOP_NMI opt_cpu.h AGP_DEBUG opt_agp.h # BPF just-in-time compiler diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index 02f5a36..ef958af 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -70,7 +70,6 @@ options P1003_1B_SEMAPHORES # POSIX-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options KBD_INSTALL_CDEV # install a CDEV entry in /dev -options STOP_NMI # Stop CPUS using NMI instead of IPI options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options MAC # TrustedBSD MAC Framework diff --git a/sys/i386/conf/NOTES b/sys/i386/conf/NOTES index f442e24..f772b25 100644 --- a/sys/i386/conf/NOTES +++ b/sys/i386/conf/NOTES @@ -49,7 +49,6 @@ options MP_WATCHDOG # Debugging options. # -options STOP_NMI # Stop CPUS using NMI instead of IPI options COUNT_XINVLTLB_HITS # Counters for TLB events options COUNT_IPIS # Per-CPU IPI interrupt counters diff --git a/sys/i386/i386/local_apic.c b/sys/i386/i386/local_apic.c index 6b350e2..2cc6a45 100644 --- a/sys/i386/i386/local_apic.c +++ b/sys/i386/i386/local_apic.c @@ -1248,8 +1248,17 @@ lapic_ipi_vectored(u_int vector, int dest) KASSERT((vector & ~APIC_VECTOR_MASK) == 0, ("%s: invalid vector %d", __func__, vector)); - icrlo = vector | APIC_DELMODE_FIXED | APIC_DESTMODE_PHY | - APIC_LEVEL_DEASSERT | APIC_TRIGMOD_EDGE; + icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE; + + /* + * IPI_STOP_HARD is just a "fake" vector used to send a NMI. + * Use special rules regard NMI if passed, otherwise specify + * the vector. + */ + if (vector == IPI_STOP_HARD) + icrlo |= APIC_DELMODE_NMI | APIC_LEVEL_ASSERT; + else + icrlo |= vector | APIC_DELMODE_FIXED | APIC_LEVEL_DEASSERT; destfield = 0; switch (dest) { case APIC_IPI_DEST_SELF: diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 0bfe91d..6729288 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -155,12 +155,6 @@ vm_offset_t smp_tlb_addr1; vm_offset_t smp_tlb_addr2; volatile int smp_tlb_wait; -#ifdef STOP_NMI -static volatile cpumask_t ipi_nmi_pending; - -static void ipi_nmi_selected(cpumask_t cpus); -#endif - #ifdef COUNT_IPIS /* Interrupt counts. */ static u_long *ipi_preempt_counts[MAXCPU]; @@ -177,21 +171,8 @@ u_long *ipi_lazypmap_counts[MAXCPU]; * Local data and functions. */ -#ifdef STOP_NMI -/* - * Provide an alternate method of stopping other CPUs. If another CPU has - * disabled interrupts the conventional STOP IPI will be blocked. This - * NMI-based stop should get through in that case. - */ -static int stop_cpus_with_nmi = 1; -SYSCTL_INT(_debug, OID_AUTO, stop_cpus_with_nmi, CTLTYPE_INT | CTLFLAG_RW, - &stop_cpus_with_nmi, 0, ""); -TUNABLE_INT("debug.stop_cpus_with_nmi", &stop_cpus_with_nmi); -#else -#define stop_cpus_with_nmi 0 -#endif - static u_int logical_cpus; +static volatile cpumask_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -1318,12 +1299,14 @@ ipi_selected(cpumask_t cpus, u_int ipi) ipi = IPI_BITMAP_VECTOR; } -#ifdef STOP_NMI - if (ipi == IPI_STOP && stop_cpus_with_nmi) { - ipi_nmi_selected(cpus); - return; - } -#endif + /* + * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit + * of help in order to understand what is the source. + * Set the mask of receiving CPUs for this purpose. + */ + if (ipi == IPI_STOP_HARD) + atomic_set_int(&ipi_nmi_pending, cpus); + CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); while ((cpu = ffs(cpus)) != 0) { cpu--; @@ -1354,64 +1337,42 @@ void ipi_all_but_self(u_int ipi) { - if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) { + if (IPI_IS_BITMAPED(ipi)) { ipi_selected(PCPU_GET(other_cpus), ipi); return; } + + /* + * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit + * of help in order to understand what is the source. + * Set the mask of receiving CPUs for this purpose. + */ + if (ipi == IPI_STOP_HARD) + atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); } -#ifdef STOP_NMI -/* - * send NMI IPI to selected CPUs - */ - -#define BEFORE_SPIN 1000000 - -void -ipi_nmi_selected(cpumask_t cpus) -{ - int cpu; - register_t icrlo; - - icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT - | APIC_TRIGMOD_EDGE; - - CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus); - - atomic_set_int(&ipi_nmi_pending, cpus); - - while ((cpu = ffs(cpus)) != 0) { - cpu--; - cpus &= ~(1 << cpu); - - KASSERT(cpu_apic_ids[cpu] != -1, - ("IPI NMI to non-existent CPU %d", cpu)); - - /* Wait for an earlier IPI to finish. */ - if (!lapic_ipi_wait(BEFORE_SPIN)) - panic("ipi_nmi_selected: previous IPI has not cleared"); - - lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]); - } -} - int -ipi_nmi_handler(void) +ipi_nmi_handler() { - int cpumask = PCPU_GET(cpumask); + cpumask_t cpumask; - if (!(ipi_nmi_pending & cpumask)) - return 1; + /* + * As long as there is not a simple way to know about a NMI's + * source, if the bitmask for the current CPU is present in + * the global pending bitword an IPI_STOP_HARD has been issued + * and should be handled. + */ + cpumask = PCPU_GET(cpumask); + if ((ipi_nmi_pending & cpumask) == 0) + return (1); atomic_clear_int(&ipi_nmi_pending, cpumask); cpustop_handler(); - return 0; + return (0); } -#endif /* STOP_NMI */ - /* * Handle an IPI_STOP by saving our current context and spinning until we * are resumed. diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index e967104..354d791 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -211,13 +211,11 @@ trap(struct trapframe *frame) type = frame->tf_trapno; #ifdef SMP -#ifdef STOP_NMI /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI) { if (ipi_nmi_handler() == 0) goto out; } -#endif /* STOP_NMI */ #endif /* SMP */ #ifdef KDB diff --git a/sys/i386/include/apicvar.h b/sys/i386/include/apicvar.h index a03c083..a13766f 100644 --- a/sys/i386/include/apicvar.h +++ b/sys/i386/include/apicvar.h @@ -100,11 +100,6 @@ * smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user * at a time) The second group uses a single interrupt and a bitmap to avoid * redundant IPI interrupts. - * - * Right now IPI_STOP used by kdb shares the interrupt priority class with - * the two IPI groups mentioned above. As such IPI_STOP may cause a deadlock. - * Eventually IPI_STOP should use NMI IPIs - this would eliminate this and - * other deadlocks caused by IPI_STOP. */ /* Interrupts for local APIC LVT entries other than the timer. */ @@ -134,6 +129,7 @@ #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */ +#define IPI_STOP_HARD (APIC_IPI_INTS + 8) /* Stop CPU with a NMI. */ #else /* XEN */ /* These are the normal i386 APIC definitions */ @@ -161,6 +157,7 @@ #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */ +#define IPI_STOP_HARD (APIC_IPI_INTS + 8) /* Stop CPU with a NMI. */ #endif /* XEN */ /* diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index 917c285..968cdb4 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -60,7 +60,8 @@ inthand_t void cpu_add(u_int apic_id, char boot_cpu); void cpustop_handler(void); void init_secondary(void); -void ipi_selected(u_int cpus, u_int ipi); +int ipi_nmi_handler(void); +void ipi_selected(cpumask_t cpus, u_int ipi); void ipi_all_but_self(u_int ipi); #ifndef XEN void ipi_bitmap_handler(struct trapframe frame); @@ -76,9 +77,6 @@ void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva, void smp_invltlb(void); void smp_masked_invltlb(cpumask_t mask); -#ifdef STOP_NMI -int ipi_nmi_handler(void); -#endif #ifdef XEN void ipi_to_irq_init(void); diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c index 3aa03ce..bae07d4 100644 --- a/sys/i386/xen/mp_machdep.c +++ b/sys/i386/xen/mp_machdep.c @@ -90,8 +90,6 @@ __FBSDID("$FreeBSD$"); #include #include -#define stop_cpus_with_nmi 0 - int mp_naps; /* # of Applications processors */ int boot_cpu_id = -1; /* designated BSP */ diff --git a/sys/ia64/ia64/interrupt.c b/sys/ia64/ia64/interrupt.c index 0c50b48..b70a807 100644 --- a/sys/ia64/ia64/interrupt.c +++ b/sys/ia64/ia64/interrupt.c @@ -145,6 +145,8 @@ interrupt(struct trapframe *tf) /* * Handle ExtINT interrupts by generating an INTA cycle to * read the vector. + * IPI_STOP_HARD is mapped to IPI_STOP so it is not necessary + * to add it to this switch-like construct. */ if (vector == 0) { inta = ib->ib_inta; diff --git a/sys/ia64/include/smp.h b/sys/ia64/include/smp.h index c6d98f7..4eddf74 100644 --- a/sys/ia64/include/smp.h +++ b/sys/ia64/include/smp.h @@ -21,6 +21,7 @@ #define IPI_AST 4 #define IPI_RENDEZVOUS 5 #define IPI_STOP 6 +#define IPI_STOP_HARD 6 #define IPI_PREEMPT 7 #define IPI_COUNT 8 diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index 80dda97..0f3a672 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -412,9 +412,6 @@ boot(int howto) */ EVENTHANDLER_INVOKE(shutdown_post_sync, howto); - /* XXX This doesn't disable interrupts any more. Reconsider? */ - splhigh(); - if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) doadump(); @@ -488,6 +485,13 @@ static void shutdown_reset(void *junk, int howto) { + /* + * Disable interrupts on CPU0 in order to avoid fast handlers + * to preempt the stopping process and to deadlock against other + * CPUs. + */ + spinlock_enter(); + printf("Rebooting...\n"); DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ /* cpu_boot(howto); */ /* doesn't do anything at the moment */ diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c index e6af53e..3e77db7 100644 --- a/sys/kern/subr_kdb.c +++ b/sys/kern/subr_kdb.c @@ -88,7 +88,8 @@ SYSCTL_PROC(_debug_kdb, OID_AUTO, trap_code, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, * Flag indicating whether or not to IPI the other CPUs to stop them on * entering the debugger. Sometimes, this will result in a deadlock as * stop_cpus() waits for the other cpus to stop, so we allow it to be - * disabled. + * disabled. In order to maximize the chances of success, use a hard + * stop for that. */ #ifdef SMP static int kdb_stop_cpus = 1; @@ -226,7 +227,7 @@ kdb_panic(const char *msg) { #ifdef SMP - stop_cpus(PCPU_GET(other_cpus)); + stop_cpus_hard(PCPU_GET(other_cpus)); #endif printf("KDB: panic\n"); panic(msg); @@ -518,7 +519,7 @@ kdb_trap(int type, int code, struct trapframe *tf) #ifdef SMP if ((did_stop_cpus = kdb_stop_cpus) != 0) - stop_cpus(PCPU_GET(other_cpus)); + stop_cpus_hard(PCPU_GET(other_cpus)); #endif kdb_active++; diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index d64e806..d28001f 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -233,18 +233,21 @@ forward_roundrobin(void) * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs * from executing at same time. */ -int -stop_cpus(cpumask_t map) +static int +generic_stop_cpus(cpumask_t map, u_int type) { int i; + KASSERT(type == IPI_STOP || type == IPI_STOP_HARD, + ("%s: invalid stop type", __func__)); + if (!smp_started) return 0; - CTR1(KTR_SMP, "stop_cpus(%x)", map); + CTR2(KTR_SMP, "stop_cpus(%x) with %u type", map, type); /* send the stop IPI to all CPUs in map */ - ipi_selected(map, IPI_STOP); + ipi_selected(map, type); i = 0; while ((stopped_cpus & map) != map) { @@ -262,6 +265,20 @@ stop_cpus(cpumask_t map) return 1; } +int +stop_cpus(cpumask_t map) +{ + + return (generic_stop_cpus(map, IPI_STOP)); +} + +int +stop_cpus_hard(cpumask_t map) +{ + + return (generic_stop_cpus(map, IPI_STOP_HARD)); +} + #if defined(__amd64__) /* * When called the executing CPU will send an IPI to all other CPUs diff --git a/sys/mips/include/smp.h b/sys/mips/include/smp.h index 798beed..d614dd3 100644 --- a/sys/mips/include/smp.h +++ b/sys/mips/include/smp.h @@ -24,6 +24,7 @@ #define IPI_RENDEZVOUS 0x0002 #define IPI_AST 0x0004 #define IPI_STOP 0x0008 +#define IPI_STOP_HARD 0x0008 #ifndef LOCORE diff --git a/sys/mips/mips/mp_machdep.c b/sys/mips/mips/mp_machdep.c index d688a52..bf32392 100644 --- a/sys/mips/mips/mp_machdep.c +++ b/sys/mips/mips/mp_machdep.c @@ -129,7 +129,12 @@ smp_handle_ipi(struct trapframe *frame) break; case IPI_STOP: - CTR0(KTR_SMP, "IPI_STOP"); + + /* + * IPI_STOP_HARD is mapped to IPI_STOP so it is not + * necessary to add it in the switch. + */ + CTR0(KTR_SMP, "IPI_STOP or IPI_STOP_HARD"); atomic_set_int(&stopped_cpus, cpumask); while ((started_cpus & cpumask) == 0) diff --git a/sys/pc98/conf/NOTES b/sys/pc98/conf/NOTES index 02f8d07..9ab70b9 100644 --- a/sys/pc98/conf/NOTES +++ b/sys/pc98/conf/NOTES @@ -29,10 +29,6 @@ device apic # I/O apic # options MP_WATCHDOG -# Debugging options. -# -options STOP_NMI # Stop CPUS using NMI instead of IPI - ##################################################################### diff --git a/sys/powerpc/include/smp.h b/sys/powerpc/include/smp.h index 3929b8c..0e5ec16 100644 --- a/sys/powerpc/include/smp.h +++ b/sys/powerpc/include/smp.h @@ -35,6 +35,7 @@ #define IPI_PREEMPT 1 #define IPI_RENDEZVOUS 2 #define IPI_STOP 3 +#define IPI_STOP_HARD 3 #ifndef LOCORE diff --git a/sys/powerpc/powerpc/mp_machdep.c b/sys/powerpc/powerpc/mp_machdep.c index 2c6d11b..1ae7d6d 100644 --- a/sys/powerpc/powerpc/mp_machdep.c +++ b/sys/powerpc/powerpc/mp_machdep.c @@ -281,7 +281,13 @@ powerpc_ipi_handler(void *arg) smp_rendezvous_action(); break; case IPI_STOP: - CTR1(KTR_SMP, "%s: IPI_STOP (stop)", __func__); + + /* + * IPI_STOP_HARD is mapped to IPI_STOP so it is not + * necessary to add such case in the switch. + */ + CTR1(KTR_SMP, "%s: IPI_STOP or IPI_STOP_HARD (stop)", + __func__); self = PCPU_GET(cpumask); savectx(PCPU_GET(curpcb)); atomic_set_int(&stopped_cpus, self); diff --git a/sys/sparc64/include/smp.h b/sys/sparc64/include/smp.h index 8eb5636..8735543 100644 --- a/sys/sparc64/include/smp.h +++ b/sys/sparc64/include/smp.h @@ -56,6 +56,7 @@ #define IPI_RENDEZVOUS PIL_RENDEZVOUS #define IPI_PREEMPT PIL_PREEMPT #define IPI_STOP PIL_STOP +#define IPI_STOP_HARD PIL_STOP #define IPI_RETRIES 5000 diff --git a/sys/sun4v/include/smp.h b/sys/sun4v/include/smp.h index 4f5adc5..63a8e01 100644 --- a/sys/sun4v/include/smp.h +++ b/sys/sun4v/include/smp.h @@ -44,6 +44,7 @@ #define IPI_AST PIL_AST #define IPI_RENDEZVOUS PIL_RENDEZVOUS #define IPI_STOP PIL_STOP +#define IPI_STOP_HARD PIL_STOP #define IPI_PREEMPT PIL_PREEMPT diff --git a/sys/sys/smp.h b/sys/sys/smp.h index 05c8642..d80b9e4 100644 --- a/sys/sys/smp.h +++ b/sys/sys/smp.h @@ -123,6 +123,7 @@ void forward_signal(struct thread *); void forward_roundrobin(void); int restart_cpus(cpumask_t); int stop_cpus(cpumask_t); +int stop_cpus_hard(cpumask_t); #if defined(__amd64__) int suspend_cpus(cpumask_t); #endif -- cgit v1.1