summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authorjhb <jhb@FreeBSD.org>2015-02-06 18:19:59 +0000
committerjhb <jhb@FreeBSD.org>2015-02-06 18:19:59 +0000
commit9dfc36d98560234b3c913fd291aa845cc98eb563 (patch)
treedde8f43f0382ce3e4d9bd95d56103d2a144f5960 /sys
parent6064614ca50b8b6311bd60d1f85c0dcba8ef8954 (diff)
downloadFreeBSD-src-9dfc36d98560234b3c913fd291aa845cc98eb563.zip
FreeBSD-src-9dfc36d98560234b3c913fd291aa845cc98eb563.tar.gz
Revert the IPI startup sequence to match what is described in the
Intel Multiprocessor Specification v1.4. The Intel SDM claims that the INIT IPIs here are invalid, but other systems follow the MP spec instead. While here, fix the IPI wait routine to accept a timeout in microseconds instead of a raw spin count, and don't spin forever during AP startup. Instead, panic if a STARTUP IPI is not delivered after 20 us. PR: 196542 Differential Revision: https://reviews.freebsd.org/D1719 MFC after: 2 weeks
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/amd64/mp_machdep.c30
-rw-r--r--sys/i386/i386/mp_machdep.c30
-rw-r--r--sys/x86/x86/local_apic.c28
3 files changed, 62 insertions, 26 deletions
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 3184f13..91b5c9f 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -1065,14 +1065,27 @@ ipi_startup(int apic_id, int vector)
{
/*
+ * This attempts to follow the algorithm described in the
+ * Intel Multiprocessor Specification v1.4 in section B.4.
+ * For each IPI, we allow the local APIC ~20us to deliver the
+ * IPI. If that times out, we panic.
+ */
+
+ /*
* first we do an INIT IPI: this INIT IPI might be run, resetting
* and running the target CPU. OR this INIT IPI might be latched (P5
* bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
* ignored.
*/
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
- lapic_ipi_wait(-1);
+ lapic_ipi_wait(20);
+
+ /* Explicitly deassert the INIT IPI. */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
+ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT,
+ apic_id);
+
DELAY(10000); /* wait ~10mS */
/*
@@ -1084,9 +1097,11 @@ ipi_startup(int apic_id, int vector)
* will run.
*/
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
vector, apic_id);
- lapic_ipi_wait(-1);
+ if (!lapic_ipi_wait(20))
+ panic("Failed to deliver first STARTUP IPI to APIC %d",
+ apic_id);
DELAY(200); /* wait ~200uS */
/*
@@ -1096,9 +1111,12 @@ ipi_startup(int apic_id, int vector)
* recognized after hardware RESET or INIT IPI.
*/
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
vector, apic_id);
- lapic_ipi_wait(-1);
+ if (!lapic_ipi_wait(20))
+ panic("Failed to deliver second STARTUP IPI to APIC %d",
+ apic_id);
+
DELAY(200); /* wait ~200uS */
}
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index 288be90..72d3fe9 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -1138,14 +1138,27 @@ ipi_startup(int apic_id, int vector)
{
/*
+ * This attempts to follow the algorithm described in the
+ * Intel Multiprocessor Specification v1.4 in section B.4.
+ * For each IPI, we allow the local APIC ~20us to deliver the
+ * IPI. If that times out, we panic.
+ */
+
+ /*
* first we do an INIT IPI: this INIT IPI might be run, resetting
* and running the target CPU. OR this INIT IPI might be latched (P5
* bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
* ignored.
*/
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
- lapic_ipi_wait(-1);
+ lapic_ipi_wait(20);
+
+ /* Explicitly deassert the INIT IPI. */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
+ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT,
+ apic_id);
+
DELAY(10000); /* wait ~10mS */
/*
@@ -1157,9 +1170,11 @@ ipi_startup(int apic_id, int vector)
* will run.
*/
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
vector, apic_id);
- lapic_ipi_wait(-1);
+ if (!lapic_ipi_wait(20))
+ panic("Failed to deliver first STARTUP IPI to APIC %d",
+ apic_id);
DELAY(200); /* wait ~200uS */
/*
@@ -1169,9 +1184,12 @@ ipi_startup(int apic_id, int vector)
* recognized after hardware RESET or INIT IPI.
*/
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
vector, apic_id);
- lapic_ipi_wait(-1);
+ if (!lapic_ipi_wait(20))
+ panic("Failed to deliver second STARTUP IPI to APIC %d",
+ apic_id);
+
DELAY(200); /* wait ~200uS */
}
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index e3228ce..080822b 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -1452,22 +1452,22 @@ SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL);
static int
native_lapic_ipi_wait(int delay)
{
- int x, incr;
+ int x;
/*
- * Wait delay loops for IPI to be sent. This is highly bogus
- * since this is sensitive to CPU clock speed. If delay is
+ * Wait delay microseconds for IPI to be sent. If delay is
* -1, we wait forever.
*/
if (delay == -1) {
- incr = 0;
- delay = 1;
- } else
- incr = 1;
- for (x = 0; x < delay; x += incr) {
+ while ((lapic->icr_lo & APIC_DELSTAT_MASK) != APIC_DELSTAT_IDLE)
+ ia32_pause();
+ return (1);
+ }
+
+ for (x = 0; x < delay; x += 5) {
if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE)
return (1);
- ia32_pause();
+ DELAY(5);
}
return (0);
}
@@ -1501,9 +1501,9 @@ native_lapic_ipi_raw(register_t icrlo, u_int dest)
intr_restore(saveintr);
}
-#define BEFORE_SPIN 1000000
+#define BEFORE_SPIN 50000
#ifdef DETECT_DEADLOCK
-#define AFTER_SPIN 1000
+#define AFTER_SPIN 50
#endif
static void
@@ -1514,7 +1514,7 @@ native_lapic_ipi_vectored(u_int vector, int dest)
KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
("%s: invalid vector %d", __func__, vector));
- icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE;
+ icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT;
/*
* IPI_STOP_HARD is just a "fake" vector used to send a NMI.
@@ -1522,9 +1522,9 @@ native_lapic_ipi_vectored(u_int vector, int dest)
* the vector.
*/
if (vector == IPI_STOP_HARD)
- icrlo |= APIC_DELMODE_NMI | APIC_LEVEL_ASSERT;
+ icrlo |= APIC_DELMODE_NMI;
else
- icrlo |= vector | APIC_DELMODE_FIXED | APIC_LEVEL_DEASSERT;
+ icrlo |= vector | APIC_DELMODE_FIXED;
destfield = 0;
switch (dest) {
case APIC_IPI_DEST_SELF:
OpenPOWER on IntegriCloud