summaryrefslogtreecommitdiffstats
path: root/sys/amd64
diff options
context:
space:
mode:
authorpeter <peter@FreeBSD.org>1997-08-26 18:10:38 +0000
committerpeter <peter@FreeBSD.org>1997-08-26 18:10:38 +0000
commit7dfe3e5abe7df9a2db7c225b0ce9e73fc5955654 (patch)
tree02a81dce529d428cea86379dd50d22676d1b7fbc /sys/amd64
parent4ef08431e3258c177c4a081bb0bba2eeef3a3ac3 (diff)
downloadFreeBSD-src-7dfe3e5abe7df9a2db7c225b0ce9e73fc5955654.zip
FreeBSD-src-7dfe3e5abe7df9a2db7c225b0ce9e73fc5955654.tar.gz
Clean up the SMP AP bootstrap and eliminate the wretched idle procs.
- We now have enough per-cpu idle context; the real idle loop has been revived (CPUs now halt when there is nothing to do).
- Some preliminary support for running some operations outside the global lock (e.g. zeroing "free but not yet zeroed pages") is present but appears to cause problems. Off by default.
- The smp_active sysctl now behaves differently. It is merely a true/false option. Setting smp_active to zero causes the APs to halt in the idle loop and stop scheduling processes.
- Bootstrap is a lot safer. Instead of sharing a statically compiled-in stack a number of times (which has caused lots of problems) and then abandoning it, we use the idle context to boot the APs directly. This should help >2-CPU support, since the bootlock stuff was in doubt.
- Print the physical APIC ID in traps; this helps identify private pages getting out of sync. (You don't want to know how much hair I tore out with this!)
More cleanup to follow; this is more of a checkpoint than a 'finished' thing.
Diffstat (limited to 'sys/amd64')
-rw-r--r--sys/amd64/amd64/cpu_switch.S113
-rw-r--r--sys/amd64/amd64/locore.S36
-rw-r--r--sys/amd64/amd64/locore.s36
-rw-r--r--sys/amd64/amd64/machdep.c14
-rw-r--r--sys/amd64/amd64/mp_machdep.c174
-rw-r--r--sys/amd64/amd64/mpboot.S51
-rw-r--r--sys/amd64/amd64/mptable.c174
-rw-r--r--sys/amd64/amd64/pmap.c65
-rw-r--r--sys/amd64/amd64/swtch.s113
-rw-r--r--sys/amd64/amd64/trap.c7
-rw-r--r--sys/amd64/amd64/vm_machdep.c14
-rw-r--r--sys/amd64/include/mptable.h174
-rw-r--r--sys/amd64/include/smp.h3
13 files changed, 761 insertions, 213 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index 61b1dfa..f749f03 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -33,7 +33,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: swtch.s,v 1.58 1997/08/04 17:31:43 fsmp Exp $
+ * $Id: swtch.s,v 1.59 1997/08/09 00:02:47 dyson Exp $
*/
#include "npx.h"
@@ -240,17 +240,94 @@ rem3id: .asciz "remrq.id"
/*
* When no processes are on the runq, cpu_switch() branches to _idle
* to wait for something to come ready.
- *
- * NOTE: on an SMP system this routine is a startup-only code path.
- * once initialization is over, meaning the idle procs have been
- * created, we should NEVER branch here.
*/
ALIGN_TEXT
_idle:
-#if defined(SMP) && defined(DIAGNOSTIC)
- cmpl $0, _smp_active
- jnz badsw3
-#endif /* SMP && DIAGNOSTIC */
+#ifdef SMP
+ /* when called, we have the mplock, intr disabled */
+
+ xorl %ebp,%ebp
+
+ /* use our idleproc's "context" */
+ movl _my_idlePTD,%ecx
+ movl %ecx,%cr3
+ movl $_idlestack_top,%ecx
+ movl %ecx,%esp
+
+ /* update common_tss.tss_esp0 pointer */
+ movl $_common_tss, %eax
+ movl %ecx, TSS_ESP0(%eax)
+
+ sti
+
+ /*
+ * XXX callers of cpu_switch() do a bogus splclock(). Locking should
+ * be left to cpu_switch().
+ */
+ call _spl0
+
+ cli
+
+ /*
+ * _REALLY_ free the lock, no matter how deep the prior nesting.
+ * We will recover the nesting on the way out when we have a new
+ * proc to load.
+ *
+ * XXX: we had damn well better be sure we had it before doing this!
+ */
+ movl $FREE_LOCK, %eax
+ movl %eax, _mp_lock
+
+ /* do NOT have lock, intrs disabled */
+ .globl idle_loop
+idle_loop:
+
+ movl %cr3,%eax /* ouch! */
+ movl %eax,%cr3
+
+ cmpl $0,_smp_active
+ jne 1f
+ cmpl $0,_cpuid
+ je 1f
+ jmp 2f
+
+1: cmpl $0,_whichrtqs /* real-time queue */
+ jne 3f
+ cmpl $0,_whichqs /* normal queue */
+ jne 3f
+ cmpl $0,_whichidqs /* 'idle' queue */
+ jne 3f
+
+ cmpl $0,_do_page_zero_idle
+ je 2f
+ /* XXX appears to cause panics */
+ /*
+ * Inside zero_idle we enable interrupts and grab the mplock
+ * as needed. It needs to be careful about entry/exit mutexes.
+ */
+ call _vm_page_zero_idle /* internal locking */
+ testl %eax, %eax
+ jnz idle_loop
+2:
+
+ /* enable intrs for a halt */
+ sti
+ call *_hlt_vector /* wait for interrupt */
+ cli
+ jmp idle_loop
+
+3:
+ call _get_mplock
+ cmpl $0,_whichrtqs /* real-time queue */
+ CROSSJUMP(jne, sw1a, je)
+ cmpl $0,_whichqs /* normal queue */
+ CROSSJUMP(jne, nortqr, je)
+ cmpl $0,_whichidqs /* 'idle' queue */
+ CROSSJUMP(jne, idqr, je)
+ call _rel_mplock
+ jmp idle_loop
+
+#else
xorl %ebp,%ebp
movl $HIDENAME(tmpstk),%esp
movl _IdlePTD,%ecx
@@ -302,6 +379,7 @@ idle_loop:
sti
call *_hlt_vector /* wait for interrupt */
jmp idle_loop
+#endif
CROSSJUMPTARGET(_idle)
@@ -367,6 +445,17 @@ ENTRY(cpu_switch)
/* save is done, now choose a new process or idle */
sw1:
cli
+
+#ifdef SMP
+ /* Stop scheduling if smp_active goes zero and we are not BSP */
+ cmpl $0,_smp_active
+ jne 1f
+ cmpl $0,_cpuid
+ je 1f
+ CROSSJUMP(je, _idle, jne) /* wind down */
+1:
+#endif
+
sw1a:
movl _whichrtqs,%edi /* pick next p. from rtqs */
testl %edi,%edi
@@ -594,12 +683,6 @@ sw0_2: .asciz "cpu_switch: not SRUN"
#endif
#if defined(SMP) && defined(DIAGNOSTIC)
-badsw3:
- pushl $sw0_3
- call _panic
-
-sw0_3: .asciz "cpu_switch: went idle with smp_active"
-
badsw4:
pushl $sw0_4
call _panic
diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S
index 23007033..a3df36f 100644
--- a/sys/amd64/amd64/locore.S
+++ b/sys/amd64/amd64/locore.S
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* from: @(#)locore.s 7.3 (Berkeley) 5/13/91
- * $Id: locore.s,v 1.3 1997/08/23 04:10:36 smp Exp smp $
+ * $Id: locore.s,v 1.95 1997/08/24 00:05:33 fsmp Exp $
*
* originally from: locore.s, by William F. Jolitz
*
@@ -90,24 +90,36 @@
.set _SMP_prvstart,(MPPTDI << PDRSHIFT)
.globl _SMP_prvpage,_SMP_prvpt,_lapic,_SMP_ioapic
+ .globl _prv_CPAGE1,_prv_CPAGE2,_prv_CPAGE3
+ .globl _idlestack,_idlestack_top
.set _SMP_prvpage,_SMP_prvstart
.set _SMP_prvpt,_SMP_prvstart + PAGE_SIZE
.set _lapic,_SMP_prvstart + (2 * PAGE_SIZE)
+ .set _idlestack,_SMP_prvstart + (3 * PAGE_SIZE)
+ .set _idlestack_top,_SMP_prvstart + (4 * PAGE_SIZE)
+ .set _prv_CPAGE1,_SMP_prvstart + (4 * PAGE_SIZE)
+ .set _prv_CPAGE2,_SMP_prvstart + (5 * PAGE_SIZE)
+ .set _prv_CPAGE3,_SMP_prvstart + (6 * PAGE_SIZE)
.set _SMP_ioapic,_SMP_prvstart + (16 * PAGE_SIZE)
.globl _cpuid,_curproc,_curpcb,_npxproc,_runtime,_cpu_lockid
- .globl _common_tss,_other_cpus,_ss_tpr
- .set _cpuid,_SMP_prvpage+0
- .set _curproc,_SMP_prvpage+4
- .set _curpcb,_SMP_prvpage+8
- .set _npxproc,_SMP_prvpage+12
- .set _runtime,_SMP_prvpage+16 /* 8 bytes struct timeval */
- .set _cpu_lockid,_SMP_prvpage+24
- .set _common_tss,_SMP_prvpage+28 /* 104 bytes long, next = 132 */
- .set _other_cpus,_SMP_prvpage+132 /* bitmap of available CPUs,
+ .globl _common_tss,_other_cpus,_my_idlePTD,_ss_tpr
+ .globl _prv_CMAP1,_prv_CMAP2,_prv_CMAP3
+ .set _cpuid,_SMP_prvpage+0 /* [0] */
+ .set _curproc,_SMP_prvpage+4 /* [1] */
+ .set _curpcb,_SMP_prvpage+8 /* [2] */
+ .set _npxproc,_SMP_prvpage+12 /* [3] */
+ .set _runtime,_SMP_prvpage+16 /* [4,5] */
+ .set _cpu_lockid,_SMP_prvpage+24 /* [6] */
+ .set _other_cpus,_SMP_prvpage+28 /* [7] bitmap of available CPUs,
excluding ourself */
- .set _ss_tpr,_SMP_prvpage+136
-
+ .set _my_idlePTD,_SMP_prvpage+32 /* [8] */
+ .set _ss_tpr,_SMP_prvpage+36 /* [9] */
+ .set _prv_CMAP1,_SMP_prvpage+40 /* [10] */
+ .set _prv_CMAP2,_SMP_prvpage+44 /* [11] */
+ .set _prv_CMAP3,_SMP_prvpage+48 /* [12] */
+ .set _common_tss,_SMP_prvpage+52 /* 102 (ie: 104) bytes long */
+
/* Fetch the .set's for the local apic */
#include "i386/i386/mp_apicdefs.s"
diff --git a/sys/amd64/amd64/locore.s b/sys/amd64/amd64/locore.s
index 23007033..a3df36f 100644
--- a/sys/amd64/amd64/locore.s
+++ b/sys/amd64/amd64/locore.s
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* from: @(#)locore.s 7.3 (Berkeley) 5/13/91
- * $Id: locore.s,v 1.3 1997/08/23 04:10:36 smp Exp smp $
+ * $Id: locore.s,v 1.95 1997/08/24 00:05:33 fsmp Exp $
*
* originally from: locore.s, by William F. Jolitz
*
@@ -90,24 +90,36 @@
.set _SMP_prvstart,(MPPTDI << PDRSHIFT)
.globl _SMP_prvpage,_SMP_prvpt,_lapic,_SMP_ioapic
+ .globl _prv_CPAGE1,_prv_CPAGE2,_prv_CPAGE3
+ .globl _idlestack,_idlestack_top
.set _SMP_prvpage,_SMP_prvstart
.set _SMP_prvpt,_SMP_prvstart + PAGE_SIZE
.set _lapic,_SMP_prvstart + (2 * PAGE_SIZE)
+ .set _idlestack,_SMP_prvstart + (3 * PAGE_SIZE)
+ .set _idlestack_top,_SMP_prvstart + (4 * PAGE_SIZE)
+ .set _prv_CPAGE1,_SMP_prvstart + (4 * PAGE_SIZE)
+ .set _prv_CPAGE2,_SMP_prvstart + (5 * PAGE_SIZE)
+ .set _prv_CPAGE3,_SMP_prvstart + (6 * PAGE_SIZE)
.set _SMP_ioapic,_SMP_prvstart + (16 * PAGE_SIZE)
.globl _cpuid,_curproc,_curpcb,_npxproc,_runtime,_cpu_lockid
- .globl _common_tss,_other_cpus,_ss_tpr
- .set _cpuid,_SMP_prvpage+0
- .set _curproc,_SMP_prvpage+4
- .set _curpcb,_SMP_prvpage+8
- .set _npxproc,_SMP_prvpage+12
- .set _runtime,_SMP_prvpage+16 /* 8 bytes struct timeval */
- .set _cpu_lockid,_SMP_prvpage+24
- .set _common_tss,_SMP_prvpage+28 /* 104 bytes long, next = 132 */
- .set _other_cpus,_SMP_prvpage+132 /* bitmap of available CPUs,
+ .globl _common_tss,_other_cpus,_my_idlePTD,_ss_tpr
+ .globl _prv_CMAP1,_prv_CMAP2,_prv_CMAP3
+ .set _cpuid,_SMP_prvpage+0 /* [0] */
+ .set _curproc,_SMP_prvpage+4 /* [1] */
+ .set _curpcb,_SMP_prvpage+8 /* [2] */
+ .set _npxproc,_SMP_prvpage+12 /* [3] */
+ .set _runtime,_SMP_prvpage+16 /* [4,5] */
+ .set _cpu_lockid,_SMP_prvpage+24 /* [6] */
+ .set _other_cpus,_SMP_prvpage+28 /* [7] bitmap of available CPUs,
excluding ourself */
- .set _ss_tpr,_SMP_prvpage+136
-
+ .set _my_idlePTD,_SMP_prvpage+32 /* [8] */
+ .set _ss_tpr,_SMP_prvpage+36 /* [9] */
+ .set _prv_CMAP1,_SMP_prvpage+40 /* [10] */
+ .set _prv_CMAP2,_SMP_prvpage+44 /* [11] */
+ .set _prv_CMAP3,_SMP_prvpage+48 /* [12] */
+ .set _common_tss,_SMP_prvpage+52 /* 102 (ie: 104) bytes long */
+
/* Fetch the .set's for the local apic */
#include "i386/i386/mp_apicdefs.s"
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 7376267..a11153a 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -35,7 +35,7 @@
* SUCH DAMAGE.
*
* from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
- * $Id: machdep.c,v 1.256 1997/08/18 06:58:09 charnier Exp $
+ * $Id: machdep.c,v 1.257 1997/08/21 06:32:38 charnier Exp $
*/
#include "apm.h"
@@ -392,6 +392,12 @@ again:
printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count),
ptoa(cnt.v_free_count) / 1024);
+ /*
+ * Set up buffers, so they can be used to read disk labels.
+ */
+ bufinit();
+ vm_pager_bufferinit();
+
#ifdef SMP
/*
* OK, enough kmem_alloc/malloc state should be up, lets get on with it!
@@ -399,12 +405,6 @@ again:
mp_start(); /* fire up the APs and APICs */
mp_announce();
#endif /* SMP */
-
- /*
- * Set up buffers, so they can be used to read disk labels.
- */
- bufinit();
- vm_pager_bufferinit();
}
int
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index f647070..a4a6423 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -22,17 +22,20 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: mp_machdep.c,v 1.44 1997/08/24 20:33:32 fsmp Exp $
+ * $Id: mp_machdep.c,v 1.45 1997/08/25 21:28:08 bde Exp $
*/
#include "opt_smp.h"
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
-#include <vm/vm.h> /* for KERNBASE */
-#include <vm/vm_param.h> /* for KERNBASE */
-#include <vm/pmap.h> /* for KERNBASE */
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
@@ -230,15 +233,24 @@ int apic_id_to_logical[NAPICID];
/* Bitmap of all available CPUs */
u_int all_cpus;
-/* Boot of AP uses this PTD */
-u_int *bootPTD;
+/* AP uses this PTD during bootstrap */
+pd_entry_t *bootPTD;
/* Hotwire a 0->4MB V==P mapping */
-extern pt_entry_t KPTphys;
+extern pt_entry_t *KPTphys;
/* Virtual address of per-cpu common_tss */
extern struct i386tss common_tss;
+/* IdlePTD per cpu */
+pd_entry_t *IdlePTDS[NCPU];
+
+/* "my" private page table page, for BSP init */
+extern pt_entry_t SMP_prvpt[];
+
+/* Private page pointer to curcpu's PTD, used during BSP init */
+extern pd_entry_t *my_idlePTD;
+
/*
* Local data and functions.
*/
@@ -1473,9 +1485,9 @@ start_all_aps(u_int boot_addr)
int x, i;
u_char mpbiosreason;
u_long mpbioswarmvec;
- pd_entry_t newptd;
- pt_entry_t newpt;
- int *newpp;
+ pd_entry_t *newptd;
+ pt_entry_t *newpt;
+ int *newpp, *stack;
POSTCODE(START_ALL_APS_POST);
@@ -1498,57 +1510,71 @@ start_all_aps(u_int boot_addr)
/* start each AP */
for (x = 1; x <= mp_naps; ++x) {
- /* HACK HACK HACK !!! */
+ /* This is a bit verbose, it will go away soon. */
/* alloc new page table directory */
- newptd = (pd_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE));
+ newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE));
+
+ /* Store the virtual PTD address for this CPU */
+ IdlePTDS[x] = newptd;
/* clone currently active one (ie: IdlePTD) */
bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */
/* set up 0 -> 4MB P==V mapping for AP boot */
- newptd[0] = PG_V | PG_RW | ((u_long)KPTphys & PG_FRAME);
+ newptd[0] = (pd_entry_t) (PG_V | PG_RW |
+ ((u_long)KPTphys & PG_FRAME));
- /* store PTD for this AP */
- bootPTD = (pd_entry_t)vtophys(newptd);
+ /* store PTD for this AP's boot sequence */
+ bootPTD = (pd_entry_t *)vtophys(newptd);
/* alloc new page table page */
- newpt = (pt_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE));
+ newpt = (pt_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE));
/* set the new PTD's private page to point there */
- newptd[MPPTDI] = PG_V | PG_RW | vtophys(newpt);
+ newptd[MPPTDI] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt));
/* install self referential entry */
- newptd[PTDPTDI] = PG_V | PG_RW | vtophys(newptd);
+ newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd));
- /* get a new private data page */
+ /* allocate a new private data page */
newpp = (int *)kmem_alloc(kernel_map, PAGE_SIZE);
/* wire it into the private page table page */
- newpt[0] = PG_V | PG_RW | vtophys(newpp);
+ newpt[0] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpp));
/* wire the ptp into itself for access */
- newpt[1] = PG_V | PG_RW | vtophys(newpt);
+ newpt[1] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt));
- /* and the local apic */
+ /* copy in the pointer to the local apic */
newpt[2] = SMP_prvpt[2];
/* and the IO apic mapping[s] */
for (i = 16; i < 32; i++)
newpt[i] = SMP_prvpt[i];
- /* prime data page for it to use */
- newpp[0] = x; /* cpuid */
- newpp[1] = 0; /* curproc */
- newpp[2] = 0; /* curpcb */
- newpp[3] = 0; /* npxproc */
- newpp[4] = 0; /* runtime.tv_sec */
- newpp[5] = 0; /* runtime.tv_usec */
- newpp[6] = x << 24; /* cpu_lockid */
+ /* allocate and set up an idle stack data page */
+ stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE);
+ newpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack));
- /* XXX NOTE: ABANDON bootPTD for now!!!! */
+ newpt[4] = 0; /* *prv_CMAP1 */
+ newpt[5] = 0; /* *prv_CMAP2 */
+ newpt[6] = 0; /* *prv_CMAP3 */
- /* END REVOLTING HACKERY */
+ /* prime data page for it to use */
+ newpp[0] = x; /* cpuid */
+ newpp[1] = 0; /* curproc */
+ newpp[2] = 0; /* curpcb */
+ newpp[3] = 0; /* npxproc */
+ newpp[4] = 0; /* runtime.tv_sec */
+ newpp[5] = 0; /* runtime.tv_usec */
+ newpp[6] = x << 24; /* cpu_lockid */
+ newpp[7] = 0; /* other_cpus */
+ newpp[8] = (int)bootPTD; /* my_idlePTD */
+ newpp[9] = 0; /* ss_tpr */
+ newpp[10] = (int)&newpt[4]; /* prv_CMAP1 */
+ newpp[11] = (int)&newpt[5]; /* prv_CMAP2 */
+ newpp[12] = (int)&newpt[6]; /* prv_CMAP3 */
/* setup a vector to our boot code */
*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
@@ -1585,6 +1611,26 @@ start_all_aps(u_int boot_addr)
outb(CMOS_REG, BIOS_RESET);
outb(CMOS_DATA, mpbiosreason);
+ /*
+ * Set up the idle context for the BSP. Similar to above except
+ * that some was done by locore, some by pmap.c and some is implicit
+ * because the BSP is cpu#0 and the page is initially zero, and also
+ * because we can refer to variables by name on the BSP..
+ */
+ newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE));
+
+ bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */
+ IdlePTDS[0] = newptd;
+
+ /* Point PTD[] to this page instead of IdlePTD's physical page */
+ newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd));
+
+ my_idlePTD = (pd_entry_t *)vtophys(newptd);
+
+ /* Allocate and setup BSP idle stack */
+ stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE);
+ SMP_prvpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack));
+
pmap_set_opt_bsp();
/* number of APs actually started */
@@ -1800,7 +1846,7 @@ invltlb(void)
* from executing at same time.
*/
int
-stop_cpus( u_int map )
+stop_cpus(u_int map)
{
if (!smp_active)
return 0;
@@ -1832,7 +1878,7 @@ stop_cpus( u_int map )
* 1: ok
*/
int
-restart_cpus( u_int map )
+restart_cpus(u_int map)
{
if (!smp_active)
return 0;
@@ -1844,3 +1890,63 @@ restart_cpus( u_int map )
return 1;
}
+
+int smp_active = 0; /* are the APs allowed to run? */
+SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, "");
+
+/* XXX maybe should be hw.ncpu */
+int smp_cpus = 1; /* how many cpu's running */
+SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, "");
+
+int invltlb_ok = 0; /* throttle smp_invltlb() till safe */
+SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
+
+int do_page_zero_idle = 0; /* bzero pages for fun and profit in idleloop */
+SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW,
+ &do_page_zero_idle, 0, "");
+
+
+/*
+ * This is called once the rest of the system is up and running and we're
+ * ready to let the AP's out of the pen.
+ */
+void ap_init(void);
+
+void
+ap_init()
+{
+ u_int temp;
+ u_int apic_id;
+
+ smp_cpus++;
+
+ /* Build our map of 'other' CPUs. */
+ other_cpus = all_cpus & ~(1 << cpuid);
+
+ printf("SMP: AP CPU #%d Launched!\n", cpuid);
+
+ /* XXX FIXME: i386 specific, and redundant: Setup the FPU. */
+ load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS);
+
+ /* A quick check from sanity claus */
+ apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
+ if (cpuid != apic_id) {
+ printf("SMP: cpuid = %d\n", cpuid);
+ printf("SMP: apic_id = %d\n", apic_id);
+ printf("PTD[MPPTDI] = %08x\n", PTD[MPPTDI]);
+ panic("cpuid mismatch! boom!!");
+ }
+
+ /* Init local apic for irq's */
+ apic_initialize();
+
+ /*
+ * Activate smp_invltlb, although strictly speaking, this isn't
+ * quite correct yet. We should have a bitfield for cpus willing
+ * to accept TLB flush IPI's or something and sync them.
+ */
+ invltlb_ok = 1;
+ smp_active = 1; /* historic */
+
+ curproc = NULL; /* make sure */
+}
diff --git a/sys/amd64/amd64/mpboot.S b/sys/amd64/amd64/mpboot.S
index 8a6af0a..9d51b1f 100644
--- a/sys/amd64/amd64/mpboot.S
+++ b/sys/amd64/amd64/mpboot.S
@@ -31,10 +31,9 @@
* mpboot.s: FreeBSD machine support for the Intel MP Spec
* multiprocessor systems.
*
- * $Id: mpboot.s,v 1.2 1997/06/22 16:03:22 peter Exp $
+ * $Id: mpboot.s,v 1.3 1997/08/25 10:57:36 peter Exp $
*/
-
#include <machine/asmacros.h> /* miscellaneous asm macros */
#include <machine/apic.h>
#include <machine/specialreg.h>
@@ -74,15 +73,13 @@
NON_GPROF_ENTRY(MPentry)
CHECKPOINT(0x36, 3)
- movl $mp_stk-KERNBASE,%esp /* mp boot stack end loc. */
/* Now enable paging mode */
movl _bootPTD-KERNBASE, %eax
movl %eax,%cr3
- movl %cr0,%eax
+ movl %cr0,%eax
orl $CR0_PE|CR0_PG,%eax /* enable paging */
- movl $0x80000011,%eax
movl %eax,%cr0 /* let the games begin! */
- movl $mp_stk,%esp /* mp boot stack end loc. */
+ movl $_idlestack_top,%esp /* boot stack end loc. */
pushl $mp_begin /* jump to high mem */
ret
@@ -105,30 +102,16 @@ mp_begin: /* now running relocated at KERNBASE */
movl %eax, _cpu_apic_versions /* into [ 0 ] */
incl _mp_ncpus /* signal BSP */
- /* One at a time, we are running on the shared mp_stk */
- /* This is the Intel reccomended semaphore method */
-#define BL_SET 0xff
-#define BL_CLR 0x00
- movb $BL_SET, %al
-1:
- xchgb %al, bootlock /* xchg is implicitly locked */
- cmpb $BL_SET, %al /* was is set? */
- jz 1b /* yes, keep trying... */
CHECKPOINT(0x39, 6)
- /* Now, let's do some REAL WORK :-) */
- call _secondary_main
-/* NOT REACHED */
-2: hlt
- jmp 2b
+ /* wait till we can get into the kernel */
+ call _boot_get_mplock
-/*
- * Let a CPU past the semaphore so it can use mp_stk
- */
-ENTRY(boot_unlock)
- movb $BL_CLR, %al
- xchgb %al, bootlock /* xchg is implicitly locked */
- ret
+ /* Now, let's prepare for some REAL WORK :-) */
+ call _ap_init
+
+ /* let her rip! (loads new stack) */
+ jmp _cpu_switch
/*
* This is the embedded trampoline or bootstrap that is
@@ -300,17 +283,3 @@ BOOTMP2:
.globl _bootMP_size
_bootMP_size:
.long BOOTMP2 - BOOTMP1
-
- /*
- * Temporary stack used while booting AP's
- * It is protected by:
- * 1: only one cpu is started at a time and it ends up waiting
- * for smp_active before continuing.
- * 2: Once smp_active != 0; further access is limited by _bootlock.
- */
- .globl mp_stk
- .space 0x2000 /* space for mp_stk - 2nd temporary stack */
-mp_stk:
-
- .globl bootlock
-bootlock: .byte BL_SET
diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c
index f647070..a4a6423 100644
--- a/sys/amd64/amd64/mptable.c
+++ b/sys/amd64/amd64/mptable.c
@@ -22,17 +22,20 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: mp_machdep.c,v 1.44 1997/08/24 20:33:32 fsmp Exp $
+ * $Id: mp_machdep.c,v 1.45 1997/08/25 21:28:08 bde Exp $
*/
#include "opt_smp.h"
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
-#include <vm/vm.h> /* for KERNBASE */
-#include <vm/vm_param.h> /* for KERNBASE */
-#include <vm/pmap.h> /* for KERNBASE */
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
@@ -230,15 +233,24 @@ int apic_id_to_logical[NAPICID];
/* Bitmap of all available CPUs */
u_int all_cpus;
-/* Boot of AP uses this PTD */
-u_int *bootPTD;
+/* AP uses this PTD during bootstrap */
+pd_entry_t *bootPTD;
/* Hotwire a 0->4MB V==P mapping */
-extern pt_entry_t KPTphys;
+extern pt_entry_t *KPTphys;
/* Virtual address of per-cpu common_tss */
extern struct i386tss common_tss;
+/* IdlePTD per cpu */
+pd_entry_t *IdlePTDS[NCPU];
+
+/* "my" private page table page, for BSP init */
+extern pt_entry_t SMP_prvpt[];
+
+/* Private page pointer to curcpu's PTD, used during BSP init */
+extern pd_entry_t *my_idlePTD;
+
/*
* Local data and functions.
*/
@@ -1473,9 +1485,9 @@ start_all_aps(u_int boot_addr)
int x, i;
u_char mpbiosreason;
u_long mpbioswarmvec;
- pd_entry_t newptd;
- pt_entry_t newpt;
- int *newpp;
+ pd_entry_t *newptd;
+ pt_entry_t *newpt;
+ int *newpp, *stack;
POSTCODE(START_ALL_APS_POST);
@@ -1498,57 +1510,71 @@ start_all_aps(u_int boot_addr)
/* start each AP */
for (x = 1; x <= mp_naps; ++x) {
- /* HACK HACK HACK !!! */
+ /* This is a bit verbose, it will go away soon. */
/* alloc new page table directory */
- newptd = (pd_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE));
+ newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE));
+
+ /* Store the virtual PTD address for this CPU */
+ IdlePTDS[x] = newptd;
/* clone currently active one (ie: IdlePTD) */
bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */
/* set up 0 -> 4MB P==V mapping for AP boot */
- newptd[0] = PG_V | PG_RW | ((u_long)KPTphys & PG_FRAME);
+ newptd[0] = (pd_entry_t) (PG_V | PG_RW |
+ ((u_long)KPTphys & PG_FRAME));
- /* store PTD for this AP */
- bootPTD = (pd_entry_t)vtophys(newptd);
+ /* store PTD for this AP's boot sequence */
+ bootPTD = (pd_entry_t *)vtophys(newptd);
/* alloc new page table page */
- newpt = (pt_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE));
+ newpt = (pt_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE));
/* set the new PTD's private page to point there */
- newptd[MPPTDI] = PG_V | PG_RW | vtophys(newpt);
+ newptd[MPPTDI] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt));
/* install self referential entry */
- newptd[PTDPTDI] = PG_V | PG_RW | vtophys(newptd);
+ newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd));
- /* get a new private data page */
+ /* allocate a new private data page */
newpp = (int *)kmem_alloc(kernel_map, PAGE_SIZE);
/* wire it into the private page table page */
- newpt[0] = PG_V | PG_RW | vtophys(newpp);
+ newpt[0] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpp));
/* wire the ptp into itself for access */
- newpt[1] = PG_V | PG_RW | vtophys(newpt);
+ newpt[1] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt));
- /* and the local apic */
+ /* copy in the pointer to the local apic */
newpt[2] = SMP_prvpt[2];
/* and the IO apic mapping[s] */
for (i = 16; i < 32; i++)
newpt[i] = SMP_prvpt[i];
- /* prime data page for it to use */
- newpp[0] = x; /* cpuid */
- newpp[1] = 0; /* curproc */
- newpp[2] = 0; /* curpcb */
- newpp[3] = 0; /* npxproc */
- newpp[4] = 0; /* runtime.tv_sec */
- newpp[5] = 0; /* runtime.tv_usec */
- newpp[6] = x << 24; /* cpu_lockid */
+ /* allocate and set up an idle stack data page */
+ stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE);
+ newpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack));
- /* XXX NOTE: ABANDON bootPTD for now!!!! */
+ newpt[4] = 0; /* *prv_CMAP1 */
+ newpt[5] = 0; /* *prv_CMAP2 */
+ newpt[6] = 0; /* *prv_CMAP3 */
- /* END REVOLTING HACKERY */
+ /* prime data page for it to use */
+ newpp[0] = x; /* cpuid */
+ newpp[1] = 0; /* curproc */
+ newpp[2] = 0; /* curpcb */
+ newpp[3] = 0; /* npxproc */
+ newpp[4] = 0; /* runtime.tv_sec */
+ newpp[5] = 0; /* runtime.tv_usec */
+ newpp[6] = x << 24; /* cpu_lockid */
+ newpp[7] = 0; /* other_cpus */
+ newpp[8] = (int)bootPTD; /* my_idlePTD */
+ newpp[9] = 0; /* ss_tpr */
+ newpp[10] = (int)&newpt[4]; /* prv_CMAP1 */
+ newpp[11] = (int)&newpt[5]; /* prv_CMAP2 */
+ newpp[12] = (int)&newpt[6]; /* prv_CMAP3 */
/* setup a vector to our boot code */
*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
@@ -1585,6 +1611,26 @@ start_all_aps(u_int boot_addr)
outb(CMOS_REG, BIOS_RESET);
outb(CMOS_DATA, mpbiosreason);
+ /*
+ * Set up the idle context for the BSP. Similar to above except
+ * that some was done by locore, some by pmap.c and some is implicit
+ * because the BSP is cpu#0 and the page is initially zero, and also
+ * because we can refer to variables by name on the BSP..
+ */
+ newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE));
+
+ bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */
+ IdlePTDS[0] = newptd;
+
+ /* Point PTD[] to this page instead of IdlePTD's physical page */
+ newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd));
+
+ my_idlePTD = (pd_entry_t *)vtophys(newptd);
+
+ /* Allocate and setup BSP idle stack */
+ stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE);
+ SMP_prvpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack));
+
pmap_set_opt_bsp();
/* number of APs actually started */
@@ -1800,7 +1846,7 @@ invltlb(void)
* from executing at same time.
*/
int
-stop_cpus( u_int map )
+stop_cpus(u_int map)
{
if (!smp_active)
return 0;
@@ -1832,7 +1878,7 @@ stop_cpus( u_int map )
* 1: ok
*/
int
-restart_cpus( u_int map )
+restart_cpus(u_int map)
{
if (!smp_active)
return 0;
@@ -1844,3 +1890,63 @@ restart_cpus( u_int map )
return 1;
}
+
+int smp_active = 0; /* are the APs allowed to run? */
+SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, "");
+
+/* XXX maybe should be hw.ncpu */
+int smp_cpus = 1; /* how many cpu's running */
+SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, "");
+
+int invltlb_ok = 0; /* throttle smp_invltlb() till safe */
+SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
+
+int do_page_zero_idle = 0; /* bzero pages for fun and profit in idleloop */
+SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW,
+ &do_page_zero_idle, 0, "");
+
+
+/*
+ * This is called once the rest of the system is up and running and we're
+ * ready to let the AP's out of the pen.
+ */
+void ap_init(void);
+
+void
+ap_init()
+{
+ u_int temp;
+ u_int apic_id;
+
+ smp_cpus++;
+
+ /* Build our map of 'other' CPUs. */
+ other_cpus = all_cpus & ~(1 << cpuid);
+
+ printf("SMP: AP CPU #%d Launched!\n", cpuid);
+
+ /* XXX FIXME: i386 specific, and redundant: Setup the FPU. */
+ load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS);
+
+ /* A quick check from sanity claus */
+ apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
+ if (cpuid != apic_id) {
+ printf("SMP: cpuid = %d\n", cpuid);
+ printf("SMP: apic_id = %d\n", apic_id);
+ printf("PTD[MPPTDI] = %08x\n", PTD[MPPTDI]);
+ panic("cpuid mismatch! boom!!");
+ }
+
+ /* Init local apic for irq's */
+ apic_initialize();
+
+ /*
+ * Activate smp_invltlb, although strictly speaking, this isn't
+ * quite correct yet. We should have a bitfield for cpus willing
+ * to accept TLB flush IPI's or something and sync them.
+ */
+ invltlb_ok = 1;
+ smp_active = 1; /* historic */
+
+ curproc = NULL; /* make sure */
+}
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 3c43184..2b069f7 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -39,7 +39,7 @@
* SUCH DAMAGE.
*
* from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
- * $Id: pmap.c,v 1.158 1997/08/07 05:15:48 dyson Exp $
+ * $Id: pmap.c,v 1.159 1997/08/25 21:53:01 bde Exp $
*/
/*
@@ -183,6 +183,13 @@ static caddr_t CADDR2;
static pt_entry_t *msgbufmap;
struct msgbuf *msgbufp=0;
+#ifdef SMP
+extern char prv_CPAGE1[], prv_CPAGE2[], prv_CPAGE3[];
+extern pt_entry_t *prv_CMAP1, *prv_CMAP2, *prv_CMAP3;
+extern pd_entry_t *IdlePTDS[];
+extern pt_entry_t SMP_prvpt[];
+#endif
+
pt_entry_t *PMAP1 = 0;
unsigned *PADDR1 = 0;
@@ -408,7 +415,7 @@ pmap_bootstrap(firstaddr, loadaddr)
/* 1 = page table page */
/* 2 = local apic */
/* 16-31 = io apics */
- SMP_prvpt[2] = PG_V | PG_RW | pgeflag | ((u_long)cpu_apic_address & PG_FRAME);
+ SMP_prvpt[2] = (pt_entry_t)(PG_V | PG_RW | pgeflag | ((u_long)cpu_apic_address & PG_FRAME));
for (i = 0; i < mp_napics; i++) {
for (j = 0; j < 16; j++) {
@@ -420,8 +427,8 @@ pmap_bootstrap(firstaddr, loadaddr)
}
/* use this slot if available */
if (((u_long)SMP_prvpt[j + 16] & PG_FRAME) == 0) {
- SMP_prvpt[j + 16] = PG_V | PG_RW | pgeflag |
- ((u_long)io_apic_address[i] & PG_FRAME);
+ SMP_prvpt[j + 16] = (pt_entry_t)(PG_V | PG_RW | pgeflag |
+ ((u_long)io_apic_address[i] & PG_FRAME));
ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE];
break;
}
@@ -429,6 +436,11 @@ pmap_bootstrap(firstaddr, loadaddr)
if (j == 16)
panic("no space to map IO apic %d!", i);
}
+
+ /* BSP does this itself, AP's get it pre-set */
+ prv_CMAP1 = (pt_entry_t *)&SMP_prvpt[4];
+ prv_CMAP2 = (pt_entry_t *)&SMP_prvpt[5];
+ prv_CMAP3 = (pt_entry_t *)&SMP_prvpt[6];
#endif
invltlb();
@@ -463,7 +475,8 @@ pmap_set_opt(unsigned *pdir) {
* Setup the PTD for the boot processor
*/
void
-pmap_set_opt_bsp(void) {
+pmap_set_opt_bsp(void)
+{
pmap_set_opt((unsigned *)kernel_pmap->pm_pdir);
pmap_set_opt((unsigned *)PTD);
invltlb();
@@ -1414,6 +1427,9 @@ pmap_growkernel(vm_offset_t addr)
struct proc *p;
struct pmap *pmap;
int s;
+#ifdef SMP
+ int i;
+#endif
s = splhigh();
if (kernel_vm_end == 0) {
@@ -1446,6 +1462,14 @@ pmap_growkernel(vm_offset_t addr)
pmap_zero_page(VM_PAGE_TO_PHYS(nkpg));
}
pdir_pde(PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_RW | pgeflag);
+
+#ifdef SMP
+ for (i = 0; i < mp_naps; i++) {
+ if (IdlePTDS[i])
+ pdir_pde(IdlePTDS[i], kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_RW | pgeflag);
+ }
+#endif
+
nkpg = NULL;
for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
@@ -2591,6 +2615,18 @@ void
pmap_zero_page(phys)
vm_offset_t phys;
{
+#ifdef SMP
+ if (*(int *) prv_CMAP3)
+ panic("pmap_zero_page: prv_CMAP3 busy");
+
+ *(int *) prv_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME);
+ invltlb_1pg((vm_offset_t) &prv_CPAGE3);
+
+ bzero(&prv_CPAGE3, PAGE_SIZE);
+
+ *(int *) prv_CMAP3 = 0;
+ invltlb_1pg((vm_offset_t) &prv_CPAGE3);
+#else
if (*(int *) CMAP2)
panic("pmap_zero_page: CMAP busy");
@@ -2598,6 +2634,7 @@ pmap_zero_page(phys)
bzero(CADDR2, PAGE_SIZE);
*(int *) CMAP2 = 0;
invltlb_1pg((vm_offset_t) CADDR2);
+#endif
}
/*
@@ -2611,6 +2648,23 @@ pmap_copy_page(src, dst)
vm_offset_t src;
vm_offset_t dst;
{
+#ifdef SMP
+ if (*(int *) prv_CMAP1)
+ panic("pmap_copy_page: prv_CMAP1 busy");
+ if (*(int *) prv_CMAP2)
+ panic("pmap_copy_page: prv_CMAP2 busy");
+
+ *(int *) prv_CMAP1 = PG_V | PG_RW | (src & PG_FRAME);
+ *(int *) prv_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME);
+
+ invltlb_2pg( (vm_offset_t) &prv_CPAGE1, (vm_offset_t) &prv_CPAGE2);
+
+ bcopy(&prv_CPAGE1, &prv_CPAGE2, PAGE_SIZE);
+
+ *(int *) prv_CMAP1 = 0;
+ *(int *) prv_CMAP2 = 0;
+ invltlb_2pg( (vm_offset_t) &prv_CPAGE1, (vm_offset_t) &prv_CPAGE2);
+#else
if (*(int *) CMAP1 || *(int *) CMAP2)
panic("pmap_copy_page: CMAP busy");
@@ -2622,6 +2676,7 @@ pmap_copy_page(src, dst)
*(int *) CMAP1 = 0;
*(int *) CMAP2 = 0;
invltlb_2pg( (vm_offset_t) CADDR1, (vm_offset_t) CADDR2);
+#endif
}
diff --git a/sys/amd64/amd64/swtch.s b/sys/amd64/amd64/swtch.s
index 61b1dfa..f749f03 100644
--- a/sys/amd64/amd64/swtch.s
+++ b/sys/amd64/amd64/swtch.s
@@ -33,7 +33,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: swtch.s,v 1.58 1997/08/04 17:31:43 fsmp Exp $
+ * $Id: swtch.s,v 1.59 1997/08/09 00:02:47 dyson Exp $
*/
#include "npx.h"
@@ -240,17 +240,94 @@ rem3id: .asciz "remrq.id"
/*
* When no processes are on the runq, cpu_switch() branches to _idle
* to wait for something to come ready.
- *
- * NOTE: on an SMP system this routine is a startup-only code path.
- * once initialization is over, meaning the idle procs have been
- * created, we should NEVER branch here.
*/
ALIGN_TEXT
_idle:
-#if defined(SMP) && defined(DIAGNOSTIC)
- cmpl $0, _smp_active
- jnz badsw3
-#endif /* SMP && DIAGNOSTIC */
+#ifdef SMP
+ /* when called, we have the mplock, intr disabled */
+
+ xorl %ebp,%ebp
+
+ /* use our idleproc's "context" */
+ movl _my_idlePTD,%ecx
+ movl %ecx,%cr3
+ movl $_idlestack_top,%ecx
+ movl %ecx,%esp
+
+ /* update common_tss.tss_esp0 pointer */
+ movl $_common_tss, %eax
+ movl %ecx, TSS_ESP0(%eax)
+
+ sti
+
+ /*
+ * XXX callers of cpu_switch() do a bogus splclock(). Locking should
+ * be left to cpu_switch().
+ */
+ call _spl0
+
+ cli
+
+ /*
+ * _REALLY_ free the lock, no matter how deep the prior nesting.
+ * We will recover the nesting on the way out when we have a new
+ * proc to load.
+ *
+ * XXX: we had damn well better be sure we had it before doing this!
+ */
+ movl $FREE_LOCK, %eax
+ movl %eax, _mp_lock
+
+ /* do NOT have lock, intrs disabled */
+ .globl idle_loop
+idle_loop:
+
+ movl %cr3,%eax /* ouch! */
+ movl %eax,%cr3
+
+ cmpl $0,_smp_active
+ jne 1f
+ cmpl $0,_cpuid
+ je 1f
+ jmp 2f
+
+1: cmpl $0,_whichrtqs /* real-time queue */
+ jne 3f
+ cmpl $0,_whichqs /* normal queue */
+ jne 3f
+ cmpl $0,_whichidqs /* 'idle' queue */
+ jne 3f
+
+ cmpl $0,_do_page_zero_idle
+ je 2f
+ /* XXX appears to cause panics */
+ /*
+ * Inside zero_idle we enable interrupts and grab the mplock
+ * as needed. It needs to be careful about entry/exit mutexes.
+ */
+ call _vm_page_zero_idle /* internal locking */
+ testl %eax, %eax
+ jnz idle_loop
+2:
+
+ /* enable intrs for a halt */
+ sti
+ call *_hlt_vector /* wait for interrupt */
+ cli
+ jmp idle_loop
+
+3:
+ call _get_mplock
+ cmpl $0,_whichrtqs /* real-time queue */
+ CROSSJUMP(jne, sw1a, je)
+ cmpl $0,_whichqs /* normal queue */
+ CROSSJUMP(jne, nortqr, je)
+ cmpl $0,_whichidqs /* 'idle' queue */
+ CROSSJUMP(jne, idqr, je)
+ call _rel_mplock
+ jmp idle_loop
+
+#else
xorl %ebp,%ebp
movl $HIDENAME(tmpstk),%esp
movl _IdlePTD,%ecx
@@ -302,6 +379,7 @@ idle_loop:
sti
call *_hlt_vector /* wait for interrupt */
jmp idle_loop
+#endif
CROSSJUMPTARGET(_idle)
@@ -367,6 +445,17 @@ ENTRY(cpu_switch)
/* save is done, now choose a new process or idle */
sw1:
cli
+
+#ifdef SMP
+ /* Stop scheduling if smp_active goes zero and we are not BSP */
+ cmpl $0,_smp_active
+ jne 1f
+ cmpl $0,_cpuid
+ je 1f
+ CROSSJUMP(je, _idle, jne) /* wind down */
+1:
+#endif
+
sw1a:
movl _whichrtqs,%edi /* pick next p. from rtqs */
testl %edi,%edi
@@ -594,12 +683,6 @@ sw0_2: .asciz "cpu_switch: not SRUN"
#endif
#if defined(SMP) && defined(DIAGNOSTIC)
-badsw3:
- pushl $sw0_3
- call _panic
-
-sw0_3: .asciz "cpu_switch: went idle with smp_active"
-
badsw4:
pushl $sw0_4
call _panic
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index ac139d5..5895361 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -35,7 +35,7 @@
* SUCH DAMAGE.
*
* from: @(#)trap.c 7.4 (Berkeley) 5/13/91
- * $Id: trap.c,v 1.106 1997/08/20 05:25:48 fsmp Exp $
+ * $Id: trap.c,v 1.107 1997/08/21 06:32:39 charnier Exp $
*/
/*
@@ -725,6 +725,7 @@ trap_fatal(frame)
ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
#ifdef SMP
printf("cpuid = %d\n", cpuid);
+ printf("lapic.id = %d\n", lapic.id);
#endif
if (type == T_PAGEFLT) {
printf("fault virtual address = 0x%x\n", eva);
@@ -769,6 +770,9 @@ trap_fatal(frame)
} else {
printf("Idle\n");
}
+#ifdef SMP
+ printf("mp_lock = %08x\n", mp_lock);
+#endif
printf("interrupt mask = ");
if ((cpl & net_imask) == net_imask)
printf("net ");
@@ -824,6 +828,7 @@ dblfault_handler()
printf("ebp = 0x%x\n", common_tss.tss_ebp);
#ifdef SMP
printf("cpuid = %d\n", cpuid);
+ printf("lapic.id = %d\n", lapic.id);
#endif
panic("double fault");
}
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 2db2844..0d2c672 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -38,7 +38,7 @@
*
* from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91
* Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
- * $Id: vm_machdep.c,v 1.84 1997/07/20 08:37:24 bde Exp $
+ * $Id: vm_machdep.c,v 1.85 1997/08/09 00:02:56 dyson Exp $
*/
#include "npx.h"
@@ -927,6 +927,9 @@ vm_page_zero_idle()
*/
if (cnt.v_free_count - vm_page_zero_count <= cnt.v_free_reserved / 2)
return (0);
+#ifdef SMP
+ get_mplock();
+#endif
s = splvm();
enable_intr();
m = vm_page_list_find(PQ_FREE, free_rover);
@@ -934,7 +937,13 @@ vm_page_zero_idle()
--(*vm_page_queues[m->queue].lcnt);
TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
splx(s);
+#ifdef SMP
+ rel_mplock();
+#endif
pmap_zero_page(VM_PAGE_TO_PHYS(m));
+#ifdef SMP
+ get_mplock();
+#endif
(void)splvm();
m->queue = PQ_ZERO + m->pc;
++(*vm_page_queues[m->queue].lcnt);
@@ -944,5 +953,8 @@ vm_page_zero_idle()
}
splx(s);
disable_intr();
+#ifdef SMP
+ rel_mplock();
+#endif
return (1);
}
diff --git a/sys/amd64/include/mptable.h b/sys/amd64/include/mptable.h
index f647070..a4a6423 100644
--- a/sys/amd64/include/mptable.h
+++ b/sys/amd64/include/mptable.h
@@ -22,17 +22,20 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: mp_machdep.c,v 1.44 1997/08/24 20:33:32 fsmp Exp $
+ * $Id: mp_machdep.c,v 1.45 1997/08/25 21:28:08 bde Exp $
*/
#include "opt_smp.h"
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
-#include <vm/vm.h> /* for KERNBASE */
-#include <vm/vm_param.h> /* for KERNBASE */
-#include <vm/pmap.h> /* for KERNBASE */
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
@@ -230,15 +233,24 @@ int apic_id_to_logical[NAPICID];
/* Bitmap of all available CPUs */
u_int all_cpus;
-/* Boot of AP uses this PTD */
-u_int *bootPTD;
+/* AP uses this PTD during bootstrap */
+pd_entry_t *bootPTD;
/* Hotwire a 0->4MB V==P mapping */
-extern pt_entry_t KPTphys;
+extern pt_entry_t *KPTphys;
/* Virtual address of per-cpu common_tss */
extern struct i386tss common_tss;
+/* IdlePTD per cpu */
+pd_entry_t *IdlePTDS[NCPU];
+
+/* "my" private page table page, for BSP init */
+extern pt_entry_t SMP_prvpt[];
+
+/* Private page pointer to curcpu's PTD, used during BSP init */
+extern pd_entry_t *my_idlePTD;
+
/*
* Local data and functions.
*/
@@ -1473,9 +1485,9 @@ start_all_aps(u_int boot_addr)
int x, i;
u_char mpbiosreason;
u_long mpbioswarmvec;
- pd_entry_t newptd;
- pt_entry_t newpt;
- int *newpp;
+ pd_entry_t *newptd;
+ pt_entry_t *newpt;
+ int *newpp, *stack;
POSTCODE(START_ALL_APS_POST);
@@ -1498,57 +1510,71 @@ start_all_aps(u_int boot_addr)
/* start each AP */
for (x = 1; x <= mp_naps; ++x) {
- /* HACK HACK HACK !!! */
+ /* This is a bit verbose, it will go away soon. */
/* alloc new page table directory */
- newptd = (pd_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE));
+ newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE));
+
+ /* Store the virtual PTD address for this CPU */
+ IdlePTDS[x] = newptd;
/* clone currently active one (ie: IdlePTD) */
bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */
/* set up 0 -> 4MB P==V mapping for AP boot */
- newptd[0] = PG_V | PG_RW | ((u_long)KPTphys & PG_FRAME);
+ newptd[0] = (pd_entry_t) (PG_V | PG_RW |
+ ((u_long)KPTphys & PG_FRAME));
- /* store PTD for this AP */
- bootPTD = (pd_entry_t)vtophys(newptd);
+ /* store PTD for this AP's boot sequence */
+ bootPTD = (pd_entry_t *)vtophys(newptd);
/* alloc new page table page */
- newpt = (pt_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE));
+ newpt = (pt_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE));
/* set the new PTD's private page to point there */
- newptd[MPPTDI] = PG_V | PG_RW | vtophys(newpt);
+ newptd[MPPTDI] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt));
/* install self referential entry */
- newptd[PTDPTDI] = PG_V | PG_RW | vtophys(newptd);
+ newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd));
- /* get a new private data page */
+ /* allocate a new private data page */
newpp = (int *)kmem_alloc(kernel_map, PAGE_SIZE);
/* wire it into the private page table page */
- newpt[0] = PG_V | PG_RW | vtophys(newpp);
+ newpt[0] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpp));
/* wire the ptp into itself for access */
- newpt[1] = PG_V | PG_RW | vtophys(newpt);
+ newpt[1] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt));
- /* and the local apic */
+ /* copy in the pointer to the local apic */
newpt[2] = SMP_prvpt[2];
/* and the IO apic mapping[s] */
for (i = 16; i < 32; i++)
newpt[i] = SMP_prvpt[i];
- /* prime data page for it to use */
- newpp[0] = x; /* cpuid */
- newpp[1] = 0; /* curproc */
- newpp[2] = 0; /* curpcb */
- newpp[3] = 0; /* npxproc */
- newpp[4] = 0; /* runtime.tv_sec */
- newpp[5] = 0; /* runtime.tv_usec */
- newpp[6] = x << 24; /* cpu_lockid */
+ /* allocate and set up an idle stack data page */
+ stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE);
+ newpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack));
- /* XXX NOTE: ABANDON bootPTD for now!!!! */
+ newpt[4] = 0; /* *prv_CMAP1 */
+ newpt[5] = 0; /* *prv_CMAP2 */
+ newpt[6] = 0; /* *prv_CMAP3 */
- /* END REVOLTING HACKERY */
+ /* prime data page for it to use */
+ newpp[0] = x; /* cpuid */
+ newpp[1] = 0; /* curproc */
+ newpp[2] = 0; /* curpcb */
+ newpp[3] = 0; /* npxproc */
+ newpp[4] = 0; /* runtime.tv_sec */
+ newpp[5] = 0; /* runtime.tv_usec */
+ newpp[6] = x << 24; /* cpu_lockid */
+ newpp[7] = 0; /* other_cpus */
+ newpp[8] = (int)bootPTD; /* my_idlePTD */
+ newpp[9] = 0; /* ss_tpr */
+ newpp[10] = (int)&newpt[4]; /* prv_CMAP1 */
+ newpp[11] = (int)&newpt[5]; /* prv_CMAP2 */
+ newpp[12] = (int)&newpt[6]; /* prv_CMAP3 */
/* setup a vector to our boot code */
*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
@@ -1585,6 +1611,26 @@ start_all_aps(u_int boot_addr)
outb(CMOS_REG, BIOS_RESET);
outb(CMOS_DATA, mpbiosreason);
+ /*
+ * Set up the idle context for the BSP. Similar to above except
+ * that some was done by locore, some by pmap.c and some is implicit
+ * because the BSP is cpu#0 and the page is initially zero, and also
+ * because we can refer to variables by name on the BSP..
+ */
+ newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE));
+
+ bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */
+ IdlePTDS[0] = newptd;
+
+ /* Point PTD[] to this page instead of IdlePTD's physical page */
+ newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd));
+
+ my_idlePTD = (pd_entry_t *)vtophys(newptd);
+
+ /* Allocate and setup BSP idle stack */
+ stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE);
+ SMP_prvpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack));
+
pmap_set_opt_bsp();
/* number of APs actually started */
@@ -1800,7 +1846,7 @@ invltlb(void)
* from executing at same time.
*/
int
-stop_cpus( u_int map )
+stop_cpus(u_int map)
{
if (!smp_active)
return 0;
@@ -1832,7 +1878,7 @@ stop_cpus( u_int map )
* 1: ok
*/
int
-restart_cpus( u_int map )
+restart_cpus(u_int map)
{
if (!smp_active)
return 0;
@@ -1844,3 +1890,63 @@ restart_cpus( u_int map )
return 1;
}
+
+int smp_active = 0; /* are the APs allowed to run? */
+SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, "");
+
+/* XXX maybe should be hw.ncpu */
+int smp_cpus = 1; /* how many cpu's running */
+SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, "");
+
+int invltlb_ok = 0; /* throttle smp_invltlb() till safe */
+SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
+
+int do_page_zero_idle = 0; /* bzero pages for fun and profit in idleloop */
+SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW,
+ &do_page_zero_idle, 0, "");
+
+
+/*
+ * This is called once the rest of the system is up and running and we're
+ * ready to let the AP's out of the pen.
+ */
+void ap_init(void);
+
+void
+ap_init()
+{
+ u_int temp;
+ u_int apic_id;
+
+ smp_cpus++;
+
+ /* Build our map of 'other' CPUs. */
+ other_cpus = all_cpus & ~(1 << cpuid);
+
+ printf("SMP: AP CPU #%d Launched!\n", cpuid);
+
+ /* XXX FIXME: i386 specific, and redundant: Setup the FPU. */
+ load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS);
+
+ /* A quick check from sanity claus */
+ apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
+ if (cpuid != apic_id) {
+ printf("SMP: cpuid = %d\n", cpuid);
+ printf("SMP: apic_id = %d\n", apic_id);
+ printf("PTD[MPPTDI] = %08x\n", PTD[MPPTDI]);
+ panic("cpuid mismatch! boom!!");
+ }
+
+ /* Init local apic for irq's */
+ apic_initialize();
+
+ /*
+ * Activate smp_invltlb, although strictly speaking, this isn't
+ * quite correct yet. We should have a bitfield for cpus willing
+ * to accept TLB flush IPI's or something and sync them.
+ */
+ invltlb_ok = 1;
+ smp_active = 1; /* historic */
+
+ curproc = NULL; /* make sure */
+}
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index f433b2f..276b838 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -6,7 +6,7 @@
* this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
* ----------------------------------------------------------------------------
*
- * $Id: smp.h,v 1.24 1997/08/24 20:17:05 smp Exp smp $
+ * $Id: smp.h,v 1.29 1997/08/24 20:33:24 fsmp Exp $
*
*/
@@ -100,7 +100,6 @@ extern int cpu_num_to_apic_id[];
extern int io_num_to_apic_id[];
extern int apic_id_to_logical[];
extern u_int all_cpus;
-extern u_int SMP_prvpt[];
extern u_char SMP_ioapic[];
/* functions in mp_machdep.c */
OpenPOWER on IntegriCloud