diff options
author | peter <peter@FreeBSD.org> | 1997-08-26 18:10:38 +0000 |
---|---|---|
committer | peter <peter@FreeBSD.org> | 1997-08-26 18:10:38 +0000 |
commit | 7dfe3e5abe7df9a2db7c225b0ce9e73fc5955654 (patch) | |
tree | 02a81dce529d428cea86379dd50d22676d1b7fbc /sys | |
parent | 4ef08431e3258c177c4a081bb0bba2eeef3a3ac3 (diff) | |
download | FreeBSD-src-7dfe3e5abe7df9a2db7c225b0ce9e73fc5955654.zip FreeBSD-src-7dfe3e5abe7df9a2db7c225b0ce9e73fc5955654.tar.gz |
Clean up the SMP AP bootstrap and eliminate the wretched idle procs.
- We now have enough per-cpu idle context that the real idle loop has been
revived (CPUs now halt when they have nothing to do).
- Some preliminary support for running some operations outside the
global lock (e.g.: zeroing "free but not yet zeroed pages") is present
but appears to cause problems. Off by default.
- The smp_active sysctl now behaves differently. It's merely a 'true/false'
option. Setting smp_active to zero causes the APs to halt in the idle
loop and stop scheduling processes.
- Bootstrap is a lot safer. Instead of sharing a statically compiled-in
stack a number of times (which has caused lots of problems) and then
abandoning it, we use the idle context to boot the APs directly. This
should help >2 CPU support since the bootlock stuff was in doubt.
- Print the physical APIC id in traps; helps identify private pages getting
out of sync. (You don't want to know how much hair I tore out with this!)
More cleanup to follow; this is more of a checkpoint than a
'finished' thing.
Diffstat (limited to 'sys')
31 files changed, 1569 insertions, 779 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index 61b1dfa..f749f03 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -33,7 +33,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: swtch.s,v 1.58 1997/08/04 17:31:43 fsmp Exp $ + * $Id: swtch.s,v 1.59 1997/08/09 00:02:47 dyson Exp $ */ #include "npx.h" @@ -240,17 +240,94 @@ rem3id: .asciz "remrq.id" /* * When no processes are on the runq, cpu_switch() branches to _idle * to wait for something to come ready. - * - * NOTE: on an SMP system this routine is a startup-only code path. - * once initialization is over, meaning the idle procs have been - * created, we should NEVER branch here. */ ALIGN_TEXT _idle: -#if defined(SMP) && defined(DIAGNOSTIC) - cmpl $0, _smp_active - jnz badsw3 -#endif /* SMP && DIAGNOSTIC */ +#ifdef SMP + /* when called, we have the mplock, intr disabled */ + + xorl %ebp,%ebp + + /* use our idleproc's "context" */ + movl _my_idlePTD,%ecx + movl %ecx,%cr3 + movl $_idlestack_top,%ecx + movl %ecx,%esp + + /* update common_tss.tss_esp0 pointer */ + movl $_common_tss, %eax + movl %ecx, TSS_ESP0(%eax) + + sti + + /* + * XXX callers of cpu_switch() do a bogus splclock(). Locking should + * be left to cpu_switch(). + */ + call _spl0 + + cli + + /* + * _REALLY_ free the lock, no matter how deep the prior nesting. + * We will recover the nesting on the way out when we have a new + * proc to load. + * + * XXX: we had damn well better be sure we had it before doing this! + */ + movl $FREE_LOCK, %eax + movl %eax, _mp_lock + + /* do NOT have lock, intrs disabled */ + .globl idle_loop +idle_loop: + + movl %cr3,%eax /* ouch! 
*/ + movl %eax,%cr3 + + cmpl $0,_smp_active + jne 1f + cmpl $0,_cpuid + je 1f + jmp 2f + +1: cmpl $0,_whichrtqs /* real-time queue */ + jne 3f + cmpl $0,_whichqs /* normal queue */ + jne 3f + cmpl $0,_whichidqs /* 'idle' queue */ + jne 3f + + cmpl $0,_do_page_zero_idle + je 2f + /* XXX appears to cause panics */ + /* + * Inside zero_idle we enable interrupts and grab the mplock + * as needed. It needs to be careful about entry/exit mutexes. + */ + call _vm_page_zero_idle /* internal locking */ + testl %eax, %eax + jnz idle_loop +2: + + /* enable intrs for a halt */ + sti + call *_hlt_vector /* wait for interrupt */ + cli + jmp idle_loop + +3: + call _get_mplock + cmpl $0,_whichrtqs /* real-time queue */ + CROSSJUMP(jne, sw1a, je) + cmpl $0,_whichqs /* normal queue */ + CROSSJUMP(jne, nortqr, je) + cmpl $0,_whichidqs /* 'idle' queue */ + CROSSJUMP(jne, idqr, je) + call _rel_mplock + jmp idle_loop + +#else xorl %ebp,%ebp movl $HIDENAME(tmpstk),%esp movl _IdlePTD,%ecx @@ -302,6 +379,7 @@ idle_loop: sti call *_hlt_vector /* wait for interrupt */ jmp idle_loop +#endif CROSSJUMPTARGET(_idle) @@ -367,6 +445,17 @@ ENTRY(cpu_switch) /* save is done, now choose a new process or idle */ sw1: cli + +#ifdef SMP + /* Stop scheduling if smp_active goes zero and we are not BSP */ + cmpl $0,_smp_active + jne 1f + cmpl $0,_cpuid + je 1f + CROSSJUMP(je, _idle, jne) /* wind down */ +1: +#endif + sw1a: movl _whichrtqs,%edi /* pick next p. from rtqs */ testl %edi,%edi @@ -594,12 +683,6 @@ sw0_2: .asciz "cpu_switch: not SRUN" #endif #if defined(SMP) && defined(DIAGNOSTIC) -badsw3: - pushl $sw0_3 - call _panic - -sw0_3: .asciz "cpu_switch: went idle with smp_active" - badsw4: pushl $sw0_4 call _panic diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S index 23007033..a3df36f 100644 --- a/sys/amd64/amd64/locore.S +++ b/sys/amd64/amd64/locore.S @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 - * $Id: locore.s,v 1.3 1997/08/23 04:10:36 smp Exp smp $ + * $Id: locore.s,v 1.95 1997/08/24 00:05:33 fsmp Exp $ * * originally from: locore.s, by William F. Jolitz * @@ -90,24 +90,36 @@ .set _SMP_prvstart,(MPPTDI << PDRSHIFT) .globl _SMP_prvpage,_SMP_prvpt,_lapic,_SMP_ioapic + .globl _prv_CPAGE1,_prv_CPAGE2,_prv_CPAGE3 + .globl _idlestack,_idlestack_top .set _SMP_prvpage,_SMP_prvstart .set _SMP_prvpt,_SMP_prvstart + PAGE_SIZE .set _lapic,_SMP_prvstart + (2 * PAGE_SIZE) + .set _idlestack,_SMP_prvstart + (3 * PAGE_SIZE) + .set _idlestack_top,_SMP_prvstart + (4 * PAGE_SIZE) + .set _prv_CPAGE1,_SMP_prvstart + (4 * PAGE_SIZE) + .set _prv_CPAGE2,_SMP_prvstart + (5 * PAGE_SIZE) + .set _prv_CPAGE3,_SMP_prvstart + (6 * PAGE_SIZE) .set _SMP_ioapic,_SMP_prvstart + (16 * PAGE_SIZE) .globl _cpuid,_curproc,_curpcb,_npxproc,_runtime,_cpu_lockid - .globl _common_tss,_other_cpus,_ss_tpr - .set _cpuid,_SMP_prvpage+0 - .set _curproc,_SMP_prvpage+4 - .set _curpcb,_SMP_prvpage+8 - .set _npxproc,_SMP_prvpage+12 - .set _runtime,_SMP_prvpage+16 /* 8 bytes struct timeval */ - .set _cpu_lockid,_SMP_prvpage+24 - .set _common_tss,_SMP_prvpage+28 /* 104 bytes long, next = 132 */ - .set _other_cpus,_SMP_prvpage+132 /* bitmap of available CPUs, + .globl _common_tss,_other_cpus,_my_idlePTD,_ss_tpr + .globl _prv_CMAP1,_prv_CMAP2,_prv_CMAP3 + .set _cpuid,_SMP_prvpage+0 /* [0] */ + .set _curproc,_SMP_prvpage+4 /* [1] */ + .set _curpcb,_SMP_prvpage+8 /* [2] */ + .set _npxproc,_SMP_prvpage+12 /* [3] */ + .set _runtime,_SMP_prvpage+16 /* [4,5] */ + .set _cpu_lockid,_SMP_prvpage+24 /* [6] */ + .set _other_cpus,_SMP_prvpage+28 /* [7] bitmap of available CPUs, excluding ourself */ - .set _ss_tpr,_SMP_prvpage+136 - + .set _my_idlePTD,_SMP_prvpage+32 /* [8] */ + .set _ss_tpr,_SMP_prvpage+36 /* [9] */ + .set _prv_CMAP1,_SMP_prvpage+40 /* [10] */ + .set _prv_CMAP2,_SMP_prvpage+44 /* [11] */ + .set _prv_CMAP3,_SMP_prvpage+48 /* [12] */ + .set 
_common_tss,_SMP_prvpage+52 /* 102 (ie: 104) bytes long */ + /* Fetch the .set's for the local apic */ #include "i386/i386/mp_apicdefs.s" diff --git a/sys/amd64/amd64/locore.s b/sys/amd64/amd64/locore.s index 23007033..a3df36f 100644 --- a/sys/amd64/amd64/locore.s +++ b/sys/amd64/amd64/locore.s @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 - * $Id: locore.s,v 1.3 1997/08/23 04:10:36 smp Exp smp $ + * $Id: locore.s,v 1.95 1997/08/24 00:05:33 fsmp Exp $ * * originally from: locore.s, by William F. Jolitz * @@ -90,24 +90,36 @@ .set _SMP_prvstart,(MPPTDI << PDRSHIFT) .globl _SMP_prvpage,_SMP_prvpt,_lapic,_SMP_ioapic + .globl _prv_CPAGE1,_prv_CPAGE2,_prv_CPAGE3 + .globl _idlestack,_idlestack_top .set _SMP_prvpage,_SMP_prvstart .set _SMP_prvpt,_SMP_prvstart + PAGE_SIZE .set _lapic,_SMP_prvstart + (2 * PAGE_SIZE) + .set _idlestack,_SMP_prvstart + (3 * PAGE_SIZE) + .set _idlestack_top,_SMP_prvstart + (4 * PAGE_SIZE) + .set _prv_CPAGE1,_SMP_prvstart + (4 * PAGE_SIZE) + .set _prv_CPAGE2,_SMP_prvstart + (5 * PAGE_SIZE) + .set _prv_CPAGE3,_SMP_prvstart + (6 * PAGE_SIZE) .set _SMP_ioapic,_SMP_prvstart + (16 * PAGE_SIZE) .globl _cpuid,_curproc,_curpcb,_npxproc,_runtime,_cpu_lockid - .globl _common_tss,_other_cpus,_ss_tpr - .set _cpuid,_SMP_prvpage+0 - .set _curproc,_SMP_prvpage+4 - .set _curpcb,_SMP_prvpage+8 - .set _npxproc,_SMP_prvpage+12 - .set _runtime,_SMP_prvpage+16 /* 8 bytes struct timeval */ - .set _cpu_lockid,_SMP_prvpage+24 - .set _common_tss,_SMP_prvpage+28 /* 104 bytes long, next = 132 */ - .set _other_cpus,_SMP_prvpage+132 /* bitmap of available CPUs, + .globl _common_tss,_other_cpus,_my_idlePTD,_ss_tpr + .globl _prv_CMAP1,_prv_CMAP2,_prv_CMAP3 + .set _cpuid,_SMP_prvpage+0 /* [0] */ + .set _curproc,_SMP_prvpage+4 /* [1] */ + .set _curpcb,_SMP_prvpage+8 /* [2] */ + .set _npxproc,_SMP_prvpage+12 /* [3] */ + .set _runtime,_SMP_prvpage+16 /* [4,5] */ + .set _cpu_lockid,_SMP_prvpage+24 /* [6] */ + .set _other_cpus,_SMP_prvpage+28 /* [7] 
bitmap of available CPUs, excluding ourself */ - .set _ss_tpr,_SMP_prvpage+136 - + .set _my_idlePTD,_SMP_prvpage+32 /* [8] */ + .set _ss_tpr,_SMP_prvpage+36 /* [9] */ + .set _prv_CMAP1,_SMP_prvpage+40 /* [10] */ + .set _prv_CMAP2,_SMP_prvpage+44 /* [11] */ + .set _prv_CMAP3,_SMP_prvpage+48 /* [12] */ + .set _common_tss,_SMP_prvpage+52 /* 102 (ie: 104) bytes long */ + /* Fetch the .set's for the local apic */ #include "i386/i386/mp_apicdefs.s" diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 7376267..a11153a 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.256 1997/08/18 06:58:09 charnier Exp $ + * $Id: machdep.c,v 1.257 1997/08/21 06:32:38 charnier Exp $ */ #include "apm.h" @@ -392,6 +392,12 @@ again: printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1024); + /* + * Set up buffers, so they can be used to read disk labels. + */ + bufinit(); + vm_pager_bufferinit(); + #ifdef SMP /* * OK, enough kmem_alloc/malloc state should be up, lets get on with it! @@ -399,12 +405,6 @@ again: mp_start(); /* fire up the APs and APICs */ mp_announce(); #endif /* SMP */ - - /* - * Set up buffers, so they can be used to read disk labels. - */ - bufinit(); - vm_pager_bufferinit(); } int diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index f647070..a4a6423 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -22,17 +22,20 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: mp_machdep.c,v 1.44 1997/08/24 20:33:32 fsmp Exp $ + * $Id: mp_machdep.c,v 1.45 1997/08/25 21:28:08 bde Exp $ */ #include "opt_smp.h" #include <sys/param.h> #include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/sysctl.h> -#include <vm/vm.h> /* for KERNBASE */ -#include <vm/vm_param.h> /* for KERNBASE */ -#include <vm/pmap.h> /* for KERNBASE */ +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> @@ -230,15 +233,24 @@ int apic_id_to_logical[NAPICID]; /* Bitmap of all available CPUs */ u_int all_cpus; -/* Boot of AP uses this PTD */ -u_int *bootPTD; +/* AP uses this PTD during bootstrap */ +pd_entry_t *bootPTD; /* Hotwire a 0->4MB V==P mapping */ -extern pt_entry_t KPTphys; +extern pt_entry_t *KPTphys; /* Virtual address of per-cpu common_tss */ extern struct i386tss common_tss; +/* IdlePTD per cpu */ +pd_entry_t *IdlePTDS[NCPU]; + +/* "my" private page table page, for BSP init */ +extern pt_entry_t SMP_prvpt[]; + +/* Private page pointer to curcpu's PTD, used during BSP init */ +extern pd_entry_t *my_idlePTD; + /* * Local data and functions. */ @@ -1473,9 +1485,9 @@ start_all_aps(u_int boot_addr) int x, i; u_char mpbiosreason; u_long mpbioswarmvec; - pd_entry_t newptd; - pt_entry_t newpt; - int *newpp; + pd_entry_t *newptd; + pt_entry_t *newpt; + int *newpp, *stack; POSTCODE(START_ALL_APS_POST); @@ -1498,57 +1510,71 @@ start_all_aps(u_int boot_addr) /* start each AP */ for (x = 1; x <= mp_naps; ++x) { - /* HACK HACK HACK !!! */ + /* This is a bit verbose, it will go away soon. 
*/ /* alloc new page table directory */ - newptd = (pd_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE)); + newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); + + /* Store the virtual PTD address for this CPU */ + IdlePTDS[x] = newptd; /* clone currently active one (ie: IdlePTD) */ bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ /* set up 0 -> 4MB P==V mapping for AP boot */ - newptd[0] = PG_V | PG_RW | ((u_long)KPTphys & PG_FRAME); + newptd[0] = (pd_entry_t) (PG_V | PG_RW | + ((u_long)KPTphys & PG_FRAME)); - /* store PTD for this AP */ - bootPTD = (pd_entry_t)vtophys(newptd); + /* store PTD for this AP's boot sequence */ + bootPTD = (pd_entry_t *)vtophys(newptd); /* alloc new page table page */ - newpt = (pt_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE)); + newpt = (pt_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); /* set the new PTD's private page to point there */ - newptd[MPPTDI] = PG_V | PG_RW | vtophys(newpt); + newptd[MPPTDI] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt)); /* install self referential entry */ - newptd[PTDPTDI] = PG_V | PG_RW | vtophys(newptd); + newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); - /* get a new private data page */ + /* allocate a new private data page */ newpp = (int *)kmem_alloc(kernel_map, PAGE_SIZE); /* wire it into the private page table page */ - newpt[0] = PG_V | PG_RW | vtophys(newpp); + newpt[0] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpp)); /* wire the ptp into itself for access */ - newpt[1] = PG_V | PG_RW | vtophys(newpt); + newpt[1] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt)); - /* and the local apic */ + /* copy in the pointer to the local apic */ newpt[2] = SMP_prvpt[2]; /* and the IO apic mapping[s] */ for (i = 16; i < 32; i++) newpt[i] = SMP_prvpt[i]; - /* prime data page for it to use */ - newpp[0] = x; /* cpuid */ - newpp[1] = 0; /* curproc */ - newpp[2] = 0; /* curpcb */ - newpp[3] = 0; /* npxproc */ - newpp[4] = 0; /* runtime.tv_sec */ - newpp[5] = 0; /* runtime.tv_usec */ - newpp[6] = 
x << 24; /* cpu_lockid */ + /* allocate and set up an idle stack data page */ + stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); + newpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); - /* XXX NOTE: ABANDON bootPTD for now!!!! */ + newpt[4] = 0; /* *prv_CMAP1 */ + newpt[5] = 0; /* *prv_CMAP2 */ + newpt[6] = 0; /* *prv_CMAP3 */ - /* END REVOLTING HACKERY */ + /* prime data page for it to use */ + newpp[0] = x; /* cpuid */ + newpp[1] = 0; /* curproc */ + newpp[2] = 0; /* curpcb */ + newpp[3] = 0; /* npxproc */ + newpp[4] = 0; /* runtime.tv_sec */ + newpp[5] = 0; /* runtime.tv_usec */ + newpp[6] = x << 24; /* cpu_lockid */ + newpp[7] = 0; /* other_cpus */ + newpp[8] = (int)bootPTD; /* my_idlePTD */ + newpp[9] = 0; /* ss_tpr */ + newpp[10] = (int)&newpt[4]; /* prv_CMAP1 */ + newpp[11] = (int)&newpt[5]; /* prv_CMAP2 */ + newpp[12] = (int)&newpt[6]; /* prv_CMAP3 */ /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; @@ -1585,6 +1611,26 @@ start_all_aps(u_int boot_addr) outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); + /* + * Set up the idle context for the BSP. Similar to above except + * that some was done by locore, some by pmap.c and some is implicit + * because the BSP is cpu#0 and the page is initially zero, and also + * because we can refer to variables by name on the BSP.. + */ + newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); + + bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ + IdlePTDS[0] = newptd; + + /* Point PTD[] to this page instead of IdlePTD's physical page */ + newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); + + my_idlePTD = (pd_entry_t *)vtophys(newptd); + + /* Allocate and setup BSP idle stack */ + stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); + SMP_prvpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); + pmap_set_opt_bsp(); /* number of APs actually started */ @@ -1800,7 +1846,7 @@ invltlb(void) * from executing at same time. 
*/ int -stop_cpus( u_int map ) +stop_cpus(u_int map) { if (!smp_active) return 0; @@ -1832,7 +1878,7 @@ stop_cpus( u_int map ) * 1: ok */ int -restart_cpus( u_int map ) +restart_cpus(u_int map) { if (!smp_active) return 0; @@ -1844,3 +1890,63 @@ restart_cpus( u_int map ) return 1; } + +int smp_active = 0; /* are the APs allowed to run? */ +SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, ""); + +/* XXX maybe should be hw.ncpu */ +int smp_cpus = 1; /* how many cpu's running */ +SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, ""); + +int invltlb_ok = 0; /* throttle smp_invltlb() till safe */ +SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, ""); + +int do_page_zero_idle = 0; /* bzero pages for fun and profit in idleloop */ +SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW, + &do_page_zero_idle, 0, ""); + + +/* + * This is called once the rest of the system is up and running and we're + * ready to let the AP's out of the pen. + */ +void ap_init(void); + +void +ap_init() +{ + u_int temp; + u_int apic_id; + + smp_cpus++; + + /* Build our map of 'other' CPUs. */ + other_cpus = all_cpus & ~(1 << cpuid); + + printf("SMP: AP CPU #%d Launched!\n", cpuid); + + /* XXX FIXME: i386 specific, and redundant: Setup the FPU. */ + load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS); + + /* A quick check from sanity claus */ + apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]); + if (cpuid != apic_id) { + printf("SMP: cpuid = %d\n", cpuid); + printf("SMP: apic_id = %d\n", apic_id); + printf("PTD[MPPTDI] = %08x\n", PTD[MPPTDI]); + panic("cpuid mismatch! boom!!"); + } + + /* Init local apic for irq's */ + apic_initialize(); + + /* + * Activate smp_invltlb, although strictly speaking, this isn't + * quite correct yet. We should have a bitfield for cpus willing + * to accept TLB flush IPI's or something and sync them. 
+ */ + invltlb_ok = 1; + smp_active = 1; /* historic */ + + curproc = NULL; /* make sure */ +} diff --git a/sys/amd64/amd64/mpboot.S b/sys/amd64/amd64/mpboot.S index 8a6af0a..9d51b1f 100644 --- a/sys/amd64/amd64/mpboot.S +++ b/sys/amd64/amd64/mpboot.S @@ -31,10 +31,9 @@ * mpboot.s: FreeBSD machine support for the Intel MP Spec * multiprocessor systems. * - * $Id: mpboot.s,v 1.2 1997/06/22 16:03:22 peter Exp $ + * $Id: mpboot.s,v 1.3 1997/08/25 10:57:36 peter Exp $ */ - #include <machine/asmacros.h> /* miscellaneous asm macros */ #include <machine/apic.h> #include <machine/specialreg.h> @@ -74,15 +73,13 @@ NON_GPROF_ENTRY(MPentry) CHECKPOINT(0x36, 3) - movl $mp_stk-KERNBASE,%esp /* mp boot stack end loc. */ /* Now enable paging mode */ movl _bootPTD-KERNBASE, %eax movl %eax,%cr3 - movl %cr0,%eax + movl %cr0,%eax orl $CR0_PE|CR0_PG,%eax /* enable paging */ - movl $0x80000011,%eax movl %eax,%cr0 /* let the games begin! */ - movl $mp_stk,%esp /* mp boot stack end loc. */ + movl $_idlestack_top,%esp /* boot stack end loc. */ pushl $mp_begin /* jump to high mem */ ret @@ -105,30 +102,16 @@ mp_begin: /* now running relocated at KERNBASE */ movl %eax, _cpu_apic_versions /* into [ 0 ] */ incl _mp_ncpus /* signal BSP */ - /* One at a time, we are running on the shared mp_stk */ - /* This is the Intel reccomended semaphore method */ -#define BL_SET 0xff -#define BL_CLR 0x00 - movb $BL_SET, %al -1: - xchgb %al, bootlock /* xchg is implicitly locked */ - cmpb $BL_SET, %al /* was is set? */ - jz 1b /* yes, keep trying... */ CHECKPOINT(0x39, 6) - /* Now, let's do some REAL WORK :-) */ - call _secondary_main -/* NOT REACHED */ -2: hlt - jmp 2b + /* wait till we can get into the kernel */ + call _boot_get_mplock -/* - * Let a CPU past the semaphore so it can use mp_stk - */ -ENTRY(boot_unlock) - movb $BL_CLR, %al - xchgb %al, bootlock /* xchg is implicitly locked */ - ret + /* Now, let's prepare for some REAL WORK :-) */ + call _ap_init + + /* let her rip! 
(loads new stack) */ + jmp _cpu_switch /* * This is the embedded trampoline or bootstrap that is @@ -300,17 +283,3 @@ BOOTMP2: .globl _bootMP_size _bootMP_size: .long BOOTMP2 - BOOTMP1 - - /* - * Temporary stack used while booting AP's - * It is protected by: - * 1: only one cpu is started at a time and it ends up waiting - * for smp_active before continuing. - * 2: Once smp_active != 0; further access is limited by _bootlock. - */ - .globl mp_stk - .space 0x2000 /* space for mp_stk - 2nd temporary stack */ -mp_stk: - - .globl bootlock -bootlock: .byte BL_SET diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c index f647070..a4a6423 100644 --- a/sys/amd64/amd64/mptable.c +++ b/sys/amd64/amd64/mptable.c @@ -22,17 +22,20 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: mp_machdep.c,v 1.44 1997/08/24 20:33:32 fsmp Exp $ + * $Id: mp_machdep.c,v 1.45 1997/08/25 21:28:08 bde Exp $ */ #include "opt_smp.h" #include <sys/param.h> #include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/sysctl.h> -#include <vm/vm.h> /* for KERNBASE */ -#include <vm/vm_param.h> /* for KERNBASE */ -#include <vm/pmap.h> /* for KERNBASE */ +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> @@ -230,15 +233,24 @@ int apic_id_to_logical[NAPICID]; /* Bitmap of all available CPUs */ u_int all_cpus; -/* Boot of AP uses this PTD */ -u_int *bootPTD; +/* AP uses this PTD during bootstrap */ +pd_entry_t *bootPTD; /* Hotwire a 0->4MB V==P mapping */ -extern pt_entry_t KPTphys; +extern pt_entry_t *KPTphys; /* Virtual address of per-cpu common_tss */ extern struct i386tss common_tss; +/* IdlePTD per cpu */ +pd_entry_t *IdlePTDS[NCPU]; + +/* "my" private page table page, for BSP init */ +extern pt_entry_t SMP_prvpt[]; + +/* Private page pointer to curcpu's PTD, used during BSP init */ +extern pd_entry_t *my_idlePTD; + /* * Local data and functions. 
*/ @@ -1473,9 +1485,9 @@ start_all_aps(u_int boot_addr) int x, i; u_char mpbiosreason; u_long mpbioswarmvec; - pd_entry_t newptd; - pt_entry_t newpt; - int *newpp; + pd_entry_t *newptd; + pt_entry_t *newpt; + int *newpp, *stack; POSTCODE(START_ALL_APS_POST); @@ -1498,57 +1510,71 @@ start_all_aps(u_int boot_addr) /* start each AP */ for (x = 1; x <= mp_naps; ++x) { - /* HACK HACK HACK !!! */ + /* This is a bit verbose, it will go away soon. */ /* alloc new page table directory */ - newptd = (pd_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE)); + newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); + + /* Store the virtual PTD address for this CPU */ + IdlePTDS[x] = newptd; /* clone currently active one (ie: IdlePTD) */ bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ /* set up 0 -> 4MB P==V mapping for AP boot */ - newptd[0] = PG_V | PG_RW | ((u_long)KPTphys & PG_FRAME); + newptd[0] = (pd_entry_t) (PG_V | PG_RW | + ((u_long)KPTphys & PG_FRAME)); - /* store PTD for this AP */ - bootPTD = (pd_entry_t)vtophys(newptd); + /* store PTD for this AP's boot sequence */ + bootPTD = (pd_entry_t *)vtophys(newptd); /* alloc new page table page */ - newpt = (pt_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE)); + newpt = (pt_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); /* set the new PTD's private page to point there */ - newptd[MPPTDI] = PG_V | PG_RW | vtophys(newpt); + newptd[MPPTDI] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt)); /* install self referential entry */ - newptd[PTDPTDI] = PG_V | PG_RW | vtophys(newptd); + newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); - /* get a new private data page */ + /* allocate a new private data page */ newpp = (int *)kmem_alloc(kernel_map, PAGE_SIZE); /* wire it into the private page table page */ - newpt[0] = PG_V | PG_RW | vtophys(newpp); + newpt[0] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpp)); /* wire the ptp into itself for access */ - newpt[1] = PG_V | PG_RW | vtophys(newpt); + newpt[1] = (pt_entry_t)(PG_V | PG_RW 
| vtophys(newpt)); - /* and the local apic */ + /* copy in the pointer to the local apic */ newpt[2] = SMP_prvpt[2]; /* and the IO apic mapping[s] */ for (i = 16; i < 32; i++) newpt[i] = SMP_prvpt[i]; - /* prime data page for it to use */ - newpp[0] = x; /* cpuid */ - newpp[1] = 0; /* curproc */ - newpp[2] = 0; /* curpcb */ - newpp[3] = 0; /* npxproc */ - newpp[4] = 0; /* runtime.tv_sec */ - newpp[5] = 0; /* runtime.tv_usec */ - newpp[6] = x << 24; /* cpu_lockid */ + /* allocate and set up an idle stack data page */ + stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); + newpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); - /* XXX NOTE: ABANDON bootPTD for now!!!! */ + newpt[4] = 0; /* *prv_CMAP1 */ + newpt[5] = 0; /* *prv_CMAP2 */ + newpt[6] = 0; /* *prv_CMAP3 */ - /* END REVOLTING HACKERY */ + /* prime data page for it to use */ + newpp[0] = x; /* cpuid */ + newpp[1] = 0; /* curproc */ + newpp[2] = 0; /* curpcb */ + newpp[3] = 0; /* npxproc */ + newpp[4] = 0; /* runtime.tv_sec */ + newpp[5] = 0; /* runtime.tv_usec */ + newpp[6] = x << 24; /* cpu_lockid */ + newpp[7] = 0; /* other_cpus */ + newpp[8] = (int)bootPTD; /* my_idlePTD */ + newpp[9] = 0; /* ss_tpr */ + newpp[10] = (int)&newpt[4]; /* prv_CMAP1 */ + newpp[11] = (int)&newpt[5]; /* prv_CMAP2 */ + newpp[12] = (int)&newpt[6]; /* prv_CMAP3 */ /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; @@ -1585,6 +1611,26 @@ start_all_aps(u_int boot_addr) outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); + /* + * Set up the idle context for the BSP. Similar to above except + * that some was done by locore, some by pmap.c and some is implicit + * because the BSP is cpu#0 and the page is initially zero, and also + * because we can refer to variables by name on the BSP.. 
+ */ + newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); + + bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ + IdlePTDS[0] = newptd; + + /* Point PTD[] to this page instead of IdlePTD's physical page */ + newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); + + my_idlePTD = (pd_entry_t *)vtophys(newptd); + + /* Allocate and setup BSP idle stack */ + stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); + SMP_prvpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); + pmap_set_opt_bsp(); /* number of APs actually started */ @@ -1800,7 +1846,7 @@ invltlb(void) * from executing at same time. */ int -stop_cpus( u_int map ) +stop_cpus(u_int map) { if (!smp_active) return 0; @@ -1832,7 +1878,7 @@ stop_cpus( u_int map ) * 1: ok */ int -restart_cpus( u_int map ) +restart_cpus(u_int map) { if (!smp_active) return 0; @@ -1844,3 +1890,63 @@ restart_cpus( u_int map ) return 1; } + +int smp_active = 0; /* are the APs allowed to run? */ +SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, ""); + +/* XXX maybe should be hw.ncpu */ +int smp_cpus = 1; /* how many cpu's running */ +SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, ""); + +int invltlb_ok = 0; /* throttle smp_invltlb() till safe */ +SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, ""); + +int do_page_zero_idle = 0; /* bzero pages for fun and profit in idleloop */ +SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW, + &do_page_zero_idle, 0, ""); + + +/* + * This is called once the rest of the system is up and running and we're + * ready to let the AP's out of the pen. + */ +void ap_init(void); + +void +ap_init() +{ + u_int temp; + u_int apic_id; + + smp_cpus++; + + /* Build our map of 'other' CPUs. */ + other_cpus = all_cpus & ~(1 << cpuid); + + printf("SMP: AP CPU #%d Launched!\n", cpuid); + + /* XXX FIXME: i386 specific, and redundant: Setup the FPU. 
*/ + load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS); + + /* A quick check from sanity claus */ + apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]); + if (cpuid != apic_id) { + printf("SMP: cpuid = %d\n", cpuid); + printf("SMP: apic_id = %d\n", apic_id); + printf("PTD[MPPTDI] = %08x\n", PTD[MPPTDI]); + panic("cpuid mismatch! boom!!"); + } + + /* Init local apic for irq's */ + apic_initialize(); + + /* + * Activate smp_invltlb, although strictly speaking, this isn't + * quite correct yet. We should have a bitfield for cpus willing + * to accept TLB flush IPI's or something and sync them. + */ + invltlb_ok = 1; + smp_active = 1; /* historic */ + + curproc = NULL; /* make sure */ +} diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 3c43184..2b069f7 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -39,7 +39,7 @@ * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $Id: pmap.c,v 1.158 1997/08/07 05:15:48 dyson Exp $ + * $Id: pmap.c,v 1.159 1997/08/25 21:53:01 bde Exp $ */ /* @@ -183,6 +183,13 @@ static caddr_t CADDR2; static pt_entry_t *msgbufmap; struct msgbuf *msgbufp=0; +#ifdef SMP +extern char prv_CPAGE1[], prv_CPAGE2[], prv_CPAGE3[]; +extern pt_entry_t *prv_CMAP1, *prv_CMAP2, *prv_CMAP3; +extern pd_entry_t *IdlePTDS[]; +extern pt_entry_t SMP_prvpt[]; +#endif + pt_entry_t *PMAP1 = 0; unsigned *PADDR1 = 0; @@ -408,7 +415,7 @@ pmap_bootstrap(firstaddr, loadaddr) /* 1 = page table page */ /* 2 = local apic */ /* 16-31 = io apics */ - SMP_prvpt[2] = PG_V | PG_RW | pgeflag | ((u_long)cpu_apic_address & PG_FRAME); + SMP_prvpt[2] = (pt_entry_t)(PG_V | PG_RW | pgeflag | ((u_long)cpu_apic_address & PG_FRAME)); for (i = 0; i < mp_napics; i++) { for (j = 0; j < 16; j++) { @@ -420,8 +427,8 @@ pmap_bootstrap(firstaddr, loadaddr) } /* use this slot if available */ if (((u_long)SMP_prvpt[j + 16] & PG_FRAME) == 0) { - SMP_prvpt[j + 16] = PG_V | PG_RW | pgeflag | - ((u_long)io_apic_address[i] & PG_FRAME); + 
SMP_prvpt[j + 16] = (pt_entry_t)(PG_V | PG_RW | pgeflag | + ((u_long)io_apic_address[i] & PG_FRAME)); ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE]; break; } @@ -429,6 +436,11 @@ pmap_bootstrap(firstaddr, loadaddr) if (j == 16) panic("no space to map IO apic %d!", i); } + + /* BSP does this itself, AP's get it pre-set */ + prv_CMAP1 = (pt_entry_t *)&SMP_prvpt[4]; + prv_CMAP2 = (pt_entry_t *)&SMP_prvpt[5]; + prv_CMAP3 = (pt_entry_t *)&SMP_prvpt[6]; #endif invltlb(); @@ -463,7 +475,8 @@ pmap_set_opt(unsigned *pdir) { * Setup the PTD for the boot processor */ void -pmap_set_opt_bsp(void) { +pmap_set_opt_bsp(void) +{ pmap_set_opt((unsigned *)kernel_pmap->pm_pdir); pmap_set_opt((unsigned *)PTD); invltlb(); @@ -1414,6 +1427,9 @@ pmap_growkernel(vm_offset_t addr) struct proc *p; struct pmap *pmap; int s; +#ifdef SMP + int i; +#endif s = splhigh(); if (kernel_vm_end == 0) { @@ -1446,6 +1462,14 @@ pmap_growkernel(vm_offset_t addr) pmap_zero_page(VM_PAGE_TO_PHYS(nkpg)); } pdir_pde(PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_RW | pgeflag); + +#ifdef SMP + for (i = 0; i < mp_naps; i++) { + if (IdlePTDS[i]) + pdir_pde(IdlePTDS[i], kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_RW | pgeflag); + } +#endif + nkpg = NULL; for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { @@ -2591,6 +2615,18 @@ void pmap_zero_page(phys) vm_offset_t phys; { +#ifdef SMP + if (*(int *) prv_CMAP3) + panic("pmap_zero_page: prv_CMAP3 busy"); + + *(int *) prv_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME); + invltlb_1pg((vm_offset_t) &prv_CPAGE3); + + bzero(&prv_CPAGE3, PAGE_SIZE); + + *(int *) prv_CMAP3 = 0; + invltlb_1pg((vm_offset_t) &prv_CPAGE3); +#else if (*(int *) CMAP2) panic("pmap_zero_page: CMAP busy"); @@ -2598,6 +2634,7 @@ pmap_zero_page(phys) bzero(CADDR2, PAGE_SIZE); *(int *) CMAP2 = 0; invltlb_1pg((vm_offset_t) CADDR2); +#endif } /* @@ -2611,6 +2648,23 @@ pmap_copy_page(src, dst) vm_offset_t src; vm_offset_t dst; { +#ifdef SMP + if (*(int 
*) prv_CMAP1) + panic("pmap_copy_page: prv_CMAP1 busy"); + if (*(int *) prv_CMAP2) + panic("pmap_copy_page: prv_CMAP2 busy"); + + *(int *) prv_CMAP1 = PG_V | PG_RW | (src & PG_FRAME); + *(int *) prv_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME); + + invltlb_2pg( (vm_offset_t) &prv_CPAGE1, (vm_offset_t) &prv_CPAGE2); + + bcopy(&prv_CPAGE1, &prv_CPAGE2, PAGE_SIZE); + + *(int *) prv_CMAP1 = 0; + *(int *) prv_CMAP2 = 0; + invltlb_2pg( (vm_offset_t) &prv_CPAGE1, (vm_offset_t) &prv_CPAGE2); +#else if (*(int *) CMAP1 || *(int *) CMAP2) panic("pmap_copy_page: CMAP busy"); @@ -2622,6 +2676,7 @@ pmap_copy_page(src, dst) *(int *) CMAP1 = 0; *(int *) CMAP2 = 0; invltlb_2pg( (vm_offset_t) CADDR1, (vm_offset_t) CADDR2); +#endif } diff --git a/sys/amd64/amd64/swtch.s b/sys/amd64/amd64/swtch.s index 61b1dfa..f749f03 100644 --- a/sys/amd64/amd64/swtch.s +++ b/sys/amd64/amd64/swtch.s @@ -33,7 +33,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: swtch.s,v 1.58 1997/08/04 17:31:43 fsmp Exp $ + * $Id: swtch.s,v 1.59 1997/08/09 00:02:47 dyson Exp $ */ #include "npx.h" @@ -240,17 +240,94 @@ rem3id: .asciz "remrq.id" /* * When no processes are on the runq, cpu_switch() branches to _idle * to wait for something to come ready. - * - * NOTE: on an SMP system this routine is a startup-only code path. - * once initialization is over, meaning the idle procs have been - * created, we should NEVER branch here. */ ALIGN_TEXT _idle: -#if defined(SMP) && defined(DIAGNOSTIC) - cmpl $0, _smp_active - jnz badsw3 -#endif /* SMP && DIAGNOSTIC */ +#ifdef SMP + /* when called, we have the mplock, intr disabled */ + + xorl %ebp,%ebp + + /* use our idleproc's "context" */ + movl _my_idlePTD,%ecx + movl %ecx,%cr3 + movl $_idlestack_top,%ecx + movl %ecx,%esp + + /* update common_tss.tss_esp0 pointer */ + movl $_common_tss, %eax + movl %ecx, TSS_ESP0(%eax) + + sti + + /* + * XXX callers of cpu_switch() do a bogus splclock(). 
Locking should + * be left to cpu_switch(). + */ + call _spl0 + + cli + + /* + * _REALLY_ free the lock, no matter how deep the prior nesting. + * We will recover the nesting on the way out when we have a new + * proc to load. + * + * XXX: we had damn well better be sure we had it before doing this! + */ + movl $FREE_LOCK, %eax + movl %eax, _mp_lock + + /* do NOT have lock, intrs disabled */ + .globl idle_loop +idle_loop: + + movl %cr3,%eax /* ouch! */ + movl %eax,%cr3 + + cmpl $0,_smp_active + jne 1f + cmpl $0,_cpuid + je 1f + jmp 2f + +1: cmpl $0,_whichrtqs /* real-time queue */ + jne 3f + cmpl $0,_whichqs /* normal queue */ + jne 3f + cmpl $0,_whichidqs /* 'idle' queue */ + jne 3f + + cmpl $0,_do_page_zero_idle + je 2f + /* XXX appears to cause panics */ + /* + * Inside zero_idle we enable interrupts and grab the mplock + * as needed. It needs to be careful about entry/exit mutexes. + */ + call _vm_page_zero_idle /* internal locking */ + testl %eax, %eax + jnz idle_loop +2: + + /* enable intrs for a halt */ + sti + call *_hlt_vector /* wait for interrupt */ + cli + jmp idle_loop + +3: + call _get_mplock + cmpl $0,_whichrtqs /* real-time queue */ + CROSSJUMP(jne, sw1a, je) + cmpl $0,_whichqs /* normal queue */ + CROSSJUMP(jne, nortqr, je) + cmpl $0,_whichidqs /* 'idle' queue */ + CROSSJUMP(jne, idqr, je) + call _rel_mplock + jmp idle_loop + +#else xorl %ebp,%ebp movl $HIDENAME(tmpstk),%esp movl _IdlePTD,%ecx @@ -302,6 +379,7 @@ idle_loop: sti call *_hlt_vector /* wait for interrupt */ jmp idle_loop +#endif CROSSJUMPTARGET(_idle) @@ -367,6 +445,17 @@ ENTRY(cpu_switch) /* save is done, now choose a new process or idle */ sw1: cli + +#ifdef SMP + /* Stop scheduling if smp_active goes zero and we are not BSP */ + cmpl $0,_smp_active + jne 1f + cmpl $0,_cpuid + je 1f + CROSSJUMP(je, _idle, jne) /* wind down */ +1: +#endif + sw1a: movl _whichrtqs,%edi /* pick next p. 
from rtqs */ testl %edi,%edi @@ -594,12 +683,6 @@ sw0_2: .asciz "cpu_switch: not SRUN" #endif #if defined(SMP) && defined(DIAGNOSTIC) -badsw3: - pushl $sw0_3 - call _panic - -sw0_3: .asciz "cpu_switch: went idle with smp_active" - badsw4: pushl $sw0_4 call _panic diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index ac139d5..5895361 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.106 1997/08/20 05:25:48 fsmp Exp $ + * $Id: trap.c,v 1.107 1997/08/21 06:32:39 charnier Exp $ */ /* @@ -725,6 +725,7 @@ trap_fatal(frame) ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP printf("cpuid = %d\n", cpuid); + printf("lapic.id = %d\n", lapic.id); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%x\n", eva); @@ -769,6 +770,9 @@ trap_fatal(frame) } else { printf("Idle\n"); } +#ifdef SMP + printf("mp_lock = %08x\n", mp_lock); +#endif printf("interrupt mask = "); if ((cpl & net_imask) == net_imask) printf("net "); @@ -824,6 +828,7 @@ dblfault_handler() printf("ebp = 0x%x\n", common_tss.tss_ebp); #ifdef SMP printf("cpuid = %d\n", cpuid); + printf("lapic.id = %d\n", lapic.id); #endif panic("double fault"); } diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 2db2844..0d2c672 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.84 1997/07/20 08:37:24 bde Exp $ + * $Id: vm_machdep.c,v 1.85 1997/08/09 00:02:56 dyson Exp $ */ #include "npx.h" @@ -927,6 +927,9 @@ vm_page_zero_idle() */ if (cnt.v_free_count - vm_page_zero_count <= cnt.v_free_reserved / 2) return (0); +#ifdef SMP + get_mplock(); +#endif s = splvm(); enable_intr(); m = vm_page_list_find(PQ_FREE, free_rover); @@ -934,7 +937,13 @@ vm_page_zero_idle() --(*vm_page_queues[m->queue].lcnt); 
TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq); splx(s); +#ifdef SMP + rel_mplock(); +#endif pmap_zero_page(VM_PAGE_TO_PHYS(m)); +#ifdef SMP + get_mplock(); +#endif (void)splvm(); m->queue = PQ_ZERO + m->pc; ++(*vm_page_queues[m->queue].lcnt); @@ -944,5 +953,8 @@ vm_page_zero_idle() } splx(s); disable_intr(); +#ifdef SMP + rel_mplock(); +#endif return (1); } diff --git a/sys/amd64/include/mptable.h b/sys/amd64/include/mptable.h index f647070..a4a6423 100644 --- a/sys/amd64/include/mptable.h +++ b/sys/amd64/include/mptable.h @@ -22,17 +22,20 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: mp_machdep.c,v 1.44 1997/08/24 20:33:32 fsmp Exp $ + * $Id: mp_machdep.c,v 1.45 1997/08/25 21:28:08 bde Exp $ */ #include "opt_smp.h" #include <sys/param.h> #include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/sysctl.h> -#include <vm/vm.h> /* for KERNBASE */ -#include <vm/vm_param.h> /* for KERNBASE */ -#include <vm/pmap.h> /* for KERNBASE */ +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> @@ -230,15 +233,24 @@ int apic_id_to_logical[NAPICID]; /* Bitmap of all available CPUs */ u_int all_cpus; -/* Boot of AP uses this PTD */ -u_int *bootPTD; +/* AP uses this PTD during bootstrap */ +pd_entry_t *bootPTD; /* Hotwire a 0->4MB V==P mapping */ -extern pt_entry_t KPTphys; +extern pt_entry_t *KPTphys; /* Virtual address of per-cpu common_tss */ extern struct i386tss common_tss; +/* IdlePTD per cpu */ +pd_entry_t *IdlePTDS[NCPU]; + +/* "my" private page table page, for BSP init */ +extern pt_entry_t SMP_prvpt[]; + +/* Private page pointer to curcpu's PTD, used during BSP init */ +extern pd_entry_t *my_idlePTD; + /* * Local data and functions. 
*/ @@ -1473,9 +1485,9 @@ start_all_aps(u_int boot_addr) int x, i; u_char mpbiosreason; u_long mpbioswarmvec; - pd_entry_t newptd; - pt_entry_t newpt; - int *newpp; + pd_entry_t *newptd; + pt_entry_t *newpt; + int *newpp, *stack; POSTCODE(START_ALL_APS_POST); @@ -1498,57 +1510,71 @@ start_all_aps(u_int boot_addr) /* start each AP */ for (x = 1; x <= mp_naps; ++x) { - /* HACK HACK HACK !!! */ + /* This is a bit verbose, it will go away soon. */ /* alloc new page table directory */ - newptd = (pd_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE)); + newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); + + /* Store the virtual PTD address for this CPU */ + IdlePTDS[x] = newptd; /* clone currently active one (ie: IdlePTD) */ bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ /* set up 0 -> 4MB P==V mapping for AP boot */ - newptd[0] = PG_V | PG_RW | ((u_long)KPTphys & PG_FRAME); + newptd[0] = (pd_entry_t) (PG_V | PG_RW | + ((u_long)KPTphys & PG_FRAME)); - /* store PTD for this AP */ - bootPTD = (pd_entry_t)vtophys(newptd); + /* store PTD for this AP's boot sequence */ + bootPTD = (pd_entry_t *)vtophys(newptd); /* alloc new page table page */ - newpt = (pt_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE)); + newpt = (pt_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); /* set the new PTD's private page to point there */ - newptd[MPPTDI] = PG_V | PG_RW | vtophys(newpt); + newptd[MPPTDI] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt)); /* install self referential entry */ - newptd[PTDPTDI] = PG_V | PG_RW | vtophys(newptd); + newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); - /* get a new private data page */ + /* allocate a new private data page */ newpp = (int *)kmem_alloc(kernel_map, PAGE_SIZE); /* wire it into the private page table page */ - newpt[0] = PG_V | PG_RW | vtophys(newpp); + newpt[0] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpp)); /* wire the ptp into itself for access */ - newpt[1] = PG_V | PG_RW | vtophys(newpt); + newpt[1] = (pt_entry_t)(PG_V | PG_RW 
| vtophys(newpt)); - /* and the local apic */ + /* copy in the pointer to the local apic */ newpt[2] = SMP_prvpt[2]; /* and the IO apic mapping[s] */ for (i = 16; i < 32; i++) newpt[i] = SMP_prvpt[i]; - /* prime data page for it to use */ - newpp[0] = x; /* cpuid */ - newpp[1] = 0; /* curproc */ - newpp[2] = 0; /* curpcb */ - newpp[3] = 0; /* npxproc */ - newpp[4] = 0; /* runtime.tv_sec */ - newpp[5] = 0; /* runtime.tv_usec */ - newpp[6] = x << 24; /* cpu_lockid */ + /* allocate and set up an idle stack data page */ + stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); + newpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); - /* XXX NOTE: ABANDON bootPTD for now!!!! */ + newpt[4] = 0; /* *prv_CMAP1 */ + newpt[5] = 0; /* *prv_CMAP2 */ + newpt[6] = 0; /* *prv_CMAP3 */ - /* END REVOLTING HACKERY */ + /* prime data page for it to use */ + newpp[0] = x; /* cpuid */ + newpp[1] = 0; /* curproc */ + newpp[2] = 0; /* curpcb */ + newpp[3] = 0; /* npxproc */ + newpp[4] = 0; /* runtime.tv_sec */ + newpp[5] = 0; /* runtime.tv_usec */ + newpp[6] = x << 24; /* cpu_lockid */ + newpp[7] = 0; /* other_cpus */ + newpp[8] = (int)bootPTD; /* my_idlePTD */ + newpp[9] = 0; /* ss_tpr */ + newpp[10] = (int)&newpt[4]; /* prv_CMAP1 */ + newpp[11] = (int)&newpt[5]; /* prv_CMAP2 */ + newpp[12] = (int)&newpt[6]; /* prv_CMAP3 */ /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; @@ -1585,6 +1611,26 @@ start_all_aps(u_int boot_addr) outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); + /* + * Set up the idle context for the BSP. Similar to above except + * that some was done by locore, some by pmap.c and some is implicit + * because the BSP is cpu#0 and the page is initially zero, and also + * because we can refer to variables by name on the BSP.. 
+ */ + newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); + + bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ + IdlePTDS[0] = newptd; + + /* Point PTD[] to this page instead of IdlePTD's physical page */ + newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); + + my_idlePTD = (pd_entry_t *)vtophys(newptd); + + /* Allocate and setup BSP idle stack */ + stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); + SMP_prvpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); + pmap_set_opt_bsp(); /* number of APs actually started */ @@ -1800,7 +1846,7 @@ invltlb(void) * from executing at same time. */ int -stop_cpus( u_int map ) +stop_cpus(u_int map) { if (!smp_active) return 0; @@ -1832,7 +1878,7 @@ stop_cpus( u_int map ) * 1: ok */ int -restart_cpus( u_int map ) +restart_cpus(u_int map) { if (!smp_active) return 0; @@ -1844,3 +1890,63 @@ restart_cpus( u_int map ) return 1; } + +int smp_active = 0; /* are the APs allowed to run? */ +SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, ""); + +/* XXX maybe should be hw.ncpu */ +int smp_cpus = 1; /* how many cpu's running */ +SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, ""); + +int invltlb_ok = 0; /* throttle smp_invltlb() till safe */ +SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, ""); + +int do_page_zero_idle = 0; /* bzero pages for fun and profit in idleloop */ +SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW, + &do_page_zero_idle, 0, ""); + + +/* + * This is called once the rest of the system is up and running and we're + * ready to let the AP's out of the pen. + */ +void ap_init(void); + +void +ap_init() +{ + u_int temp; + u_int apic_id; + + smp_cpus++; + + /* Build our map of 'other' CPUs. */ + other_cpus = all_cpus & ~(1 << cpuid); + + printf("SMP: AP CPU #%d Launched!\n", cpuid); + + /* XXX FIXME: i386 specific, and redundant: Setup the FPU. 
*/ + load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS); + + /* A quick check from sanity claus */ + apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]); + if (cpuid != apic_id) { + printf("SMP: cpuid = %d\n", cpuid); + printf("SMP: apic_id = %d\n", apic_id); + printf("PTD[MPPTDI] = %08x\n", PTD[MPPTDI]); + panic("cpuid mismatch! boom!!"); + } + + /* Init local apic for irq's */ + apic_initialize(); + + /* + * Activate smp_invltlb, although strictly speaking, this isn't + * quite correct yet. We should have a bitfield for cpus willing + * to accept TLB flush IPI's or something and sync them. + */ + invltlb_ok = 1; + smp_active = 1; /* historic */ + + curproc = NULL; /* make sure */ +} diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index f433b2f..276b838 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -6,7 +6,7 @@ * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * - * $Id: smp.h,v 1.24 1997/08/24 20:17:05 smp Exp smp $ + * $Id: smp.h,v 1.29 1997/08/24 20:33:24 fsmp Exp $ * */ @@ -100,7 +100,6 @@ extern int cpu_num_to_apic_id[]; extern int io_num_to_apic_id[]; extern int apic_id_to_logical[]; extern u_int all_cpus; -extern u_int SMP_prvpt[]; extern u_char SMP_ioapic[]; /* functions in mp_machdep.c */ diff --git a/sys/conf/files b/sys/conf/files index 65d28d9..a414190 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -52,7 +52,6 @@ kern/imgact_gzip.c optional gzip kern/imgact_shell.c standard kern/inflate.c optional gzip kern/init_main.c standard -kern/init_smp.c optional smp kern/init_sysent.c standard kern/init_sysvec.c standard kern/kern_intr.c standard diff --git a/sys/i386/i386/locore.s b/sys/i386/i386/locore.s index 23007033..a3df36f 100644 --- a/sys/i386/i386/locore.s +++ b/sys/i386/i386/locore.s @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 - * $Id: locore.s,v 1.3 1997/08/23 04:10:36 smp Exp smp $ + * $Id: locore.s,v 1.95 1997/08/24 00:05:33 fsmp Exp $ * * originally from: locore.s, by William F. Jolitz * @@ -90,24 +90,36 @@ .set _SMP_prvstart,(MPPTDI << PDRSHIFT) .globl _SMP_prvpage,_SMP_prvpt,_lapic,_SMP_ioapic + .globl _prv_CPAGE1,_prv_CPAGE2,_prv_CPAGE3 + .globl _idlestack,_idlestack_top .set _SMP_prvpage,_SMP_prvstart .set _SMP_prvpt,_SMP_prvstart + PAGE_SIZE .set _lapic,_SMP_prvstart + (2 * PAGE_SIZE) + .set _idlestack,_SMP_prvstart + (3 * PAGE_SIZE) + .set _idlestack_top,_SMP_prvstart + (4 * PAGE_SIZE) + .set _prv_CPAGE1,_SMP_prvstart + (4 * PAGE_SIZE) + .set _prv_CPAGE2,_SMP_prvstart + (5 * PAGE_SIZE) + .set _prv_CPAGE3,_SMP_prvstart + (6 * PAGE_SIZE) .set _SMP_ioapic,_SMP_prvstart + (16 * PAGE_SIZE) .globl _cpuid,_curproc,_curpcb,_npxproc,_runtime,_cpu_lockid - .globl _common_tss,_other_cpus,_ss_tpr - .set _cpuid,_SMP_prvpage+0 - .set _curproc,_SMP_prvpage+4 - .set _curpcb,_SMP_prvpage+8 - .set _npxproc,_SMP_prvpage+12 - .set _runtime,_SMP_prvpage+16 /* 8 bytes struct timeval */ - .set _cpu_lockid,_SMP_prvpage+24 - .set _common_tss,_SMP_prvpage+28 /* 104 bytes long, next = 132 */ - .set _other_cpus,_SMP_prvpage+132 /* bitmap of available CPUs, + .globl _common_tss,_other_cpus,_my_idlePTD,_ss_tpr + .globl _prv_CMAP1,_prv_CMAP2,_prv_CMAP3 + .set _cpuid,_SMP_prvpage+0 /* [0] */ + .set _curproc,_SMP_prvpage+4 /* [1] */ + .set _curpcb,_SMP_prvpage+8 /* [2] */ + .set _npxproc,_SMP_prvpage+12 /* [3] */ + .set _runtime,_SMP_prvpage+16 /* [4,5] */ + .set _cpu_lockid,_SMP_prvpage+24 /* [6] */ + .set _other_cpus,_SMP_prvpage+28 /* [7] bitmap of available CPUs, excluding ourself */ - .set _ss_tpr,_SMP_prvpage+136 - + .set _my_idlePTD,_SMP_prvpage+32 /* [8] */ + .set _ss_tpr,_SMP_prvpage+36 /* [9] */ + .set _prv_CMAP1,_SMP_prvpage+40 /* [10] */ + .set _prv_CMAP2,_SMP_prvpage+44 /* [11] */ + .set _prv_CMAP3,_SMP_prvpage+48 /* [12] */ + .set 
_common_tss,_SMP_prvpage+52 /* 102 (ie: 104) bytes long */ + /* Fetch the .set's for the local apic */ #include "i386/i386/mp_apicdefs.s" diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 7376267..a11153a 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.256 1997/08/18 06:58:09 charnier Exp $ + * $Id: machdep.c,v 1.257 1997/08/21 06:32:38 charnier Exp $ */ #include "apm.h" @@ -392,6 +392,12 @@ again: printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1024); + /* + * Set up buffers, so they can be used to read disk labels. + */ + bufinit(); + vm_pager_bufferinit(); + #ifdef SMP /* * OK, enough kmem_alloc/malloc state should be up, lets get on with it! @@ -399,12 +405,6 @@ again: mp_start(); /* fire up the APs and APICs */ mp_announce(); #endif /* SMP */ - - /* - * Set up buffers, so they can be used to read disk labels. - */ - bufinit(); - vm_pager_bufferinit(); } int diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index f647070..a4a6423 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -22,17 +22,20 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: mp_machdep.c,v 1.44 1997/08/24 20:33:32 fsmp Exp $ + * $Id: mp_machdep.c,v 1.45 1997/08/25 21:28:08 bde Exp $ */ #include "opt_smp.h" #include <sys/param.h> #include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/sysctl.h> -#include <vm/vm.h> /* for KERNBASE */ -#include <vm/vm_param.h> /* for KERNBASE */ -#include <vm/pmap.h> /* for KERNBASE */ +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> @@ -230,15 +233,24 @@ int apic_id_to_logical[NAPICID]; /* Bitmap of all available CPUs */ u_int all_cpus; -/* Boot of AP uses this PTD */ -u_int *bootPTD; +/* AP uses this PTD during bootstrap */ +pd_entry_t *bootPTD; /* Hotwire a 0->4MB V==P mapping */ -extern pt_entry_t KPTphys; +extern pt_entry_t *KPTphys; /* Virtual address of per-cpu common_tss */ extern struct i386tss common_tss; +/* IdlePTD per cpu */ +pd_entry_t *IdlePTDS[NCPU]; + +/* "my" private page table page, for BSP init */ +extern pt_entry_t SMP_prvpt[]; + +/* Private page pointer to curcpu's PTD, used during BSP init */ +extern pd_entry_t *my_idlePTD; + /* * Local data and functions. */ @@ -1473,9 +1485,9 @@ start_all_aps(u_int boot_addr) int x, i; u_char mpbiosreason; u_long mpbioswarmvec; - pd_entry_t newptd; - pt_entry_t newpt; - int *newpp; + pd_entry_t *newptd; + pt_entry_t *newpt; + int *newpp, *stack; POSTCODE(START_ALL_APS_POST); @@ -1498,57 +1510,71 @@ start_all_aps(u_int boot_addr) /* start each AP */ for (x = 1; x <= mp_naps; ++x) { - /* HACK HACK HACK !!! */ + /* This is a bit verbose, it will go away soon. 
*/ /* alloc new page table directory */ - newptd = (pd_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE)); + newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); + + /* Store the virtual PTD address for this CPU */ + IdlePTDS[x] = newptd; /* clone currently active one (ie: IdlePTD) */ bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ /* set up 0 -> 4MB P==V mapping for AP boot */ - newptd[0] = PG_V | PG_RW | ((u_long)KPTphys & PG_FRAME); + newptd[0] = (pd_entry_t) (PG_V | PG_RW | + ((u_long)KPTphys & PG_FRAME)); - /* store PTD for this AP */ - bootPTD = (pd_entry_t)vtophys(newptd); + /* store PTD for this AP's boot sequence */ + bootPTD = (pd_entry_t *)vtophys(newptd); /* alloc new page table page */ - newpt = (pt_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE)); + newpt = (pt_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); /* set the new PTD's private page to point there */ - newptd[MPPTDI] = PG_V | PG_RW | vtophys(newpt); + newptd[MPPTDI] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt)); /* install self referential entry */ - newptd[PTDPTDI] = PG_V | PG_RW | vtophys(newptd); + newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); - /* get a new private data page */ + /* allocate a new private data page */ newpp = (int *)kmem_alloc(kernel_map, PAGE_SIZE); /* wire it into the private page table page */ - newpt[0] = PG_V | PG_RW | vtophys(newpp); + newpt[0] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpp)); /* wire the ptp into itself for access */ - newpt[1] = PG_V | PG_RW | vtophys(newpt); + newpt[1] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt)); - /* and the local apic */ + /* copy in the pointer to the local apic */ newpt[2] = SMP_prvpt[2]; /* and the IO apic mapping[s] */ for (i = 16; i < 32; i++) newpt[i] = SMP_prvpt[i]; - /* prime data page for it to use */ - newpp[0] = x; /* cpuid */ - newpp[1] = 0; /* curproc */ - newpp[2] = 0; /* curpcb */ - newpp[3] = 0; /* npxproc */ - newpp[4] = 0; /* runtime.tv_sec */ - newpp[5] = 0; /* runtime.tv_usec */ - newpp[6] = 
x << 24; /* cpu_lockid */ + /* allocate and set up an idle stack data page */ + stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); + newpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); - /* XXX NOTE: ABANDON bootPTD for now!!!! */ + newpt[4] = 0; /* *prv_CMAP1 */ + newpt[5] = 0; /* *prv_CMAP2 */ + newpt[6] = 0; /* *prv_CMAP3 */ - /* END REVOLTING HACKERY */ + /* prime data page for it to use */ + newpp[0] = x; /* cpuid */ + newpp[1] = 0; /* curproc */ + newpp[2] = 0; /* curpcb */ + newpp[3] = 0; /* npxproc */ + newpp[4] = 0; /* runtime.tv_sec */ + newpp[5] = 0; /* runtime.tv_usec */ + newpp[6] = x << 24; /* cpu_lockid */ + newpp[7] = 0; /* other_cpus */ + newpp[8] = (int)bootPTD; /* my_idlePTD */ + newpp[9] = 0; /* ss_tpr */ + newpp[10] = (int)&newpt[4]; /* prv_CMAP1 */ + newpp[11] = (int)&newpt[5]; /* prv_CMAP2 */ + newpp[12] = (int)&newpt[6]; /* prv_CMAP3 */ /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; @@ -1585,6 +1611,26 @@ start_all_aps(u_int boot_addr) outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); + /* + * Set up the idle context for the BSP. Similar to above except + * that some was done by locore, some by pmap.c and some is implicit + * because the BSP is cpu#0 and the page is initially zero, and also + * because we can refer to variables by name on the BSP.. + */ + newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); + + bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ + IdlePTDS[0] = newptd; + + /* Point PTD[] to this page instead of IdlePTD's physical page */ + newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); + + my_idlePTD = (pd_entry_t *)vtophys(newptd); + + /* Allocate and setup BSP idle stack */ + stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); + SMP_prvpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); + pmap_set_opt_bsp(); /* number of APs actually started */ @@ -1800,7 +1846,7 @@ invltlb(void) * from executing at same time. 
*/ int -stop_cpus( u_int map ) +stop_cpus(u_int map) { if (!smp_active) return 0; @@ -1832,7 +1878,7 @@ stop_cpus( u_int map ) * 1: ok */ int -restart_cpus( u_int map ) +restart_cpus(u_int map) { if (!smp_active) return 0; @@ -1844,3 +1890,63 @@ restart_cpus( u_int map ) return 1; } + +int smp_active = 0; /* are the APs allowed to run? */ +SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, ""); + +/* XXX maybe should be hw.ncpu */ +int smp_cpus = 1; /* how many cpu's running */ +SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, ""); + +int invltlb_ok = 0; /* throttle smp_invltlb() till safe */ +SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, ""); + +int do_page_zero_idle = 0; /* bzero pages for fun and profit in idleloop */ +SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW, + &do_page_zero_idle, 0, ""); + + +/* + * This is called once the rest of the system is up and running and we're + * ready to let the AP's out of the pen. + */ +void ap_init(void); + +void +ap_init() +{ + u_int temp; + u_int apic_id; + + smp_cpus++; + + /* Build our map of 'other' CPUs. */ + other_cpus = all_cpus & ~(1 << cpuid); + + printf("SMP: AP CPU #%d Launched!\n", cpuid); + + /* XXX FIXME: i386 specific, and redundant: Setup the FPU. */ + load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS); + + /* A quick check from sanity claus */ + apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]); + if (cpuid != apic_id) { + printf("SMP: cpuid = %d\n", cpuid); + printf("SMP: apic_id = %d\n", apic_id); + printf("PTD[MPPTDI] = %08x\n", PTD[MPPTDI]); + panic("cpuid mismatch! boom!!"); + } + + /* Init local apic for irq's */ + apic_initialize(); + + /* + * Activate smp_invltlb, although strictly speaking, this isn't + * quite correct yet. We should have a bitfield for cpus willing + * to accept TLB flush IPI's or something and sync them. 
+ */ + invltlb_ok = 1; + smp_active = 1; /* historic */ + + curproc = NULL; /* make sure */ +} diff --git a/sys/i386/i386/mpboot.s b/sys/i386/i386/mpboot.s index 8a6af0a..9d51b1f 100644 --- a/sys/i386/i386/mpboot.s +++ b/sys/i386/i386/mpboot.s @@ -31,10 +31,9 @@ * mpboot.s: FreeBSD machine support for the Intel MP Spec * multiprocessor systems. * - * $Id: mpboot.s,v 1.2 1997/06/22 16:03:22 peter Exp $ + * $Id: mpboot.s,v 1.3 1997/08/25 10:57:36 peter Exp $ */ - #include <machine/asmacros.h> /* miscellaneous asm macros */ #include <machine/apic.h> #include <machine/specialreg.h> @@ -74,15 +73,13 @@ NON_GPROF_ENTRY(MPentry) CHECKPOINT(0x36, 3) - movl $mp_stk-KERNBASE,%esp /* mp boot stack end loc. */ /* Now enable paging mode */ movl _bootPTD-KERNBASE, %eax movl %eax,%cr3 - movl %cr0,%eax + movl %cr0,%eax orl $CR0_PE|CR0_PG,%eax /* enable paging */ - movl $0x80000011,%eax movl %eax,%cr0 /* let the games begin! */ - movl $mp_stk,%esp /* mp boot stack end loc. */ + movl $_idlestack_top,%esp /* boot stack end loc. */ pushl $mp_begin /* jump to high mem */ ret @@ -105,30 +102,16 @@ mp_begin: /* now running relocated at KERNBASE */ movl %eax, _cpu_apic_versions /* into [ 0 ] */ incl _mp_ncpus /* signal BSP */ - /* One at a time, we are running on the shared mp_stk */ - /* This is the Intel reccomended semaphore method */ -#define BL_SET 0xff -#define BL_CLR 0x00 - movb $BL_SET, %al -1: - xchgb %al, bootlock /* xchg is implicitly locked */ - cmpb $BL_SET, %al /* was is set? */ - jz 1b /* yes, keep trying... */ CHECKPOINT(0x39, 6) - /* Now, let's do some REAL WORK :-) */ - call _secondary_main -/* NOT REACHED */ -2: hlt - jmp 2b + /* wait till we can get into the kernel */ + call _boot_get_mplock -/* - * Let a CPU past the semaphore so it can use mp_stk - */ -ENTRY(boot_unlock) - movb $BL_CLR, %al - xchgb %al, bootlock /* xchg is implicitly locked */ - ret + /* Now, let's prepare for some REAL WORK :-) */ + call _ap_init + + /* let her rip! 
(loads new stack) */ + jmp _cpu_switch /* * This is the embedded trampoline or bootstrap that is @@ -300,17 +283,3 @@ BOOTMP2: .globl _bootMP_size _bootMP_size: .long BOOTMP2 - BOOTMP1 - - /* - * Temporary stack used while booting AP's - * It is protected by: - * 1: only one cpu is started at a time and it ends up waiting - * for smp_active before continuing. - * 2: Once smp_active != 0; further access is limited by _bootlock. - */ - .globl mp_stk - .space 0x2000 /* space for mp_stk - 2nd temporary stack */ -mp_stk: - - .globl bootlock -bootlock: .byte BL_SET diff --git a/sys/i386/i386/mplock.s b/sys/i386/i386/mplock.s index 9a43cfb..415a904 100644 --- a/sys/i386/i386/mplock.s +++ b/sys/i386/i386/mplock.s @@ -6,7 +6,7 @@ * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * - * $Id: mplock.s,v 1.19 1997/08/25 10:57:38 peter Exp $ + * $Id: mplock.s,v 1.20 1997/08/25 21:31:38 bde Exp $ * * Functions for locking between CPUs in a SMP system. * @@ -227,6 +227,26 @@ NON_GPROF_ENTRY(get_mplock) popl %eax ret +/* + * Special version of get_mplock that is used during bootstrap when we can't + * yet enable interrupts of any sort since the APIC isn't online yet. + * + * XXX FIXME.. - APIC should be online from the start to simplify IPI's. + */ +NON_GPROF_ENTRY(boot_get_mplock) + pushl %eax + pushl %ecx + pushl %edx + + pushl $_mp_lock + call _MPgetlock + add $4, %esp + + popl %edx + popl %ecx + popl %eax + ret + /*********************************************************************** * void try_mplock() * ----------------- diff --git a/sys/i386/i386/mptable.c b/sys/i386/i386/mptable.c index f647070..a4a6423 100644 --- a/sys/i386/i386/mptable.c +++ b/sys/i386/i386/mptable.c @@ -22,17 +22,20 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: mp_machdep.c,v 1.44 1997/08/24 20:33:32 fsmp Exp $ + * $Id: mp_machdep.c,v 1.45 1997/08/25 21:28:08 bde Exp $ */ #include "opt_smp.h" #include <sys/param.h> #include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/sysctl.h> -#include <vm/vm.h> /* for KERNBASE */ -#include <vm/vm_param.h> /* for KERNBASE */ -#include <vm/pmap.h> /* for KERNBASE */ +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> @@ -230,15 +233,24 @@ int apic_id_to_logical[NAPICID]; /* Bitmap of all available CPUs */ u_int all_cpus; -/* Boot of AP uses this PTD */ -u_int *bootPTD; +/* AP uses this PTD during bootstrap */ +pd_entry_t *bootPTD; /* Hotwire a 0->4MB V==P mapping */ -extern pt_entry_t KPTphys; +extern pt_entry_t *KPTphys; /* Virtual address of per-cpu common_tss */ extern struct i386tss common_tss; +/* IdlePTD per cpu */ +pd_entry_t *IdlePTDS[NCPU]; + +/* "my" private page table page, for BSP init */ +extern pt_entry_t SMP_prvpt[]; + +/* Private page pointer to curcpu's PTD, used during BSP init */ +extern pd_entry_t *my_idlePTD; + /* * Local data and functions. */ @@ -1473,9 +1485,9 @@ start_all_aps(u_int boot_addr) int x, i; u_char mpbiosreason; u_long mpbioswarmvec; - pd_entry_t newptd; - pt_entry_t newpt; - int *newpp; + pd_entry_t *newptd; + pt_entry_t *newpt; + int *newpp, *stack; POSTCODE(START_ALL_APS_POST); @@ -1498,57 +1510,71 @@ start_all_aps(u_int boot_addr) /* start each AP */ for (x = 1; x <= mp_naps; ++x) { - /* HACK HACK HACK !!! */ + /* This is a bit verbose, it will go away soon. 
*/ /* alloc new page table directory */ - newptd = (pd_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE)); + newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); + + /* Store the virtual PTD address for this CPU */ + IdlePTDS[x] = newptd; /* clone currently active one (ie: IdlePTD) */ bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ /* set up 0 -> 4MB P==V mapping for AP boot */ - newptd[0] = PG_V | PG_RW | ((u_long)KPTphys & PG_FRAME); + newptd[0] = (pd_entry_t) (PG_V | PG_RW | + ((u_long)KPTphys & PG_FRAME)); - /* store PTD for this AP */ - bootPTD = (pd_entry_t)vtophys(newptd); + /* store PTD for this AP's boot sequence */ + bootPTD = (pd_entry_t *)vtophys(newptd); /* alloc new page table page */ - newpt = (pt_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE)); + newpt = (pt_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); /* set the new PTD's private page to point there */ - newptd[MPPTDI] = PG_V | PG_RW | vtophys(newpt); + newptd[MPPTDI] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt)); /* install self referential entry */ - newptd[PTDPTDI] = PG_V | PG_RW | vtophys(newptd); + newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); - /* get a new private data page */ + /* allocate a new private data page */ newpp = (int *)kmem_alloc(kernel_map, PAGE_SIZE); /* wire it into the private page table page */ - newpt[0] = PG_V | PG_RW | vtophys(newpp); + newpt[0] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpp)); /* wire the ptp into itself for access */ - newpt[1] = PG_V | PG_RW | vtophys(newpt); + newpt[1] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt)); - /* and the local apic */ + /* copy in the pointer to the local apic */ newpt[2] = SMP_prvpt[2]; /* and the IO apic mapping[s] */ for (i = 16; i < 32; i++) newpt[i] = SMP_prvpt[i]; - /* prime data page for it to use */ - newpp[0] = x; /* cpuid */ - newpp[1] = 0; /* curproc */ - newpp[2] = 0; /* curpcb */ - newpp[3] = 0; /* npxproc */ - newpp[4] = 0; /* runtime.tv_sec */ - newpp[5] = 0; /* runtime.tv_usec */ - newpp[6] = 
x << 24; /* cpu_lockid */ + /* allocate and set up an idle stack data page */ + stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); + newpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); - /* XXX NOTE: ABANDON bootPTD for now!!!! */ + newpt[4] = 0; /* *prv_CMAP1 */ + newpt[5] = 0; /* *prv_CMAP2 */ + newpt[6] = 0; /* *prv_CMAP3 */ - /* END REVOLTING HACKERY */ + /* prime data page for it to use */ + newpp[0] = x; /* cpuid */ + newpp[1] = 0; /* curproc */ + newpp[2] = 0; /* curpcb */ + newpp[3] = 0; /* npxproc */ + newpp[4] = 0; /* runtime.tv_sec */ + newpp[5] = 0; /* runtime.tv_usec */ + newpp[6] = x << 24; /* cpu_lockid */ + newpp[7] = 0; /* other_cpus */ + newpp[8] = (int)bootPTD; /* my_idlePTD */ + newpp[9] = 0; /* ss_tpr */ + newpp[10] = (int)&newpt[4]; /* prv_CMAP1 */ + newpp[11] = (int)&newpt[5]; /* prv_CMAP2 */ + newpp[12] = (int)&newpt[6]; /* prv_CMAP3 */ /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; @@ -1585,6 +1611,26 @@ start_all_aps(u_int boot_addr) outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); + /* + * Set up the idle context for the BSP. Similar to above except + * that some was done by locore, some by pmap.c and some is implicit + * because the BSP is cpu#0 and the page is initially zero, and also + * because we can refer to variables by name on the BSP.. + */ + newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); + + bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ + IdlePTDS[0] = newptd; + + /* Point PTD[] to this page instead of IdlePTD's physical page */ + newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); + + my_idlePTD = (pd_entry_t *)vtophys(newptd); + + /* Allocate and setup BSP idle stack */ + stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); + SMP_prvpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); + pmap_set_opt_bsp(); /* number of APs actually started */ @@ -1800,7 +1846,7 @@ invltlb(void) * from executing at same time. 
*/ int -stop_cpus( u_int map ) +stop_cpus(u_int map) { if (!smp_active) return 0; @@ -1832,7 +1878,7 @@ stop_cpus( u_int map ) * 1: ok */ int -restart_cpus( u_int map ) +restart_cpus(u_int map) { if (!smp_active) return 0; @@ -1844,3 +1890,63 @@ restart_cpus( u_int map ) return 1; } + +int smp_active = 0; /* are the APs allowed to run? */ +SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, ""); + +/* XXX maybe should be hw.ncpu */ +int smp_cpus = 1; /* how many cpu's running */ +SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, ""); + +int invltlb_ok = 0; /* throttle smp_invltlb() till safe */ +SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, ""); + +int do_page_zero_idle = 0; /* bzero pages for fun and profit in idleloop */ +SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW, + &do_page_zero_idle, 0, ""); + + +/* + * This is called once the rest of the system is up and running and we're + * ready to let the AP's out of the pen. + */ +void ap_init(void); + +void +ap_init() +{ + u_int temp; + u_int apic_id; + + smp_cpus++; + + /* Build our map of 'other' CPUs. */ + other_cpus = all_cpus & ~(1 << cpuid); + + printf("SMP: AP CPU #%d Launched!\n", cpuid); + + /* XXX FIXME: i386 specific, and redundant: Setup the FPU. */ + load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS); + + /* A quick check from sanity claus */ + apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]); + if (cpuid != apic_id) { + printf("SMP: cpuid = %d\n", cpuid); + printf("SMP: apic_id = %d\n", apic_id); + printf("PTD[MPPTDI] = %08x\n", PTD[MPPTDI]); + panic("cpuid mismatch! boom!!"); + } + + /* Init local apic for irq's */ + apic_initialize(); + + /* + * Activate smp_invltlb, although strictly speaking, this isn't + * quite correct yet. We should have a bitfield for cpus willing + * to accept TLB flush IPI's or something and sync them. 
+ */ + invltlb_ok = 1; + smp_active = 1; /* historic */ + + curproc = NULL; /* make sure */ +} diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 3c43184..2b069f7 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -39,7 +39,7 @@ * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $Id: pmap.c,v 1.158 1997/08/07 05:15:48 dyson Exp $ + * $Id: pmap.c,v 1.159 1997/08/25 21:53:01 bde Exp $ */ /* @@ -183,6 +183,13 @@ static caddr_t CADDR2; static pt_entry_t *msgbufmap; struct msgbuf *msgbufp=0; +#ifdef SMP +extern char prv_CPAGE1[], prv_CPAGE2[], prv_CPAGE3[]; +extern pt_entry_t *prv_CMAP1, *prv_CMAP2, *prv_CMAP3; +extern pd_entry_t *IdlePTDS[]; +extern pt_entry_t SMP_prvpt[]; +#endif + pt_entry_t *PMAP1 = 0; unsigned *PADDR1 = 0; @@ -408,7 +415,7 @@ pmap_bootstrap(firstaddr, loadaddr) /* 1 = page table page */ /* 2 = local apic */ /* 16-31 = io apics */ - SMP_prvpt[2] = PG_V | PG_RW | pgeflag | ((u_long)cpu_apic_address & PG_FRAME); + SMP_prvpt[2] = (pt_entry_t)(PG_V | PG_RW | pgeflag | ((u_long)cpu_apic_address & PG_FRAME)); for (i = 0; i < mp_napics; i++) { for (j = 0; j < 16; j++) { @@ -420,8 +427,8 @@ pmap_bootstrap(firstaddr, loadaddr) } /* use this slot if available */ if (((u_long)SMP_prvpt[j + 16] & PG_FRAME) == 0) { - SMP_prvpt[j + 16] = PG_V | PG_RW | pgeflag | - ((u_long)io_apic_address[i] & PG_FRAME); + SMP_prvpt[j + 16] = (pt_entry_t)(PG_V | PG_RW | pgeflag | + ((u_long)io_apic_address[i] & PG_FRAME)); ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE]; break; } @@ -429,6 +436,11 @@ pmap_bootstrap(firstaddr, loadaddr) if (j == 16) panic("no space to map IO apic %d!", i); } + + /* BSP does this itself, AP's get it pre-set */ + prv_CMAP1 = (pt_entry_t *)&SMP_prvpt[4]; + prv_CMAP2 = (pt_entry_t *)&SMP_prvpt[5]; + prv_CMAP3 = (pt_entry_t *)&SMP_prvpt[6]; #endif invltlb(); @@ -463,7 +475,8 @@ pmap_set_opt(unsigned *pdir) { * Setup the PTD for the boot processor */ void -pmap_set_opt_bsp(void) { +pmap_set_opt_bsp(void) +{ 
pmap_set_opt((unsigned *)kernel_pmap->pm_pdir); pmap_set_opt((unsigned *)PTD); invltlb(); @@ -1414,6 +1427,9 @@ pmap_growkernel(vm_offset_t addr) struct proc *p; struct pmap *pmap; int s; +#ifdef SMP + int i; +#endif s = splhigh(); if (kernel_vm_end == 0) { @@ -1446,6 +1462,14 @@ pmap_growkernel(vm_offset_t addr) pmap_zero_page(VM_PAGE_TO_PHYS(nkpg)); } pdir_pde(PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_RW | pgeflag); + +#ifdef SMP + for (i = 0; i < mp_naps; i++) { + if (IdlePTDS[i]) + pdir_pde(IdlePTDS[i], kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_RW | pgeflag); + } +#endif + nkpg = NULL; for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { @@ -2591,6 +2615,18 @@ void pmap_zero_page(phys) vm_offset_t phys; { +#ifdef SMP + if (*(int *) prv_CMAP3) + panic("pmap_zero_page: prv_CMAP3 busy"); + + *(int *) prv_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME); + invltlb_1pg((vm_offset_t) &prv_CPAGE3); + + bzero(&prv_CPAGE3, PAGE_SIZE); + + *(int *) prv_CMAP3 = 0; + invltlb_1pg((vm_offset_t) &prv_CPAGE3); +#else if (*(int *) CMAP2) panic("pmap_zero_page: CMAP busy"); @@ -2598,6 +2634,7 @@ pmap_zero_page(phys) bzero(CADDR2, PAGE_SIZE); *(int *) CMAP2 = 0; invltlb_1pg((vm_offset_t) CADDR2); +#endif } /* @@ -2611,6 +2648,23 @@ pmap_copy_page(src, dst) vm_offset_t src; vm_offset_t dst; { +#ifdef SMP + if (*(int *) prv_CMAP1) + panic("pmap_copy_page: prv_CMAP1 busy"); + if (*(int *) prv_CMAP2) + panic("pmap_copy_page: prv_CMAP2 busy"); + + *(int *) prv_CMAP1 = PG_V | PG_RW | (src & PG_FRAME); + *(int *) prv_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME); + + invltlb_2pg( (vm_offset_t) &prv_CPAGE1, (vm_offset_t) &prv_CPAGE2); + + bcopy(&prv_CPAGE1, &prv_CPAGE2, PAGE_SIZE); + + *(int *) prv_CMAP1 = 0; + *(int *) prv_CMAP2 = 0; + invltlb_2pg( (vm_offset_t) &prv_CPAGE1, (vm_offset_t) &prv_CPAGE2); +#else if (*(int *) CMAP1 || *(int *) CMAP2) panic("pmap_copy_page: CMAP busy"); @@ -2622,6 +2676,7 @@ pmap_copy_page(src, dst) *(int *) CMAP1 
= 0; *(int *) CMAP2 = 0; invltlb_2pg( (vm_offset_t) CADDR1, (vm_offset_t) CADDR2); +#endif } diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s index 61b1dfa..f749f03 100644 --- a/sys/i386/i386/swtch.s +++ b/sys/i386/i386/swtch.s @@ -33,7 +33,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: swtch.s,v 1.58 1997/08/04 17:31:43 fsmp Exp $ + * $Id: swtch.s,v 1.59 1997/08/09 00:02:47 dyson Exp $ */ #include "npx.h" @@ -240,17 +240,94 @@ rem3id: .asciz "remrq.id" /* * When no processes are on the runq, cpu_switch() branches to _idle * to wait for something to come ready. - * - * NOTE: on an SMP system this routine is a startup-only code path. - * once initialization is over, meaning the idle procs have been - * created, we should NEVER branch here. */ ALIGN_TEXT _idle: -#if defined(SMP) && defined(DIAGNOSTIC) - cmpl $0, _smp_active - jnz badsw3 -#endif /* SMP && DIAGNOSTIC */ +#ifdef SMP + /* when called, we have the mplock, intr disabled */ + + xorl %ebp,%ebp + + /* use our idleproc's "context" */ + movl _my_idlePTD,%ecx + movl %ecx,%cr3 + movl $_idlestack_top,%ecx + movl %ecx,%esp + + /* update common_tss.tss_esp0 pointer */ + movl $_common_tss, %eax + movl %ecx, TSS_ESP0(%eax) + + sti + + /* + * XXX callers of cpu_switch() do a bogus splclock(). Locking should + * be left to cpu_switch(). + */ + call _spl0 + + cli + + /* + * _REALLY_ free the lock, no matter how deep the prior nesting. + * We will recover the nesting on the way out when we have a new + * proc to load. + * + * XXX: we had damn well better be sure we had it before doing this! + */ + movl $FREE_LOCK, %eax + movl %eax, _mp_lock + + /* do NOT have lock, intrs disabled */ + .globl idle_loop +idle_loop: + + movl %cr3,%eax /* ouch! 
*/ + movl %eax,%cr3 + + cmpl $0,_smp_active + jne 1f + cmpl $0,_cpuid + je 1f + jmp 2f + +1: cmpl $0,_whichrtqs /* real-time queue */ + jne 3f + cmpl $0,_whichqs /* normal queue */ + jne 3f + cmpl $0,_whichidqs /* 'idle' queue */ + jne 3f + + cmpl $0,_do_page_zero_idle + je 2f + /* XXX appears to cause panics */ + /* + * Inside zero_idle we enable interrupts and grab the mplock + * as needed. It needs to be careful about entry/exit mutexes. + */ + call _vm_page_zero_idle /* internal locking */ + testl %eax, %eax + jnz idle_loop +2: + + /* enable intrs for a halt */ + sti + call *_hlt_vector /* wait for interrupt */ + cli + jmp idle_loop + +3: + call _get_mplock + cmpl $0,_whichrtqs /* real-time queue */ + CROSSJUMP(jne, sw1a, je) + cmpl $0,_whichqs /* normal queue */ + CROSSJUMP(jne, nortqr, je) + cmpl $0,_whichidqs /* 'idle' queue */ + CROSSJUMP(jne, idqr, je) + call _rel_mplock + jmp idle_loop + +#else xorl %ebp,%ebp movl $HIDENAME(tmpstk),%esp movl _IdlePTD,%ecx @@ -302,6 +379,7 @@ idle_loop: sti call *_hlt_vector /* wait for interrupt */ jmp idle_loop +#endif CROSSJUMPTARGET(_idle) @@ -367,6 +445,17 @@ ENTRY(cpu_switch) /* save is done, now choose a new process or idle */ sw1: cli + +#ifdef SMP + /* Stop scheduling if smp_active goes zero and we are not BSP */ + cmpl $0,_smp_active + jne 1f + cmpl $0,_cpuid + je 1f + CROSSJUMP(je, _idle, jne) /* wind down */ +1: +#endif + sw1a: movl _whichrtqs,%edi /* pick next p. from rtqs */ testl %edi,%edi @@ -594,12 +683,6 @@ sw0_2: .asciz "cpu_switch: not SRUN" #endif #if defined(SMP) && defined(DIAGNOSTIC) -badsw3: - pushl $sw0_3 - call _panic - -sw0_3: .asciz "cpu_switch: went idle with smp_active" - badsw4: pushl $sw0_4 call _panic diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index ac139d5..5895361 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. 
* * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.106 1997/08/20 05:25:48 fsmp Exp $ + * $Id: trap.c,v 1.107 1997/08/21 06:32:39 charnier Exp $ */ /* @@ -725,6 +725,7 @@ trap_fatal(frame) ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP printf("cpuid = %d\n", cpuid); + printf("lapic.id = %d\n", lapic.id); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%x\n", eva); @@ -769,6 +770,9 @@ trap_fatal(frame) } else { printf("Idle\n"); } +#ifdef SMP + printf("mp_lock = %08x\n", mp_lock); +#endif printf("interrupt mask = "); if ((cpl & net_imask) == net_imask) printf("net "); @@ -824,6 +828,7 @@ dblfault_handler() printf("ebp = 0x%x\n", common_tss.tss_ebp); #ifdef SMP printf("cpuid = %d\n", cpuid); + printf("lapic.id = %d\n", lapic.id); #endif panic("double fault"); } diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 2db2844..0d2c672 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.84 1997/07/20 08:37:24 bde Exp $ + * $Id: vm_machdep.c,v 1.85 1997/08/09 00:02:56 dyson Exp $ */ #include "npx.h" @@ -927,6 +927,9 @@ vm_page_zero_idle() */ if (cnt.v_free_count - vm_page_zero_count <= cnt.v_free_reserved / 2) return (0); +#ifdef SMP + get_mplock(); +#endif s = splvm(); enable_intr(); m = vm_page_list_find(PQ_FREE, free_rover); @@ -934,7 +937,13 @@ vm_page_zero_idle() --(*vm_page_queues[m->queue].lcnt); TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq); splx(s); +#ifdef SMP + rel_mplock(); +#endif pmap_zero_page(VM_PAGE_TO_PHYS(m)); +#ifdef SMP + get_mplock(); +#endif (void)splvm(); m->queue = PQ_ZERO + m->pc; ++(*vm_page_queues[m->queue].lcnt); @@ -944,5 +953,8 @@ vm_page_zero_idle() } splx(s); disable_intr(); +#ifdef SMP + rel_mplock(); +#endif return (1); } diff --git a/sys/i386/include/mptable.h b/sys/i386/include/mptable.h index 
f647070..a4a6423 100644 --- a/sys/i386/include/mptable.h +++ b/sys/i386/include/mptable.h @@ -22,17 +22,20 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: mp_machdep.c,v 1.44 1997/08/24 20:33:32 fsmp Exp $ + * $Id: mp_machdep.c,v 1.45 1997/08/25 21:28:08 bde Exp $ */ #include "opt_smp.h" #include <sys/param.h> #include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/sysctl.h> -#include <vm/vm.h> /* for KERNBASE */ -#include <vm/vm_param.h> /* for KERNBASE */ -#include <vm/pmap.h> /* for KERNBASE */ +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> @@ -230,15 +233,24 @@ int apic_id_to_logical[NAPICID]; /* Bitmap of all available CPUs */ u_int all_cpus; -/* Boot of AP uses this PTD */ -u_int *bootPTD; +/* AP uses this PTD during bootstrap */ +pd_entry_t *bootPTD; /* Hotwire a 0->4MB V==P mapping */ -extern pt_entry_t KPTphys; +extern pt_entry_t *KPTphys; /* Virtual address of per-cpu common_tss */ extern struct i386tss common_tss; +/* IdlePTD per cpu */ +pd_entry_t *IdlePTDS[NCPU]; + +/* "my" private page table page, for BSP init */ +extern pt_entry_t SMP_prvpt[]; + +/* Private page pointer to curcpu's PTD, used during BSP init */ +extern pd_entry_t *my_idlePTD; + /* * Local data and functions. */ @@ -1473,9 +1485,9 @@ start_all_aps(u_int boot_addr) int x, i; u_char mpbiosreason; u_long mpbioswarmvec; - pd_entry_t newptd; - pt_entry_t newpt; - int *newpp; + pd_entry_t *newptd; + pt_entry_t *newpt; + int *newpp, *stack; POSTCODE(START_ALL_APS_POST); @@ -1498,57 +1510,71 @@ start_all_aps(u_int boot_addr) /* start each AP */ for (x = 1; x <= mp_naps; ++x) { - /* HACK HACK HACK !!! */ + /* This is a bit verbose, it will go away soon. 
*/ /* alloc new page table directory */ - newptd = (pd_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE)); + newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); + + /* Store the virtual PTD address for this CPU */ + IdlePTDS[x] = newptd; /* clone currently active one (ie: IdlePTD) */ bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ /* set up 0 -> 4MB P==V mapping for AP boot */ - newptd[0] = PG_V | PG_RW | ((u_long)KPTphys & PG_FRAME); + newptd[0] = (pd_entry_t) (PG_V | PG_RW | + ((u_long)KPTphys & PG_FRAME)); - /* store PTD for this AP */ - bootPTD = (pd_entry_t)vtophys(newptd); + /* store PTD for this AP's boot sequence */ + bootPTD = (pd_entry_t *)vtophys(newptd); /* alloc new page table page */ - newpt = (pt_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE)); + newpt = (pt_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); /* set the new PTD's private page to point there */ - newptd[MPPTDI] = PG_V | PG_RW | vtophys(newpt); + newptd[MPPTDI] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt)); /* install self referential entry */ - newptd[PTDPTDI] = PG_V | PG_RW | vtophys(newptd); + newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); - /* get a new private data page */ + /* allocate a new private data page */ newpp = (int *)kmem_alloc(kernel_map, PAGE_SIZE); /* wire it into the private page table page */ - newpt[0] = PG_V | PG_RW | vtophys(newpp); + newpt[0] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpp)); /* wire the ptp into itself for access */ - newpt[1] = PG_V | PG_RW | vtophys(newpt); + newpt[1] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt)); - /* and the local apic */ + /* copy in the pointer to the local apic */ newpt[2] = SMP_prvpt[2]; /* and the IO apic mapping[s] */ for (i = 16; i < 32; i++) newpt[i] = SMP_prvpt[i]; - /* prime data page for it to use */ - newpp[0] = x; /* cpuid */ - newpp[1] = 0; /* curproc */ - newpp[2] = 0; /* curpcb */ - newpp[3] = 0; /* npxproc */ - newpp[4] = 0; /* runtime.tv_sec */ - newpp[5] = 0; /* runtime.tv_usec */ - newpp[6] = 
x << 24; /* cpu_lockid */ + /* allocate and set up an idle stack data page */ + stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); + newpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); - /* XXX NOTE: ABANDON bootPTD for now!!!! */ + newpt[4] = 0; /* *prv_CMAP1 */ + newpt[5] = 0; /* *prv_CMAP2 */ + newpt[6] = 0; /* *prv_CMAP3 */ - /* END REVOLTING HACKERY */ + /* prime data page for it to use */ + newpp[0] = x; /* cpuid */ + newpp[1] = 0; /* curproc */ + newpp[2] = 0; /* curpcb */ + newpp[3] = 0; /* npxproc */ + newpp[4] = 0; /* runtime.tv_sec */ + newpp[5] = 0; /* runtime.tv_usec */ + newpp[6] = x << 24; /* cpu_lockid */ + newpp[7] = 0; /* other_cpus */ + newpp[8] = (int)bootPTD; /* my_idlePTD */ + newpp[9] = 0; /* ss_tpr */ + newpp[10] = (int)&newpt[4]; /* prv_CMAP1 */ + newpp[11] = (int)&newpt[5]; /* prv_CMAP2 */ + newpp[12] = (int)&newpt[6]; /* prv_CMAP3 */ /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; @@ -1585,6 +1611,26 @@ start_all_aps(u_int boot_addr) outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); + /* + * Set up the idle context for the BSP. Similar to above except + * that some was done by locore, some by pmap.c and some is implicit + * because the BSP is cpu#0 and the page is initially zero, and also + * because we can refer to variables by name on the BSP.. + */ + newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); + + bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ + IdlePTDS[0] = newptd; + + /* Point PTD[] to this page instead of IdlePTD's physical page */ + newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); + + my_idlePTD = (pd_entry_t *)vtophys(newptd); + + /* Allocate and setup BSP idle stack */ + stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); + SMP_prvpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); + pmap_set_opt_bsp(); /* number of APs actually started */ @@ -1800,7 +1846,7 @@ invltlb(void) * from executing at same time. 
*/ int -stop_cpus( u_int map ) +stop_cpus(u_int map) { if (!smp_active) return 0; @@ -1832,7 +1878,7 @@ stop_cpus( u_int map ) * 1: ok */ int -restart_cpus( u_int map ) +restart_cpus(u_int map) { if (!smp_active) return 0; @@ -1844,3 +1890,63 @@ restart_cpus( u_int map ) return 1; } + +int smp_active = 0; /* are the APs allowed to run? */ +SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, ""); + +/* XXX maybe should be hw.ncpu */ +int smp_cpus = 1; /* how many cpu's running */ +SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, ""); + +int invltlb_ok = 0; /* throttle smp_invltlb() till safe */ +SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, ""); + +int do_page_zero_idle = 0; /* bzero pages for fun and profit in idleloop */ +SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW, + &do_page_zero_idle, 0, ""); + + +/* + * This is called once the rest of the system is up and running and we're + * ready to let the AP's out of the pen. + */ +void ap_init(void); + +void +ap_init() +{ + u_int temp; + u_int apic_id; + + smp_cpus++; + + /* Build our map of 'other' CPUs. */ + other_cpus = all_cpus & ~(1 << cpuid); + + printf("SMP: AP CPU #%d Launched!\n", cpuid); + + /* XXX FIXME: i386 specific, and redundant: Setup the FPU. */ + load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS); + + /* A quick check from sanity claus */ + apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]); + if (cpuid != apic_id) { + printf("SMP: cpuid = %d\n", cpuid); + printf("SMP: apic_id = %d\n", apic_id); + printf("PTD[MPPTDI] = %08x\n", PTD[MPPTDI]); + panic("cpuid mismatch! boom!!"); + } + + /* Init local apic for irq's */ + apic_initialize(); + + /* + * Activate smp_invltlb, although strictly speaking, this isn't + * quite correct yet. We should have a bitfield for cpus willing + * to accept TLB flush IPI's or something and sync them. 
+ */ + invltlb_ok = 1; + smp_active = 1; /* historic */ + + curproc = NULL; /* make sure */ +} diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index f433b2f..276b838 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -6,7 +6,7 @@ * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * - * $Id: smp.h,v 1.24 1997/08/24 20:17:05 smp Exp smp $ + * $Id: smp.h,v 1.29 1997/08/24 20:33:24 fsmp Exp $ * */ @@ -100,7 +100,6 @@ extern int cpu_num_to_apic_id[]; extern int io_num_to_apic_id[]; extern int apic_id_to_logical[]; extern u_int all_cpus; -extern u_int SMP_prvpt[]; extern u_char SMP_ioapic[]; /* functions in mp_machdep.c */ diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 90c113c..4c4024a 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -39,7 +39,7 @@ * SUCH DAMAGE. * * @(#)init_main.c 8.9 (Berkeley) 1/21/94 - * $Id: init_main.c,v 1.5 1997/08/15 02:13:31 smp Exp smp $ + * $Id: init_main.c,v 1.69 1997/08/15 02:33:30 fsmp Exp $ */ #include "opt_rlimit.h" @@ -541,12 +541,6 @@ kthread_init(dummy) /* Create process 1 (init(8)). */ start_init(curproc); -#ifdef SMP - /* wait for the SMP idle loops to come online */ - while (smp_idle_loops < mp_ncpus) - tsleep((caddr_t *)&smp_idle_loops, PWAIT, "smpilw", 0); -#endif /* SMP */ - prepare_usermode(); /* diff --git a/sys/kern/init_smp.c b/sys/kern/init_smp.c deleted file mode 100644 index 6d56a60..0000000 --- a/sys/kern/init_smp.c +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Copyright (c) 1996, Peter Wemm <peter@freebsd.org> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $Id: init_smp.c,v 1.10 1997/08/15 02:13:31 smp Exp smp $ - */ - -#include "opt_smp.h" - -#include <sys/param.h> -#include <sys/filedesc.h> -#include <sys/kernel.h> -#include <sys/sysctl.h> -#include <sys/proc.h> -#include <sys/resourcevar.h> -#include <sys/signalvar.h> -#include <sys/systm.h> -#include <sys/vnode.h> -#include <sys/sysent.h> -#include <sys/reboot.h> -#include <sys/sysproto.h> -#include <sys/vmmeter.h> -#include <sys/lock.h> - -#include <machine/cpu.h> -#include <machine/smp.h> -#include <machine/smptests.h> /** IGNORE_IDLEPROCS */ -#include <machine/specialreg.h> - -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/vm_prot.h> -#include <vm/pmap.h> -#include <vm/vm_map.h> -#include <sys/user.h> - -int smp_active = 0; /* are the APs allowed to run? 
*/ - -static int -sysctl_smp_active SYSCTL_HANDLER_ARGS -{ - int error = 0; - int new_val; - - error = SYSCTL_OUT(req, &smp_active, sizeof(int)); - - if (error || !req->newptr) - return (error); - - error = SYSCTL_IN(req, &new_val, sizeof(int)); - if (error) - return (error); - if (new_val < 1) - return (EBUSY); - if (new_val > mp_ncpus) - return (EINVAL); - smp_active = new_val; - return (0); -} - -SYSCTL_PROC(_kern, OID_AUTO, smp_active, CTLTYPE_INT|CTLFLAG_RW, - 0, 0, &sysctl_smp_active, "I", ""); - -int smp_cpus = 0; /* how many cpu's running */ -SYSCTL_INT(_kern, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, ""); - -int idle_debug = 0; -SYSCTL_INT(_kern, OID_AUTO, idle_debug, CTLFLAG_RW, &idle_debug, 0, ""); - -int invltlb_ok = 0; /* throttle smp_invltlb() till safe */ - -#if defined(IGNORE_IDLEPROCS) -int ignore_idleprocs = 1; -#else -int ignore_idleprocs = 0; -#endif -SYSCTL_INT(_kern, OID_AUTO, ignore_idleprocs, CTLFLAG_RW, &ignore_idleprocs, - 0, ""); - -static void smp_kickoff __P((void *dummy)); -SYSINIT(smpkick, SI_SUB_SMP, SI_ORDER_FIRST, smp_kickoff, NULL) - -static void smp_idleloop __P((void *)); - -void secondary_main __P((void)); - -volatile int smp_idle_loops = 0; -void boot_unlock __P((void)); - -struct proc *SMPidleproc[NCPU]; -static int cpu_starting = -1; - -static void -smp_kickoff(dummy) - void *dummy; -{ - int rval[2]; /* return from fork */ - struct proc *p; - int i; - - /* - * Create the appropriate number of cpu-idle-eaters - */ - for (i = 0; i < mp_ncpus; i++) { - /* kernel thread*/ - if (fork(&proc0, NULL, rval)) - panic("cannot fork idle process"); - p = pfind(rval[0]); - cpu_set_fork_handler(p, smp_idleloop, NULL); - SMPidleproc[i] = p; - p->p_flag |= P_INMEM | P_SYSTEM | P_IDLEPROC; - sprintf(p->p_comm, "cpuidle%d", i); - /* - * PRIO_IDLE is the last scheduled of the three - * classes and we choose the lowest priority possible - * for there. 
- */ - p->p_rtprio.type = RTP_PRIO_IDLE; - p->p_rtprio.prio = RTP_PRIO_MAX; - } - -} - - -#define MSG_CPU_MADEIT \ - printf("SMP: TADA! CPU #%d made it into the scheduler!.\n", \ - cpuid) -#define MSG_NEXT_CPU \ - printf("SMP: %d of %d CPU's online. Unlocking next CPU..\n", \ - smp_cpus, mp_ncpus) -#define MSG_FINAL_CPU \ - printf("SMP: All %d CPU's are online!\n", \ - smp_cpus) -#define MSG_TOOMANY_CPU \ - printf("SMP: Hey! Too many cpu's started, %d of %d running!\n", \ - smp_cpus, mp_ncpus) - -/* - * This is run by the secondary processor to kick things off. - * It basically drops into the switch routine to pick the first - * available process to run, which is probably an idle process. - */ - -void -secondary_main() -{ - u_int temp; - - get_mplock(); - - /* - * Record our ID so we know when we've released the mp_stk. - * We must remain single threaded through this. - */ - cpu_starting = cpuid; - smp_cpus++; - - /* Build our map of 'other' CPUs. */ - other_cpus = all_cpus & ~(1 << cpuid); - - printf("SMP: AP CPU #%d LAUNCHED!! Starting Scheduling...\n", - cpuid); - - /* XXX FIXME: i386 specific, and redundant: Setup the FPU. */ - load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS); - - curproc = NULL; /* ensure no context to save */ - cpu_switch(curproc); /* start first process */ - - panic("switch returned!"); -} - - -/* - * The main program loop for the idle process - */ - -static void -smp_idleloop(dummy) -void *dummy; -{ - int dcnt = 0; - int apic_id; - - /* - * This code is executed only on startup of the idleprocs - * The fact that this is executed is an indication that the - * idle procs are online and it's safe to kick off the first - * AP cpu. 
- */ - if ( ++smp_idle_loops == mp_ncpus ) { - printf("SMP: All idle procs online.\n"); - - /* let the init process finish */ - wakeup((caddr_t *)&smp_idle_loops); - -#ifndef NO_AUTOSTART - printf("SMP: *** AUTO *** starting 1st AP!\n"); - smp_cpus = 1; - smp_active = mp_ncpus; /* XXX */ - boot_unlock(); -#else - printf("You can now activate SMP processing, use: sysctl -w kern.smp_active=%d\n", mp_ncpus); -#endif - } - - spl0(); - rel_mplock(); - - while (1) { - /* - * make the optimiser assume nothing about the - * which*qs variables - */ - __asm __volatile("" : : : "memory"); - -#ifdef NO_AUTOSTART - if (smp_cpus == 0 && smp_active != 0) { - get_mplock(); - printf("SMP: Starting 1st AP!\n"); - smp_cpus = 1; - smp_active = mp_ncpus; /* XXX */ - boot_unlock(); - rel_mplock(); - } -#endif - - /* - * If smp_active is set to (say) 1, we want cpu id's - * 1,2,etc to freeze here. - */ - if (smp_active && smp_active <= cpuid) { - get_mplock(); - printf("SMP: cpu#%d freezing\n", cpuid); - wakeup((caddr_t)&smp_active); - rel_mplock(); - - while (smp_active <= cpuid) { - __asm __volatile("" : : : "memory"); - } - get_mplock(); - printf("SMP: cpu#%d waking up!\n", cpuid); - rel_mplock(); - } - -/* XXX DEBUG */ - apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]); - if (cpuid != apic_id) { - printf("SMP: cpuid = %d\n", cpuid); - printf("SMP: apic_id = %d\n", apic_id); - printf("PTD[MPPTDI] = %08x\n", PTD[MPPTDI]); - panic("cpuid mismatch! boom!!"); - } -/* XXX END DEBUG */ - - if (whichqs || whichrtqs || (!ignore_idleprocs && whichidqs)) { - /* grab lock for kernel "entry" */ - get_mplock(); - - /* We need to retest due to the spin lock */ - __asm __volatile("" : : : "memory"); - - if (whichqs || whichrtqs || - (!ignore_idleprocs && whichidqs)) { - splhigh(); - if (curproc) - setrunqueue(curproc); - cnt.v_swtch++; - cpu_switch(curproc); - microtime(&runtime); - - if (cpu_starting != -1 && - cpu_starting == cpuid) { - /* - * TADA! we have arrived! 
unlock the - * next cpu now that we have released - * the single mp_stk. - */ - MSG_CPU_MADEIT; - cpu_starting = -1; - - /* Init local apic for irq's */ - apic_initialize(); - - if (smp_cpus < mp_ncpus) { - MSG_NEXT_CPU; - boot_unlock(); - } else if (smp_cpus > mp_ncpus) { - MSG_TOOMANY_CPU; - panic("too many cpus"); - } else { - MSG_FINAL_CPU; - /* - * It's safe to send IPI's now - * that all CPUs are online. - */ - invltlb_ok = 1; - } - } - - (void)spl0(); - } - rel_mplock(); - } else { - dcnt++; - if (idle_debug && (dcnt % idle_debug) == 0) { - get_mplock(); - printf("idleproc pid#%d on cpu#%d, lock %08x\n", - curproc->p_pid, cpuid, mp_lock); - rel_mplock(); - } - } - } -} diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index f647070..a4a6423 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -22,17 +22,20 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: mp_machdep.c,v 1.44 1997/08/24 20:33:32 fsmp Exp $ + * $Id: mp_machdep.c,v 1.45 1997/08/25 21:28:08 bde Exp $ */ #include "opt_smp.h" #include <sys/param.h> #include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/sysctl.h> -#include <vm/vm.h> /* for KERNBASE */ -#include <vm/vm_param.h> /* for KERNBASE */ -#include <vm/pmap.h> /* for KERNBASE */ +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> @@ -230,15 +233,24 @@ int apic_id_to_logical[NAPICID]; /* Bitmap of all available CPUs */ u_int all_cpus; -/* Boot of AP uses this PTD */ -u_int *bootPTD; +/* AP uses this PTD during bootstrap */ +pd_entry_t *bootPTD; /* Hotwire a 0->4MB V==P mapping */ -extern pt_entry_t KPTphys; +extern pt_entry_t *KPTphys; /* Virtual address of per-cpu common_tss */ extern struct i386tss common_tss; +/* IdlePTD per cpu */ +pd_entry_t *IdlePTDS[NCPU]; + +/* "my" private page table page, for BSP init */ +extern pt_entry_t SMP_prvpt[]; + +/* Private page pointer to 
curcpu's PTD, used during BSP init */ +extern pd_entry_t *my_idlePTD; + /* * Local data and functions. */ @@ -1473,9 +1485,9 @@ start_all_aps(u_int boot_addr) int x, i; u_char mpbiosreason; u_long mpbioswarmvec; - pd_entry_t newptd; - pt_entry_t newpt; - int *newpp; + pd_entry_t *newptd; + pt_entry_t *newpt; + int *newpp, *stack; POSTCODE(START_ALL_APS_POST); @@ -1498,57 +1510,71 @@ start_all_aps(u_int boot_addr) /* start each AP */ for (x = 1; x <= mp_naps; ++x) { - /* HACK HACK HACK !!! */ + /* This is a bit verbose, it will go away soon. */ /* alloc new page table directory */ - newptd = (pd_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE)); + newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); + + /* Store the virtual PTD address for this CPU */ + IdlePTDS[x] = newptd; /* clone currently active one (ie: IdlePTD) */ bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ /* set up 0 -> 4MB P==V mapping for AP boot */ - newptd[0] = PG_V | PG_RW | ((u_long)KPTphys & PG_FRAME); + newptd[0] = (pd_entry_t) (PG_V | PG_RW | + ((u_long)KPTphys & PG_FRAME)); - /* store PTD for this AP */ - bootPTD = (pd_entry_t)vtophys(newptd); + /* store PTD for this AP's boot sequence */ + bootPTD = (pd_entry_t *)vtophys(newptd); /* alloc new page table page */ - newpt = (pt_entry_t)(kmem_alloc(kernel_map, PAGE_SIZE)); + newpt = (pt_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); /* set the new PTD's private page to point there */ - newptd[MPPTDI] = PG_V | PG_RW | vtophys(newpt); + newptd[MPPTDI] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt)); /* install self referential entry */ - newptd[PTDPTDI] = PG_V | PG_RW | vtophys(newptd); + newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); - /* get a new private data page */ + /* allocate a new private data page */ newpp = (int *)kmem_alloc(kernel_map, PAGE_SIZE); /* wire it into the private page table page */ - newpt[0] = PG_V | PG_RW | vtophys(newpp); + newpt[0] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpp)); /* wire the ptp into 
itself for access */ - newpt[1] = PG_V | PG_RW | vtophys(newpt); + newpt[1] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt)); - /* and the local apic */ + /* copy in the pointer to the local apic */ newpt[2] = SMP_prvpt[2]; /* and the IO apic mapping[s] */ for (i = 16; i < 32; i++) newpt[i] = SMP_prvpt[i]; - /* prime data page for it to use */ - newpp[0] = x; /* cpuid */ - newpp[1] = 0; /* curproc */ - newpp[2] = 0; /* curpcb */ - newpp[3] = 0; /* npxproc */ - newpp[4] = 0; /* runtime.tv_sec */ - newpp[5] = 0; /* runtime.tv_usec */ - newpp[6] = x << 24; /* cpu_lockid */ + /* allocate and set up an idle stack data page */ + stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); + newpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); - /* XXX NOTE: ABANDON bootPTD for now!!!! */ + newpt[4] = 0; /* *prv_CMAP1 */ + newpt[5] = 0; /* *prv_CMAP2 */ + newpt[6] = 0; /* *prv_CMAP3 */ - /* END REVOLTING HACKERY */ + /* prime data page for it to use */ + newpp[0] = x; /* cpuid */ + newpp[1] = 0; /* curproc */ + newpp[2] = 0; /* curpcb */ + newpp[3] = 0; /* npxproc */ + newpp[4] = 0; /* runtime.tv_sec */ + newpp[5] = 0; /* runtime.tv_usec */ + newpp[6] = x << 24; /* cpu_lockid */ + newpp[7] = 0; /* other_cpus */ + newpp[8] = (int)bootPTD; /* my_idlePTD */ + newpp[9] = 0; /* ss_tpr */ + newpp[10] = (int)&newpt[4]; /* prv_CMAP1 */ + newpp[11] = (int)&newpt[5]; /* prv_CMAP2 */ + newpp[12] = (int)&newpt[6]; /* prv_CMAP3 */ /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; @@ -1585,6 +1611,26 @@ start_all_aps(u_int boot_addr) outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); + /* + * Set up the idle context for the BSP. Similar to above except + * that some was done by locore, some by pmap.c and some is implicit + * because the BSP is cpu#0 and the page is initially zero, and also + * because we can refer to variables by name on the BSP.. 
+ */ + newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); + + bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ + IdlePTDS[0] = newptd; + + /* Point PTD[] to this page instead of IdlePTD's physical page */ + newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); + + my_idlePTD = (pd_entry_t *)vtophys(newptd); + + /* Allocate and setup BSP idle stack */ + stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); + SMP_prvpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); + pmap_set_opt_bsp(); /* number of APs actually started */ @@ -1800,7 +1846,7 @@ invltlb(void) * from executing at same time. */ int -stop_cpus( u_int map ) +stop_cpus(u_int map) { if (!smp_active) return 0; @@ -1832,7 +1878,7 @@ stop_cpus( u_int map ) * 1: ok */ int -restart_cpus( u_int map ) +restart_cpus(u_int map) { if (!smp_active) return 0; @@ -1844,3 +1890,63 @@ restart_cpus( u_int map ) return 1; } + +int smp_active = 0; /* are the APs allowed to run? */ +SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, ""); + +/* XXX maybe should be hw.ncpu */ +int smp_cpus = 1; /* how many cpu's running */ +SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, ""); + +int invltlb_ok = 0; /* throttle smp_invltlb() till safe */ +SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, ""); + +int do_page_zero_idle = 0; /* bzero pages for fun and profit in idleloop */ +SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW, + &do_page_zero_idle, 0, ""); + + +/* + * This is called once the rest of the system is up and running and we're + * ready to let the AP's out of the pen. + */ +void ap_init(void); + +void +ap_init() +{ + u_int temp; + u_int apic_id; + + smp_cpus++; + + /* Build our map of 'other' CPUs. */ + other_cpus = all_cpus & ~(1 << cpuid); + + printf("SMP: AP CPU #%d Launched!\n", cpuid); + + /* XXX FIXME: i386 specific, and redundant: Setup the FPU. 
*/ + load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS); + + /* A quick check from sanity claus */ + apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]); + if (cpuid != apic_id) { + printf("SMP: cpuid = %d\n", cpuid); + printf("SMP: apic_id = %d\n", apic_id); + printf("PTD[MPPTDI] = %08x\n", PTD[MPPTDI]); + panic("cpuid mismatch! boom!!"); + } + + /* Init local apic for irq's */ + apic_initialize(); + + /* + * Activate smp_invltlb, although strictly speaking, this isn't + * quite correct yet. We should have a bitfield for cpus willing + * to accept TLB flush IPI's or something and sync them. + */ + invltlb_ok = 1; + smp_active = 1; /* historic */ + + curproc = NULL; /* make sure */ +} diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index ac139d5..5895361 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.106 1997/08/20 05:25:48 fsmp Exp $ + * $Id: trap.c,v 1.107 1997/08/21 06:32:39 charnier Exp $ */ /* @@ -725,6 +725,7 @@ trap_fatal(frame) ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP printf("cpuid = %d\n", cpuid); + printf("lapic.id = %d\n", lapic.id); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%x\n", eva); @@ -769,6 +770,9 @@ trap_fatal(frame) } else { printf("Idle\n"); } +#ifdef SMP + printf("mp_lock = %08x\n", mp_lock); +#endif printf("interrupt mask = "); if ((cpl & net_imask) == net_imask) printf("net "); @@ -824,6 +828,7 @@ dblfault_handler() printf("ebp = 0x%x\n", common_tss.tss_ebp); #ifdef SMP printf("cpuid = %d\n", cpuid); + printf("lapic.id = %d\n", lapic.id); #endif panic("double fault"); } diff --git a/sys/sys/smp.h b/sys/sys/smp.h index f433b2f..276b838 100644 --- a/sys/sys/smp.h +++ b/sys/sys/smp.h @@ -6,7 +6,7 @@ * this stuff is worth it, you can buy me a beer in return. 
Poul-Henning Kamp * ---------------------------------------------------------------------------- * - * $Id: smp.h,v 1.24 1997/08/24 20:17:05 smp Exp smp $ + * $Id: smp.h,v 1.29 1997/08/24 20:33:24 fsmp Exp $ * */ @@ -100,7 +100,6 @@ extern int cpu_num_to_apic_id[]; extern int io_num_to_apic_id[]; extern int apic_id_to_logical[]; extern u_int all_cpus; -extern u_int SMP_prvpt[]; extern u_char SMP_ioapic[]; /* functions in mp_machdep.c */ |