From 83227618098af1ac7c74fc80c5bc890fe26c9b55 Mon Sep 17 00:00:00 2001 From: jake Date: Mon, 4 Mar 2002 05:20:29 +0000 Subject: Allocate tlb contexts on the fly in cpu_switch, instead of statically 1 to 1 with pmaps. When the context numbers wrap around we flush all user mappings from the tlb. This makes use of the array indexed by cpuid to allow a pmap to have a different context number on a different cpu. If the context numbers are then divided evenly among cpus such that none are shared, we can avoid sending tlb shootdown ipis in an smp system for non-shared pmaps. This also removes a limit of 8192 processes (pmaps) that could be active at any given time due to running out of tlb contexts. Inspired by: the brown book Crucial bugfix from: tmm --- sys/sparc64/include/pcpu.h | 3 + sys/sparc64/include/pmap.h | 1 + sys/sparc64/include/tlb.h | 29 ++++----- sys/sparc64/sparc64/genassym.c | 7 ++ sys/sparc64/sparc64/machdep.c | 4 ++ sys/sparc64/sparc64/pmap.c | 105 +++++++++++++++--------------- sys/sparc64/sparc64/swtch.S | 142 +++++++++++++++++++++++++++++++---------- sys/sparc64/sparc64/swtch.s | 142 +++++++++++++++++++++++++++++++---------- 8 files changed, 297 insertions(+), 136 deletions(-) diff --git a/sys/sparc64/include/pcpu.h b/sys/sparc64/include/pcpu.h index 671cec5..c2fd512 100644 --- a/sys/sparc64/include/pcpu.h +++ b/sys/sparc64/include/pcpu.h @@ -45,6 +45,9 @@ #define PCPU_MD_FIELDS \ struct intr_queue pc_iq; /* interrupt queue */ \ u_int pc_mid; \ + u_int pc_tlb_ctx; \ + u_int pc_tlb_ctx_max; \ + u_int pc_tlb_ctx_min; \ u_int pc_wp_insn; /* watch point support */ \ u_long pc_wp_pstate; \ u_long pc_wp_va; \ diff --git a/sys/sparc64/include/pmap.h b/sys/sparc64/include/pmap.h index 858d894..f31c3d6 100644 --- a/sys/sparc64/include/pmap.h +++ b/sys/sparc64/include/pmap.h @@ -84,6 +84,7 @@ struct pv_entry { }; void pmap_bootstrap(vm_offset_t ekva); +void pmap_context_rollover(void); vm_offset_t pmap_kextract(vm_offset_t va); void pmap_kenter_flags(vm_offset_t va, vm_offset_t pa, u_long flags); void pmap_qenter_flags(vm_offset_t va, vm_page_t *m, int count, u_long fl); diff --git a/sys/sparc64/include/tlb.h b/sys/sparc64/include/tlb.h index 6f72bca..d3697e5 100644 --- a/sys/sparc64/include/tlb.h +++ b/sys/sparc64/include/tlb.h @@ -64,6 +64,8 @@ #define TLB_DEMAP_NUCLEUS (TLB_DEMAP_ID(TLB_DEMAP_ID_NUCLEUS)) #define TLB_CTX_KERNEL (0) +#define TLB_CTX_USER_MIN (1) +#define TLB_CTX_USER_MAX (8192) #define TLB_DTLB (1 << 0) #define TLB_ITLB (1 << 1) @@ -105,14 +107,10 @@ tlb_dtlb_page_demap(u_long ctx, vm_offset_t va) stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, ASI_DMMU_DEMAP, 0); membar(Sync); - } else { - stxa(AA_DMMU_SCXR, ASI_DMMU, ctx); - membar(Sync); - stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_SECONDARY | TLB_DEMAP_PAGE, + } else if (ctx != -1) { + stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE, ASI_DMMU_DEMAP, 0); membar(Sync); - stxa(AA_DMMU_SCXR, ASI_DMMU, 0); - membar(Sync); } } @@ -155,15 +153,10 @@ tlb_itlb_page_demap(u_long ctx, vm_offset_t va) stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, ASI_IMMU_DEMAP, 0); flush(KERNBASE); - } else { - stxa(AA_DMMU_SCXR, ASI_DMMU, ctx); - membar(Sync); - stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_SECONDARY | TLB_DEMAP_PAGE, + } else if (ctx != -1) { + stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE, ASI_IMMU_DEMAP, 0); membar(Sync); - stxa(AA_DMMU_SCXR, ASI_DMMU, 0); - /* flush probably not needed. */ - membar(Sync); } } @@ -188,10 +181,12 @@ tlb_itlb_store(vm_offset_t va, u_long ctx, struct tte tte) } static __inline void -tlb_context_demap(u_int context) +tlb_context_demap(u_int ctx) { - tlb_dtlb_context_primary_demap(); - tlb_itlb_context_primary_demap(); + if (ctx != -1) { + tlb_dtlb_context_primary_demap(); + tlb_itlb_context_primary_demap(); + } } static __inline void @@ -231,6 +226,7 @@ tlb_tte_demap(struct tte tte, u_int ctx) static __inline void tlb_store(u_int tlb, vm_offset_t va, u_long ctx, struct tte tte) { + KASSERT(ctx != -1, ("tlb_store: invalid context")); if (tlb & TLB_DTLB) tlb_dtlb_store(va, ctx, tte); if (tlb & TLB_ITLB) @@ -240,6 +236,7 @@ tlb_store(u_int tlb, vm_offset_t va, u_long ctx, struct tte tte) static __inline void tlb_store_slot(u_int tlb, vm_offset_t va, u_long ctx, struct tte tte, int slot) { + KASSERT(ctx != -1, ("tlb_store_slot: invalid context")); if (tlb & TLB_DTLB) tlb_dtlb_store_slot(va, ctx, tte, slot); if (tlb & TLB_ITLB) diff --git a/sys/sparc64/sparc64/genassym.c b/sys/sparc64/sparc64/genassym.c index 61ee811..993c6e5 100644 --- a/sys/sparc64/sparc64/genassym.c +++ b/sys/sparc64/sparc64/genassym.c @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include #include #include @@ -153,6 +155,9 @@ ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid)); ASSYM(PC_CPUMASK, offsetof(struct pcpu, pc_cpumask)); ASSYM(PC_IQ, offsetof(struct pcpu, pc_iq)); ASSYM(PC_MID, offsetof(struct pcpu, pc_mid)); +ASSYM(PC_TLB_CTX, offsetof(struct pcpu, pc_tlb_ctx)); +ASSYM(PC_TLB_CTX_MAX, offsetof(struct pcpu, pc_tlb_ctx_max)); +ASSYM(PC_TLB_CTX_MIN, offsetof(struct pcpu, pc_tlb_ctx_min)); ASSYM(PC_SIZEOF, sizeof(struct pcpu)); ASSYM(IH_SHIFT, IH_SHIFT); @@ -188,6 +193,8 @@ ASSYM(KEF_NEEDRESCHED, KEF_NEEDRESCHED); ASSYM(MD_UTRAP, offsetof(struct mdproc, md_utrap)); +ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock)); + ASSYM(P_COMM, offsetof(struct proc, p_comm)); ASSYM(P_MD, offsetof(struct proc, p_md)); ASSYM(P_PID, offsetof(struct proc, p_pid)); diff --git a/sys/sparc64/sparc64/machdep.c b/sys/sparc64/sparc64/machdep.c index 0517ede..50e1daf 100644 --- a/sys/sparc64/sparc64/machdep.c +++ b/sys/sparc64/sparc64/machdep.c @@ -96,6 +96,7 @@ #include #include #include +#include #include #include @@ -296,6 +297,9 @@ sparc64_init(caddr_t mdp, u_int *state, u_int mid, u_int bootmid, pc->pc_curthread = &thread0; pc->pc_curpcb = thread0.td_pcb; pc->pc_mid = mid; + pc->pc_tlb_ctx = TLB_CTX_USER_MIN; + pc->pc_tlb_ctx_min = TLB_CTX_USER_MIN; + pc->pc_tlb_ctx_max = TLB_CTX_USER_MAX; /* * Initialize global registers. diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c index e92402d..19409e9 100644 --- a/sys/sparc64/sparc64/pmap.c +++ b/sys/sparc64/sparc64/pmap.c @@ -155,13 +155,6 @@ vm_offset_t kernel_page; */ struct pmap kernel_pmap_store; -/* - * Map of free and in use hardware contexts and index of first potentially - * free context. - */ -static char pmap_context_map[PMAP_CONTEXT_MAX]; -static u_int pmap_context_base; - static boolean_t pmap_initialized = FALSE; /* Convert a tte data field into a page mask */ @@ -175,12 +168,6 @@ static vm_offset_t pmap_page_masks[] = { #define PMAP_TD_GET_MASK(d) pmap_page_masks[TD_GET_SIZE((d))] /* - * Allocate and free hardware context numbers. - */ -static u_int pmap_context_alloc(void); -static void pmap_context_destroy(u_int i); - -/* * Allocate physical memory for use in pmap_bootstrap. */ static vm_offset_t pmap_bootstrap_alloc(vm_size_t size); @@ -478,6 +465,43 @@ pmap_bootstrap_alloc(vm_size_t size) panic("pmap_bootstrap_alloc"); } +void +pmap_context_rollover(void) +{ + u_long data; + int i; + + mtx_assert(&sched_lock, MA_OWNED); + CTR0(KTR_PMAP, "pmap_context_rollover"); + for (i = 0; i < 64; i++) { + data = ldxa(TLB_DAR_SLOT(i), ASI_DTLB_DATA_ACCESS_REG); + if ((data & TD_V) != 0 && (data & TD_P) == 0) { + stxa(TLB_DAR_SLOT(i), ASI_DTLB_DATA_ACCESS_REG, 0); + membar(Sync); + } + data = ldxa(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG); + if ((data & TD_V) != 0 && (data & TD_P) == 0) { + stxa(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG, 0); + membar(Sync); + } + } + PCPU_SET(tlb_ctx, PCPU_GET(tlb_ctx_min)); +} + +static __inline u_int +pmap_context_alloc(void) +{ + u_int context; + + mtx_assert(&sched_lock, MA_OWNED); + context = PCPU_GET(tlb_ctx); + if (context + 1 == PCPU_GET(tlb_ctx_max)) + pmap_context_rollover(); + else + PCPU_SET(tlb_ctx, context + 1); + return (context); +} + /* * Initialize the pmap module. */ @@ -1078,8 +1102,10 @@ pmap_swapin_thread(struct thread *td) void pmap_pinit0(pmap_t pm) { + int i; - pm->pm_context[PCPU_GET(cpuid)] = pmap_context_alloc(); + for (i = 0; i < MAXCPU; i++) + pm->pm_context[i] = 0; pm->pm_active = 0; pm->pm_count = 1; pm->pm_tsb = NULL; @@ -1129,8 +1155,9 @@ pmap_pinit(pmap_t pm) } pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES); + for (i = 0; i < MAXCPU; i++) + pm->pm_context[i] = -1; pm->pm_active = 0; - pm->pm_context[PCPU_GET(cpuid)] = pmap_context_alloc(); pm->pm_count = 1; TAILQ_INIT(&pm->pm_pvlist); bzero(&pm->pm_stats, sizeof(pm->pm_stats)); @@ -1162,7 +1189,6 @@ pmap_release(pmap_t pm) KASSERT(pmap_resident_count(pm) == 0, ("pmap_release: resident pages %ld != 0", pmap_resident_count(pm))); - pmap_context_destroy(pm->pm_context[PCPU_GET(cpuid)]); TAILQ_FOREACH(m, &obj->memq, listq) { if (vm_page_sleep_busy(m, FALSE, "pmaprl")) continue; @@ -1769,6 +1795,7 @@ pmap_mincore(pmap_t pm, vm_offset_t addr) void pmap_activate(struct thread *td) { + struct vmspace *vm; vm_offset_t tsb; u_long context; pmap_t pm; @@ -1777,21 +1804,24 @@ pmap_activate(struct thread *td) * Load all the data we need up front to encourage the compiler to * not issue any loads while we have interrupts disable below. */ - pm = &td->td_proc->p_vmspace->vm_pmap; - context = pm->pm_context[PCPU_GET(cpuid)]; + vm = td->td_proc->p_vmspace; + pm = &vm->vm_pmap; tsb = (vm_offset_t)pm->pm_tsb; - KASSERT(context != 0, ("pmap_activate: activating nucleus context")); - KASSERT(context != -1, ("pmap_activate: steal context")); KASSERT(pm->pm_active == 0, ("pmap_activate: pmap already active?")); + KASSERT(pm->pm_context[PCPU_GET(cpuid)] != 0, + ("pmap_activate: activating nucleus context?")); + mtx_lock_spin(&sched_lock); wrpr(pstate, 0, PSTATE_MMU); mov(tsb, TSB_REG); - wrpr(pstate, 0, PSTATE_NORMAL); - pm->pm_active |= 1 << PCPU_GET(cpuid); + wrpr(pstate, 0, PSTATE_KERNEL); + context = pmap_context_alloc(); + pm->pm_context[PCPU_GET(cpuid)] = context; + pm->pm_active |= PCPU_GET(cpumask); stxa(AA_DMMU_PCXR, ASI_DMMU, context); membar(Sync); - wrpr(pstate, 0, PSTATE_KERNEL); + mtx_unlock_spin(&sched_lock); } vm_offset_t @@ -1800,32 +1830,3 @@ pmap_addr_hint(vm_object_t object, vm_offset_t va, vm_size_t size) return (va); } - -/* - * Allocate a hardware context number from the context map. - */ -static u_int -pmap_context_alloc(void) -{ - u_int i; - - i = pmap_context_base; - do { - if (pmap_context_map[i] == 0) { - pmap_context_map[i] = 1; - pmap_context_base = (i + 1) & (PMAP_CONTEXT_MAX - 1); - return (i); - } - } while ((i = (i + 1) & (PMAP_CONTEXT_MAX - 1)) != pmap_context_base); - panic("pmap_context_alloc"); -} - -/* - * Free a hardware context number back to the context map. - */ -static void -pmap_context_destroy(u_int i) -{ - - pmap_context_map[i] = 0; -} diff --git a/sys/sparc64/sparc64/swtch.S b/sys/sparc64/sparc64/swtch.S index 731d586..70d95ec 100644 --- a/sys/sparc64/sparc64/swtch.S +++ b/sys/sparc64/sparc64/swtch.S @@ -31,6 +31,9 @@ #include #include + .register %g2, #ignore + .register %g3, #ignore + #include "assym.s" ENTRY(cpu_throw) @@ -89,18 +92,14 @@ ENTRY(cpu_switch) */ .Lsw1: #if KTR_COMPILE & KTR_PROC - CATR(KTR_PROC, "cpu_switch: td=%d (%s) pc=%#lx fp=%#lx" - , %l3, %l4, %l5, 7, 8, 9) - ldx [%o0 + TD_PROC], %l4 - lduw [%l4 + P_PID], %l5 - stx %l5, [%l3 + KTR_PARM1] - add %l4, P_COMM, %l5 - stx %l5, [%l3 + KTR_PARM2] - ldx [%o0 + TD_PCB], %l4 - ldx [%l4 + PCB_PC], %l5 - stx %l5, [%l3 + KTR_PARM3] - ldx [%l4 + PCB_FP], %l5 - stx %l5, [%l3 + KTR_PARM4] + CATR(KTR_PROC, "cpu_switch: new td=%p pc=%#lx fp=%#lx" + , %g1, %g2, %g3, 7, 8, 9) + stx %o0, [%g1 + KTR_PARM1] + ldx [%o0 + TD_PCB], %g2 + ldx [%g2 + PCB_PC], %g3 + stx %g3, [%g1 + KTR_PARM2] + ldx [%g2 + PCB_FP], %g3 + stx %g3, [%g1 + KTR_PARM3] 9: #endif ldx [%o0 + TD_PCB], %o1 @@ -110,6 +109,9 @@ ENTRY(cpu_switch) stx %o0, [PCPU(CURTHREAD)] stx %o1, [PCPU(CURPCB)] + SET(sched_lock, %o3, %o2) + stx %o0, [%o2 + MTX_LOCK] + wrpr %g0, PSTATE_NORMAL, %pstate mov %o1, PCB_REG wrpr %g0, PSTATE_ALT, %pstate @@ -124,31 +126,36 @@ ENTRY(cpu_switch) ldx [%l2 + P_VMSPACE], %l2 ldx [%o2 + P_VMSPACE], %o2 +#if KTR_COMPILE & KTR_PROC + CATR(KTR_PROC, "cpu_switch: new vm=%p old vm=%p" + , %g1, %g2, %g3, 7, 8, 9) + stx %o2, [%g1 + KTR_PARM1] + stx %l2, [%g1 + KTR_PARM2] +9: +#endif + /* - * If they're the same we are done. + * If they are the same we are done. */ cmp %l2, %o2 be,a,pn %xcc, 4f nop - /* - * If the old process has nucleus context we can skip demapping the - * tsb. - */ + lduw [PCPU(CPUID)], %o3 + sllx %o3, INT_SHIFT, %o3 + add %o2, VM_PMAP + PM_CONTEXT, %o4 + lduw [PCPU(CPUID)], %l3 sllx %l3, INT_SHIFT, %l3 add %l2, VM_PMAP + PM_CONTEXT, %l4 - lduw [%l3 + %l4], %l5 - brz,a,pn %l5, 3f - nop /* - * Demap the old process's tsb. + * If the old process has nucleus context we don't want to deactivate + * its pmap on this cpu. */ - ldx [%l2 + VM_PMAP + PM_TSB], %l3 - or %l3, TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, %l3 - stxa %g0, [%l3] ASI_DMMU_DEMAP - membar #Sync + lduw [%l3 + %l4], %l5 + brz,a %l5, 2f + nop /* * Mark the pmap no longer active on this cpu. @@ -159,42 +166,103 @@ ENTRY(cpu_switch) stw %l3, [%l2 + VM_PMAP + PM_ACTIVE] /* + * Take away its context. + */ + lduw [PCPU(CPUID)], %l3 + sllx %l3, INT_SHIFT, %l3 + add %l2, VM_PMAP + PM_CONTEXT, %l4 + mov -1, %l5 + stw %l5, [%l3 + %l4] + + /* * If the new process has nucleus context we are done. */ -3: lduw [PCPU(CPUID)], %o3 - sllx %o3, INT_SHIFT, %o3 - add %o2, VM_PMAP + PM_CONTEXT, %o4 - lduw [%o3 + %o4], %o5 +2: lduw [%o3 + %o4], %o5 + +#if KTR_COMPILE & KTR_PROC + CATR(KTR_PROC, "cpu_switch: ctx=%#lx" + , %g1, %g2, %g3, 7, 8, 9) + stx %o5, [%g1 + KTR_PARM1] +9: +#endif + brz,a,pn %o5, 4f nop /* - * Install the new primary context. + * Find the current free tlb context for this cpu and install it as + * the new primary context. */ + lduw [PCPU(TLB_CTX)], %o5 + stw %o5, [%o3 + %o4] mov AA_DMMU_PCXR, %o4 stxa %o5, [%o4] ASI_DMMU - flush %o0 + membar #Sync + + /* + * See if we have run out of free contexts. + */ + lduw [PCPU(TLB_CTX_MAX)], %o3 + +#if KTR_COMPILE & KTR_PROC + CATR(KTR_PROC, "cpu_switch: ctx=%#lx next=%#lx max=%#lx" + , %g1, %g2, %g3, 7, 8, 9) + stx %o5, [%g1 + KTR_PARM1] + add %o5, 1, %g2 + stx %g2, [%g1 + KTR_PARM2] + stx %o3, [%g1 + KTR_PARM3] +9: +#endif + + add %o5, 1, %o5 + cmp %o3, %o5 + bne,a,pt %xcc, 3f + stw %o5, [PCPU(TLB_CTX)] + +#if KTR_COMPILE & KTR_PROC + CATR(KTR_PROC, "cpu_switch: context rollover" + , %g1, %g2, %g3, 7, 8, 9) +9: +#endif + + /* + * We will start re-using contexts on the next switch. Flush all + * non-nucleus mappings from the tlb, and reset the next free context. + */ + call pmap_context_rollover + nop + ldx [PCPU(CURTHREAD)], %o0 + ldx [%o0 + TD_PROC], %o2 + ldx [%o2 + P_VMSPACE], %o2 /* * Mark the pmap as active on this cpu. */ - lduw [%o2 + VM_PMAP + PM_ACTIVE], %o3 +3: lduw [%o2 + VM_PMAP + PM_ACTIVE], %o3 lduw [PCPU(CPUMASK)], %o4 or %o3, %o4, %o3 stw %o3, [%o2 + VM_PMAP + PM_ACTIVE] /* - * Switch to mmu globals and install the preloaded tsb pointer. + * Load the address of the tsb, switch to mmu globals, and install + * the preloaded tsb pointer. */ ldx [%o2 + VM_PMAP + PM_TSB], %o3 wrpr %g0, PSTATE_MMU, %pstate mov %o3, TSB_REG wrpr %g0, PSTATE_KERNEL, %pstate +4: +#if KTR_COMPILE & KTR_PROC + CATR(KTR_PROC, "cpu_switch: return" + , %g1, %g2, %g3, 7, 8, 9) +9: +#endif + /* * Done. Return and load the new process's window from the stack. */ -4: ret + ret restore END(cpu_switch) @@ -209,6 +277,9 @@ ENTRY(savectx) restore %g0, 0, %o0 END(savectx) +/* + * void savefpctx(struct fpstate *); + */ ENTRY(savefpctx) wr %g0, FPRS_FEF, %fprs wr %g0, ASI_BLK_S, %asi @@ -221,6 +292,9 @@ ENTRY(savefpctx) wr %g0, 0, %fprs END(savefpctx) +/* + * void restorefpctx(struct fpstate *); + */ ENTRY(restorefpctx) wr %g0, FPRS_FEF, %fprs wr %g0, ASI_BLK_S, %asi diff --git a/sys/sparc64/sparc64/swtch.s b/sys/sparc64/sparc64/swtch.s index 731d586..70d95ec 100644 --- a/sys/sparc64/sparc64/swtch.s +++ b/sys/sparc64/sparc64/swtch.s @@ -31,6 +31,9 @@ #include #include + .register %g2, #ignore + .register %g3, #ignore + #include "assym.s" ENTRY(cpu_throw) @@ -89,18 +92,14 @@ ENTRY(cpu_switch) */ .Lsw1: #if KTR_COMPILE & KTR_PROC - CATR(KTR_PROC, "cpu_switch: td=%d (%s) pc=%#lx fp=%#lx" - , %l3, %l4, %l5, 7, 8, 9) - ldx [%o0 + TD_PROC], %l4 - lduw [%l4 + P_PID], %l5 - stx %l5, [%l3 + KTR_PARM1] - add %l4, P_COMM, %l5 - stx %l5, [%l3 + KTR_PARM2] - ldx [%o0 + TD_PCB], %l4 - ldx [%l4 + PCB_PC], %l5 - stx %l5, [%l3 + KTR_PARM3] - ldx [%l4 + PCB_FP], %l5 - stx %l5, [%l3 + KTR_PARM4] + CATR(KTR_PROC, "cpu_switch: new td=%p pc=%#lx fp=%#lx" + , %g1, %g2, %g3, 7, 8, 9) + stx %o0, [%g1 + KTR_PARM1] + ldx [%o0 + TD_PCB], %g2 + ldx [%g2 + PCB_PC], %g3 + stx %g3, [%g1 + KTR_PARM2] + ldx [%g2 + PCB_FP], %g3 + stx %g3, [%g1 + KTR_PARM3] 9: #endif ldx [%o0 + TD_PCB], %o1 @@ -110,6 +109,9 @@ ENTRY(cpu_switch) stx %o0, [PCPU(CURTHREAD)] stx %o1, [PCPU(CURPCB)] + SET(sched_lock, %o3, %o2) + stx %o0, [%o2 + MTX_LOCK] + wrpr %g0, PSTATE_NORMAL, %pstate mov %o1, PCB_REG wrpr %g0, PSTATE_ALT, %pstate @@ -124,31 +126,36 @@ ENTRY(cpu_switch) ldx [%l2 + P_VMSPACE], %l2 ldx [%o2 + P_VMSPACE], %o2 +#if KTR_COMPILE & KTR_PROC + CATR(KTR_PROC, "cpu_switch: new vm=%p old vm=%p" + , %g1, %g2, %g3, 7, 8, 9) + stx %o2, [%g1 + KTR_PARM1] + stx %l2, [%g1 + KTR_PARM2] +9: +#endif + /* - * If they're the same we are done. + * If they are the same we are done. */ cmp %l2, %o2 be,a,pn %xcc, 4f nop - /* - * If the old process has nucleus context we can skip demapping the - * tsb. - */ + lduw [PCPU(CPUID)], %o3 + sllx %o3, INT_SHIFT, %o3 + add %o2, VM_PMAP + PM_CONTEXT, %o4 + lduw [PCPU(CPUID)], %l3 sllx %l3, INT_SHIFT, %l3 add %l2, VM_PMAP + PM_CONTEXT, %l4 - lduw [%l3 + %l4], %l5 - brz,a,pn %l5, 3f - nop /* - * Demap the old process's tsb. + * If the old process has nucleus context we don't want to deactivate + * its pmap on this cpu. */ - ldx [%l2 + VM_PMAP + PM_TSB], %l3 - or %l3, TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, %l3 - stxa %g0, [%l3] ASI_DMMU_DEMAP - membar #Sync + lduw [%l3 + %l4], %l5 + brz,a %l5, 2f + nop /* * Mark the pmap no longer active on this cpu. @@ -159,42 +166,103 @@ ENTRY(cpu_switch) stw %l3, [%l2 + VM_PMAP + PM_ACTIVE] /* + * Take away its context. + */ + lduw [PCPU(CPUID)], %l3 + sllx %l3, INT_SHIFT, %l3 + add %l2, VM_PMAP + PM_CONTEXT, %l4 + mov -1, %l5 + stw %l5, [%l3 + %l4] + + /* * If the new process has nucleus context we are done. */ -3: lduw [PCPU(CPUID)], %o3 - sllx %o3, INT_SHIFT, %o3 - add %o2, VM_PMAP + PM_CONTEXT, %o4 - lduw [%o3 + %o4], %o5 +2: lduw [%o3 + %o4], %o5 + +#if KTR_COMPILE & KTR_PROC + CATR(KTR_PROC, "cpu_switch: ctx=%#lx" + , %g1, %g2, %g3, 7, 8, 9) + stx %o5, [%g1 + KTR_PARM1] +9: +#endif + brz,a,pn %o5, 4f nop /* - * Install the new primary context. + * Find the current free tlb context for this cpu and install it as + * the new primary context. */ + lduw [PCPU(TLB_CTX)], %o5 + stw %o5, [%o3 + %o4] mov AA_DMMU_PCXR, %o4 stxa %o5, [%o4] ASI_DMMU - flush %o0 + membar #Sync + + /* + * See if we have run out of free contexts. + */ + lduw [PCPU(TLB_CTX_MAX)], %o3 + +#if KTR_COMPILE & KTR_PROC + CATR(KTR_PROC, "cpu_switch: ctx=%#lx next=%#lx max=%#lx" + , %g1, %g2, %g3, 7, 8, 9) + stx %o5, [%g1 + KTR_PARM1] + add %o5, 1, %g2 + stx %g2, [%g1 + KTR_PARM2] + stx %o3, [%g1 + KTR_PARM3] +9: +#endif + + add %o5, 1, %o5 + cmp %o3, %o5 + bne,a,pt %xcc, 3f + stw %o5, [PCPU(TLB_CTX)] + +#if KTR_COMPILE & KTR_PROC + CATR(KTR_PROC, "cpu_switch: context rollover" + , %g1, %g2, %g3, 7, 8, 9) +9: +#endif + + /* + * We will start re-using contexts on the next switch. Flush all + * non-nucleus mappings from the tlb, and reset the next free context. + */ + call pmap_context_rollover + nop + ldx [PCPU(CURTHREAD)], %o0 + ldx [%o0 + TD_PROC], %o2 + ldx [%o2 + P_VMSPACE], %o2 /* * Mark the pmap as active on this cpu. */ - lduw [%o2 + VM_PMAP + PM_ACTIVE], %o3 +3: lduw [%o2 + VM_PMAP + PM_ACTIVE], %o3 lduw [PCPU(CPUMASK)], %o4 or %o3, %o4, %o3 stw %o3, [%o2 + VM_PMAP + PM_ACTIVE] /* - * Switch to mmu globals and install the preloaded tsb pointer. + * Load the address of the tsb, switch to mmu globals, and install + * the preloaded tsb pointer. */ ldx [%o2 + VM_PMAP + PM_TSB], %o3 wrpr %g0, PSTATE_MMU, %pstate mov %o3, TSB_REG wrpr %g0, PSTATE_KERNEL, %pstate +4: +#if KTR_COMPILE & KTR_PROC + CATR(KTR_PROC, "cpu_switch: return" + , %g1, %g2, %g3, 7, 8, 9) +9: +#endif + /* * Done. Return and load the new process's window from the stack. */ -4: ret + ret restore END(cpu_switch) @@ -209,6 +277,9 @@ ENTRY(savectx) restore %g0, 0, %o0 END(savectx) +/* + * void savefpctx(struct fpstate *); + */ ENTRY(savefpctx) wr %g0, FPRS_FEF, %fprs wr %g0, ASI_BLK_S, %asi @@ -221,6 +292,9 @@ ENTRY(savefpctx) wr %g0, 0, %fprs END(savefpctx) +/* + * void restorefpctx(struct fpstate *); + */ ENTRY(restorefpctx) wr %g0, FPRS_FEF, %fprs wr %g0, ASI_BLK_S, %asi -- cgit v1.1