author		jake <jake@FreeBSD.org>	2002-03-04 05:20:29 +0000
committer	jake <jake@FreeBSD.org>	2002-03-04 05:20:29 +0000
commit		83227618098af1ac7c74fc80c5bc890fe26c9b55 (patch)
tree		a9197741e85c56de06ad5824608f968f989a1259
parent		dd2207f5cd6095600f89c5258f3dafe916f98371 (diff)
Allocate tlb contexts on the fly in cpu_switch, instead of statically 1 to 1
with pmaps.  When the context numbers wrap around we flush all user mappings
from the tlb.  This makes use of the array indexed by cpuid to allow a pmap
to have a different context number on a different cpu.  If the context numbers
are then divided evenly among cpus such that none are shared, we can avoid
sending tlb shootdown ipis in an smp system for non-shared pmaps.  This also
removes a limit of 8192 processes (pmaps) that could be active at any given
time due to running out of tlb contexts.

Inspired by:		the brown book
Crucial bugfix from:	tmm
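The heart of the change is the per-cpu context allocator that cpu_switch and
pmap_activate now use.  A minimal sketch of the idea in ordinary C, assuming
hypothetical names (ctx_alloc, struct cpu_ctx, struct pmap_sim) and a
single-threaded simulation in place of the kernel's PCPU accessors and
sched_lock:

#include <stdio.h>

#define	MAXCPU		2
#define	CTX_USER_MIN	1
#define	CTX_USER_MAX	8192
#define	CTX_INVALID	(-1)

/* Per-cpu allocator state, standing in for pc_tlb_ctx{,_min,_max}. */
struct cpu_ctx {
	int	next;		/* next free context number */
	int	min;		/* first context this cpu may hand out */
	int	max;		/* one past the last usable context */
};

static struct cpu_ctx cpus[MAXCPU];

/* Stand-in for a pmap: one context number per cpu, -1 when inactive. */
struct pmap_sim {
	int	ctx[MAXCPU];
};

/*
 * Mirrors pmap_context_alloc(): hand out the next number and, when the
 * window is exhausted, roll over.  The kernel flushes all non-nucleus
 * tlb entries at that point (pmap_context_rollover()); here we only
 * reset the counter.
 */
static int
ctx_alloc(int cpuid)
{
	struct cpu_ctx *c = &cpus[cpuid];
	int ctx = c->next;

	if (ctx + 1 == c->max)
		c->next = c->min;	/* rollover */
	else
		c->next = ctx + 1;
	return (ctx);
}

/* Roughly what activating a pmap on a cpu now does. */
static void
activate(struct pmap_sim *pm, int cpuid)
{
	pm->ctx[cpuid] = ctx_alloc(cpuid);
}

int
main(void)
{
	struct pmap_sim a = { { CTX_INVALID, CTX_INVALID } };
	struct pmap_sim b = { { CTX_INVALID, CTX_INVALID } };
	int i;

	/* Divide the context space evenly so cpus never share numbers. */
	for (i = 0; i < MAXCPU; i++) {
		cpus[i].min = CTX_USER_MIN + i * (CTX_USER_MAX / MAXCPU);
		cpus[i].max = cpus[i].min + CTX_USER_MAX / MAXCPU - 1;
		cpus[i].next = cpus[i].min;
	}
	activate(&a, 0);
	activate(&b, 0);
	activate(&a, 1);
	printf("pmap a: cpu0 ctx %d, cpu1 ctx %d\n", a.ctx[0], a.ctx[1]);
	printf("pmap b: cpu0 ctx %d\n", b.ctx[0]);
	return (0);
}

Because each cpu draws from a disjoint window, a pmap active on only one cpu
can never alias a context in use elsewhere, which is what lets the commit
avoid tlb shootdown ipis for non-shared pmaps.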
-rw-r--r--	sys/sparc64/include/pcpu.h	3
-rw-r--r--	sys/sparc64/include/pmap.h	1
-rw-r--r--	sys/sparc64/include/tlb.h	29
-rw-r--r--	sys/sparc64/sparc64/genassym.c	7
-rw-r--r--	sys/sparc64/sparc64/machdep.c	4
-rw-r--r--	sys/sparc64/sparc64/pmap.c	105
-rw-r--r--	sys/sparc64/sparc64/swtch.S	142
7 files changed, 189 insertions, 102 deletions
diff --git a/sys/sparc64/include/pcpu.h b/sys/sparc64/include/pcpu.h
index 671cec5..c2fd512 100644
--- a/sys/sparc64/include/pcpu.h
+++ b/sys/sparc64/include/pcpu.h
@@ -45,6 +45,9 @@
#define PCPU_MD_FIELDS \
struct intr_queue pc_iq; /* interrupt queue */ \
u_int pc_mid; \
+ u_int pc_tlb_ctx; \
+ u_int pc_tlb_ctx_max; \
+ u_int pc_tlb_ctx_min; \
u_int pc_wp_insn; /* watch point support */ \
u_long pc_wp_pstate; \
u_long pc_wp_va; \
diff --git a/sys/sparc64/include/pmap.h b/sys/sparc64/include/pmap.h
index 858d894..f31c3d6 100644
--- a/sys/sparc64/include/pmap.h
+++ b/sys/sparc64/include/pmap.h
@@ -84,6 +84,7 @@ struct pv_entry {
};
void pmap_bootstrap(vm_offset_t ekva);
+void pmap_context_rollover(void);
vm_offset_t pmap_kextract(vm_offset_t va);
void pmap_kenter_flags(vm_offset_t va, vm_offset_t pa, u_long flags);
void pmap_qenter_flags(vm_offset_t va, vm_page_t *m, int count, u_long fl);
diff --git a/sys/sparc64/include/tlb.h b/sys/sparc64/include/tlb.h
index 6f72bca..d3697e5 100644
--- a/sys/sparc64/include/tlb.h
+++ b/sys/sparc64/include/tlb.h
@@ -64,6 +64,8 @@
#define TLB_DEMAP_NUCLEUS (TLB_DEMAP_ID(TLB_DEMAP_ID_NUCLEUS))
#define TLB_CTX_KERNEL (0)
+#define TLB_CTX_USER_MIN (1)
+#define TLB_CTX_USER_MAX (8192)
#define TLB_DTLB (1 << 0)
#define TLB_ITLB (1 << 1)
@@ -105,14 +107,10 @@ tlb_dtlb_page_demap(u_long ctx, vm_offset_t va)
stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE,
ASI_DMMU_DEMAP, 0);
membar(Sync);
- } else {
- stxa(AA_DMMU_SCXR, ASI_DMMU, ctx);
- membar(Sync);
- stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_SECONDARY | TLB_DEMAP_PAGE,
+ } else if (ctx != -1) {
+ stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE,
ASI_DMMU_DEMAP, 0);
membar(Sync);
- stxa(AA_DMMU_SCXR, ASI_DMMU, 0);
- membar(Sync);
}
}
@@ -155,15 +153,10 @@ tlb_itlb_page_demap(u_long ctx, vm_offset_t va)
stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE,
ASI_IMMU_DEMAP, 0);
flush(KERNBASE);
- } else {
- stxa(AA_DMMU_SCXR, ASI_DMMU, ctx);
- membar(Sync);
- stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_SECONDARY | TLB_DEMAP_PAGE,
+ } else if (ctx != -1) {
+ stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE,
ASI_IMMU_DEMAP, 0);
membar(Sync);
- stxa(AA_DMMU_SCXR, ASI_DMMU, 0);
- /* flush probably not needed. */
- membar(Sync);
}
}
@@ -188,10 +181,12 @@ tlb_itlb_store(vm_offset_t va, u_long ctx, struct tte tte)
}
static __inline void
-tlb_context_demap(u_int context)
+tlb_context_demap(u_int ctx)
{
- tlb_dtlb_context_primary_demap();
- tlb_itlb_context_primary_demap();
+ if (ctx != -1) {
+ tlb_dtlb_context_primary_demap();
+ tlb_itlb_context_primary_demap();
+ }
}
static __inline void
@@ -231,6 +226,7 @@ tlb_tte_demap(struct tte tte, u_int ctx)
static __inline void
tlb_store(u_int tlb, vm_offset_t va, u_long ctx, struct tte tte)
{
+ KASSERT(ctx != -1, ("tlb_store: invalid context"));
if (tlb & TLB_DTLB)
tlb_dtlb_store(va, ctx, tte);
if (tlb & TLB_ITLB)
@@ -240,6 +236,7 @@ tlb_store(u_int tlb, vm_offset_t va, u_long ctx, struct tte tte)
static __inline void
tlb_store_slot(u_int tlb, vm_offset_t va, u_long ctx, struct tte tte, int slot)
{
+ KASSERT(ctx != -1, ("tlb_store_slot: invalid context"));
if (tlb & TLB_DTLB)
tlb_dtlb_store_slot(va, ctx, tte, slot);
if (tlb & TLB_ITLB)
diff --git a/sys/sparc64/sparc64/genassym.c b/sys/sparc64/sparc64/genassym.c
index 61ee811..993c6e5 100644
--- a/sys/sparc64/sparc64/genassym.c
+++ b/sys/sparc64/sparc64/genassym.c
@@ -34,6 +34,8 @@
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/signal.h>
+#include <sys/systm.h>
+#include <sys/ucontext.h>
#include <sys/user.h>
#include <sys/ucontext.h>
#include <sys/vmmeter.h>
@@ -153,6 +155,9 @@ ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid));
ASSYM(PC_CPUMASK, offsetof(struct pcpu, pc_cpumask));
ASSYM(PC_IQ, offsetof(struct pcpu, pc_iq));
ASSYM(PC_MID, offsetof(struct pcpu, pc_mid));
+ASSYM(PC_TLB_CTX, offsetof(struct pcpu, pc_tlb_ctx));
+ASSYM(PC_TLB_CTX_MAX, offsetof(struct pcpu, pc_tlb_ctx_max));
+ASSYM(PC_TLB_CTX_MIN, offsetof(struct pcpu, pc_tlb_ctx_min));
ASSYM(PC_SIZEOF, sizeof(struct pcpu));
ASSYM(IH_SHIFT, IH_SHIFT);
@@ -188,6 +193,8 @@ ASSYM(KEF_NEEDRESCHED, KEF_NEEDRESCHED);
ASSYM(MD_UTRAP, offsetof(struct mdproc, md_utrap));
+ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
+
ASSYM(P_COMM, offsetof(struct proc, p_comm));
ASSYM(P_MD, offsetof(struct proc, p_md));
ASSYM(P_PID, offsetof(struct proc, p_pid));
diff --git a/sys/sparc64/sparc64/machdep.c b/sys/sparc64/sparc64/machdep.c
index 0517ede..50e1daf 100644
--- a/sys/sparc64/sparc64/machdep.c
+++ b/sys/sparc64/sparc64/machdep.c
@@ -96,6 +96,7 @@
#include <machine/reg.h>
#include <machine/sigframe.h>
#include <machine/tick.h>
+#include <machine/tlb.h>
#include <machine/tstate.h>
#include <machine/ver.h>
@@ -296,6 +297,9 @@ sparc64_init(caddr_t mdp, u_int *state, u_int mid, u_int bootmid,
pc->pc_curthread = &thread0;
pc->pc_curpcb = thread0.td_pcb;
pc->pc_mid = mid;
+ pc->pc_tlb_ctx = TLB_CTX_USER_MIN;
+ pc->pc_tlb_ctx_min = TLB_CTX_USER_MIN;
+ pc->pc_tlb_ctx_max = TLB_CTX_USER_MAX;
/*
* Initialize global registers.
diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c
index e92402d..19409e9 100644
--- a/sys/sparc64/sparc64/pmap.c
+++ b/sys/sparc64/sparc64/pmap.c
@@ -155,13 +155,6 @@ vm_offset_t kernel_page;
*/
struct pmap kernel_pmap_store;
-/*
- * Map of free and in use hardware contexts and index of first potentially
- * free context.
- */
-static char pmap_context_map[PMAP_CONTEXT_MAX];
-static u_int pmap_context_base;
-
static boolean_t pmap_initialized = FALSE;
/* Convert a tte data field into a page mask */
@@ -175,12 +168,6 @@ static vm_offset_t pmap_page_masks[] = {
#define PMAP_TD_GET_MASK(d) pmap_page_masks[TD_GET_SIZE((d))]
/*
- * Allocate and free hardware context numbers.
- */
-static u_int pmap_context_alloc(void);
-static void pmap_context_destroy(u_int i);
-
-/*
* Allocate physical memory for use in pmap_bootstrap.
*/
static vm_offset_t pmap_bootstrap_alloc(vm_size_t size);
@@ -478,6 +465,43 @@ pmap_bootstrap_alloc(vm_size_t size)
panic("pmap_bootstrap_alloc");
}
+void
+pmap_context_rollover(void)
+{
+ u_long data;
+ int i;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+ CTR0(KTR_PMAP, "pmap_context_rollover");
+ for (i = 0; i < 64; i++) {
+ data = ldxa(TLB_DAR_SLOT(i), ASI_DTLB_DATA_ACCESS_REG);
+ if ((data & TD_V) != 0 && (data & TD_P) == 0) {
+ stxa(TLB_DAR_SLOT(i), ASI_DTLB_DATA_ACCESS_REG, 0);
+ membar(Sync);
+ }
+ data = ldxa(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG);
+ if ((data & TD_V) != 0 && (data & TD_P) == 0) {
+ stxa(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG, 0);
+ membar(Sync);
+ }
+ }
+ PCPU_SET(tlb_ctx, PCPU_GET(tlb_ctx_min));
+}
+
+static __inline u_int
+pmap_context_alloc(void)
+{
+ u_int context;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+ context = PCPU_GET(tlb_ctx);
+ if (context + 1 == PCPU_GET(tlb_ctx_max))
+ pmap_context_rollover();
+ else
+ PCPU_SET(tlb_ctx, context + 1);
+ return (context);
+}
+
/*
* Initialize the pmap module.
*/
@@ -1078,8 +1102,10 @@ pmap_swapin_thread(struct thread *td)
void
pmap_pinit0(pmap_t pm)
{
+ int i;
- pm->pm_context[PCPU_GET(cpuid)] = pmap_context_alloc();
+ for (i = 0; i < MAXCPU; i++)
+ pm->pm_context[i] = 0;
pm->pm_active = 0;
pm->pm_count = 1;
pm->pm_tsb = NULL;
@@ -1129,8 +1155,9 @@ pmap_pinit(pmap_t pm)
}
pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
+ for (i = 0; i < MAXCPU; i++)
+ pm->pm_context[i] = -1;
pm->pm_active = 0;
- pm->pm_context[PCPU_GET(cpuid)] = pmap_context_alloc();
pm->pm_count = 1;
TAILQ_INIT(&pm->pm_pvlist);
bzero(&pm->pm_stats, sizeof(pm->pm_stats));
@@ -1162,7 +1189,6 @@ pmap_release(pmap_t pm)
KASSERT(pmap_resident_count(pm) == 0,
("pmap_release: resident pages %ld != 0",
pmap_resident_count(pm)));
- pmap_context_destroy(pm->pm_context[PCPU_GET(cpuid)]);
TAILQ_FOREACH(m, &obj->memq, listq) {
if (vm_page_sleep_busy(m, FALSE, "pmaprl"))
continue;
@@ -1769,6 +1795,7 @@ pmap_mincore(pmap_t pm, vm_offset_t addr)
void
pmap_activate(struct thread *td)
{
+ struct vmspace *vm;
vm_offset_t tsb;
u_long context;
pmap_t pm;
@@ -1777,21 +1804,24 @@ pmap_activate(struct thread *td)
* Load all the data we need up front to encourage the compiler to
* not issue any loads while we have interrupts disable below.
*/
- pm = &td->td_proc->p_vmspace->vm_pmap;
- context = pm->pm_context[PCPU_GET(cpuid)];
+ vm = td->td_proc->p_vmspace;
+ pm = &vm->vm_pmap;
tsb = (vm_offset_t)pm->pm_tsb;
- KASSERT(context != 0, ("pmap_activate: activating nucleus context"));
- KASSERT(context != -1, ("pmap_activate: steal context"));
KASSERT(pm->pm_active == 0, ("pmap_activate: pmap already active?"));
+ KASSERT(pm->pm_context[PCPU_GET(cpuid)] != 0,
+ ("pmap_activate: activating nucleus context?"));
+ mtx_lock_spin(&sched_lock);
wrpr(pstate, 0, PSTATE_MMU);
mov(tsb, TSB_REG);
- wrpr(pstate, 0, PSTATE_NORMAL);
- pm->pm_active |= 1 << PCPU_GET(cpuid);
+ wrpr(pstate, 0, PSTATE_KERNEL);
+ context = pmap_context_alloc();
+ pm->pm_context[PCPU_GET(cpuid)] = context;
+ pm->pm_active |= PCPU_GET(cpumask);
stxa(AA_DMMU_PCXR, ASI_DMMU, context);
membar(Sync);
- wrpr(pstate, 0, PSTATE_KERNEL);
+ mtx_unlock_spin(&sched_lock);
}
vm_offset_t
@@ -1800,32 +1830,3 @@ pmap_addr_hint(vm_object_t object, vm_offset_t va, vm_size_t size)
return (va);
}
-
-/*
- * Allocate a hardware context number from the context map.
- */
-static u_int
-pmap_context_alloc(void)
-{
- u_int i;
-
- i = pmap_context_base;
- do {
- if (pmap_context_map[i] == 0) {
- pmap_context_map[i] = 1;
- pmap_context_base = (i + 1) & (PMAP_CONTEXT_MAX - 1);
- return (i);
- }
- } while ((i = (i + 1) & (PMAP_CONTEXT_MAX - 1)) != pmap_context_base);
- panic("pmap_context_alloc");
-}
-
-/*
- * Free a hardware context number back to the context map.
- */
-static void
-pmap_context_destroy(u_int i)
-{
-
- pmap_context_map[i] = 0;
-}
diff --git a/sys/sparc64/sparc64/swtch.S b/sys/sparc64/sparc64/swtch.S
index 731d586..70d95ec 100644
--- a/sys/sparc64/sparc64/swtch.S
+++ b/sys/sparc64/sparc64/swtch.S
@@ -31,6 +31,9 @@
#include <machine/ktr.h>
#include <machine/tstate.h>
+ .register %g2, #ignore
+ .register %g3, #ignore
+
#include "assym.s"
ENTRY(cpu_throw)
@@ -89,18 +92,14 @@ ENTRY(cpu_switch)
*/
.Lsw1:
#if KTR_COMPILE & KTR_PROC
- CATR(KTR_PROC, "cpu_switch: td=%d (%s) pc=%#lx fp=%#lx"
- , %l3, %l4, %l5, 7, 8, 9)
- ldx [%o0 + TD_PROC], %l4
- lduw [%l4 + P_PID], %l5
- stx %l5, [%l3 + KTR_PARM1]
- add %l4, P_COMM, %l5
- stx %l5, [%l3 + KTR_PARM2]
- ldx [%o0 + TD_PCB], %l4
- ldx [%l4 + PCB_PC], %l5
- stx %l5, [%l3 + KTR_PARM3]
- ldx [%l4 + PCB_FP], %l5
- stx %l5, [%l3 + KTR_PARM4]
+ CATR(KTR_PROC, "cpu_switch: new td=%p pc=%#lx fp=%#lx"
+ , %g1, %g2, %g3, 7, 8, 9)
+ stx %o0, [%g1 + KTR_PARM1]
+ ldx [%o0 + TD_PCB], %g2
+ ldx [%g2 + PCB_PC], %g3
+ stx %g3, [%g1 + KTR_PARM2]
+ ldx [%g2 + PCB_FP], %g3
+ stx %g3, [%g1 + KTR_PARM3]
9:
#endif
ldx [%o0 + TD_PCB], %o1
@@ -110,6 +109,9 @@ ENTRY(cpu_switch)
stx %o0, [PCPU(CURTHREAD)]
stx %o1, [PCPU(CURPCB)]
+ SET(sched_lock, %o3, %o2)
+ stx %o0, [%o2 + MTX_LOCK]
+
wrpr %g0, PSTATE_NORMAL, %pstate
mov %o1, PCB_REG
wrpr %g0, PSTATE_ALT, %pstate
@@ -124,31 +126,36 @@ ENTRY(cpu_switch)
ldx [%l2 + P_VMSPACE], %l2
ldx [%o2 + P_VMSPACE], %o2
+#if KTR_COMPILE & KTR_PROC
+ CATR(KTR_PROC, "cpu_switch: new vm=%p old vm=%p"
+ , %g1, %g2, %g3, 7, 8, 9)
+ stx %o2, [%g1 + KTR_PARM1]
+ stx %l2, [%g1 + KTR_PARM2]
+9:
+#endif
+
/*
- * If they're the same we are done.
+ * If they are the same we are done.
*/
cmp %l2, %o2
be,a,pn %xcc, 4f
nop
- /*
- * If the old process has nucleus context we can skip demapping the
- * tsb.
- */
+ lduw [PCPU(CPUID)], %o3
+ sllx %o3, INT_SHIFT, %o3
+ add %o2, VM_PMAP + PM_CONTEXT, %o4
+
lduw [PCPU(CPUID)], %l3
sllx %l3, INT_SHIFT, %l3
add %l2, VM_PMAP + PM_CONTEXT, %l4
- lduw [%l3 + %l4], %l5
- brz,a,pn %l5, 3f
- nop
/*
- * Demap the old process's tsb.
+ * If the old process has nucleus context we don't want to deactivate
+ * its pmap on this cpu.
*/
- ldx [%l2 + VM_PMAP + PM_TSB], %l3
- or %l3, TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, %l3
- stxa %g0, [%l3] ASI_DMMU_DEMAP
- membar #Sync
+ lduw [%l3 + %l4], %l5
+ brz,a %l5, 2f
+ nop
/*
* Mark the pmap no longer active on this cpu.
@@ -159,42 +166,103 @@ ENTRY(cpu_switch)
stw %l3, [%l2 + VM_PMAP + PM_ACTIVE]
/*
+ * Take away its context.
+ */
+ lduw [PCPU(CPUID)], %l3
+ sllx %l3, INT_SHIFT, %l3
+ add %l2, VM_PMAP + PM_CONTEXT, %l4
+ mov -1, %l5
+ stw %l5, [%l3 + %l4]
+
+ /*
* If the new process has nucleus context we are done.
*/
-3: lduw [PCPU(CPUID)], %o3
- sllx %o3, INT_SHIFT, %o3
- add %o2, VM_PMAP + PM_CONTEXT, %o4
- lduw [%o3 + %o4], %o5
+2: lduw [%o3 + %o4], %o5
+
+#if KTR_COMPILE & KTR_PROC
+ CATR(KTR_PROC, "cpu_switch: ctx=%#lx"
+ , %g1, %g2, %g3, 7, 8, 9)
+ stx %o5, [%g1 + KTR_PARM1]
+9:
+#endif
+
brz,a,pn %o5, 4f
nop
/*
- * Install the new primary context.
+ * Find the current free tlb context for this cpu and install it as
+ * the new primary context.
*/
+ lduw [PCPU(TLB_CTX)], %o5
+ stw %o5, [%o3 + %o4]
mov AA_DMMU_PCXR, %o4
stxa %o5, [%o4] ASI_DMMU
- flush %o0
+ membar #Sync
+
+ /*
+ * See if we have run out of free contexts.
+ */
+ lduw [PCPU(TLB_CTX_MAX)], %o3
+
+#if KTR_COMPILE & KTR_PROC
+ CATR(KTR_PROC, "cpu_switch: ctx=%#lx next=%#lx max=%#lx"
+ , %g1, %g2, %g3, 7, 8, 9)
+ stx %o5, [%g1 + KTR_PARM1]
+ add %o5, 1, %g2
+ stx %g2, [%g1 + KTR_PARM2]
+ stx %o3, [%g1 + KTR_PARM3]
+9:
+#endif
+
+ add %o5, 1, %o5
+ cmp %o3, %o5
+ bne,a,pt %xcc, 3f
+ stw %o5, [PCPU(TLB_CTX)]
+
+#if KTR_COMPILE & KTR_PROC
+ CATR(KTR_PROC, "cpu_switch: context rollover"
+ , %g1, %g2, %g3, 7, 8, 9)
+9:
+#endif
+
+ /*
+ * We will start re-using contexts on the next switch. Flush all
+ * non-nucleus mappings from the tlb, and reset the next free context.
+ */
+ call pmap_context_rollover
+ nop
+ ldx [PCPU(CURTHREAD)], %o0
+ ldx [%o0 + TD_PROC], %o2
+ ldx [%o2 + P_VMSPACE], %o2
/*
* Mark the pmap as active on this cpu.
*/
- lduw [%o2 + VM_PMAP + PM_ACTIVE], %o3
+3: lduw [%o2 + VM_PMAP + PM_ACTIVE], %o3
lduw [PCPU(CPUMASK)], %o4
or %o3, %o4, %o3
stw %o3, [%o2 + VM_PMAP + PM_ACTIVE]
/*
- * Switch to mmu globals and install the preloaded tsb pointer.
+ * Load the address of the tsb, switch to mmu globals, and install
+ * the preloaded tsb pointer.
*/
ldx [%o2 + VM_PMAP + PM_TSB], %o3
wrpr %g0, PSTATE_MMU, %pstate
mov %o3, TSB_REG
wrpr %g0, PSTATE_KERNEL, %pstate
+4:
+#if KTR_COMPILE & KTR_PROC
+ CATR(KTR_PROC, "cpu_switch: return"
+ , %g1, %g2, %g3, 7, 8, 9)
+9:
+#endif
+
/*
* Done. Return and load the new process's window from the stack.
*/
-4: ret
+ ret
restore
END(cpu_switch)
@@ -209,6 +277,9 @@ ENTRY(savectx)
restore %g0, 0, %o0
END(savectx)
+/*
+ * void savefpctx(struct fpstate *);
+ */
ENTRY(savefpctx)
wr %g0, FPRS_FEF, %fprs
wr %g0, ASI_BLK_S, %asi
@@ -221,6 +292,9 @@ ENTRY(savefpctx)
wr %g0, 0, %fprs
END(savefpctx)
+/*
+ * void restorefpctx(struct fpstate *);
+ */
ENTRY(restorefpctx)
wr %g0, FPRS_FEF, %fprs
wr %g0, ASI_BLK_S, %asi