-rw-r--r--  sys/i386/i386/apic_vector.s   16
-rw-r--r--  sys/i386/i386/db_trace.c       3
-rw-r--r--  sys/i386/i386/mp_machdep.c     7
-rw-r--r--  sys/i386/i386/pmap.c          99
-rw-r--r--  sys/i386/i386/swtch.s          6
-rw-r--r--  sys/i386/include/smp.h         4
-rw-r--r--  sys/i386/xen/mp_machdep.c     11
-rw-r--r--  sys/i386/xen/pmap.c           99
8 files changed, 242 insertions, 3 deletions
diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s
index a78b601..86e78c4 100644
--- a/sys/i386/i386/apic_vector.s
+++ b/sys/i386/i386/apic_vector.s
@@ -357,4 +357,20 @@ IDTVEC(rendezvous)
POP_FRAME
iret
+/*
+ * Clean up when we lose out on the lazy context switch optimization.
+ * ie: when we are about to release a PTD but a cpu is still borrowing it.
+ */
+ SUPERALIGN_TEXT
+IDTVEC(lazypmap)
+ PUSH_FRAME
+ SET_KERNEL_SREGS
+ cld
+
+ call pmap_lazyfix_action
+
+ movl lapic, %eax
+ movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */
+ POP_FRAME
+ iret
#endif /* SMP */
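
Review note: the new vector is only interrupt-side glue -- build a trap frame, call the C handler, acknowledge the local APIC. A rough C-level rendering of what the stub does is below; this is a sketch only (the real entry point must stay in assembly so it can save the frame and return with iret), and lapic_eoi() stands in for the raw store to LA_EOI.

#include <machine/apicvar.h>		/* lapic_eoi() */

void	pmap_lazyfix_action(void);	/* C handler added in pmap.c by this change */

/* Conceptual C equivalent of IDTVEC(lazypmap) -- illustrative, not the real handler. */
static void
lazypmap_ipi_sketch(void)
{
	pmap_lazyfix_action();		/* drop the borrowed page directory if it is loaded */
	lapic_eoi();			/* End Of Interrupt to the local APIC (cf. LA_EOI store) */
}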
diff --git a/sys/i386/i386/db_trace.c b/sys/i386/i386/db_trace.c
index 79da4dc..445d9c5 100644
--- a/sys/i386/i386/db_trace.c
+++ b/sys/i386/i386/db_trace.c
@@ -312,7 +312,8 @@ db_nextframe(struct i386_frame **fp, db_addr_t *ip, struct thread *td)
frame_type = TRAP_TIMERINT;
else if (strcmp(name, "Xcpustop") == 0 ||
strcmp(name, "Xrendezvous") == 0 ||
- strcmp(name, "Xipi_intr_bitmap_handler") == 0)
+ strcmp(name, "Xipi_intr_bitmap_handler") == 0 ||
+ strcmp(name, "Xlazypmap") == 0)
frame_type = TRAP_INTERRUPT;
}
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index 966e46e..fbaae89 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -166,6 +166,7 @@ u_long *ipi_invlrng_counts[MAXCPU];
u_long *ipi_invlpg_counts[MAXCPU];
u_long *ipi_invlcache_counts[MAXCPU];
u_long *ipi_rendezvous_counts[MAXCPU];
+u_long *ipi_lazypmap_counts[MAXCPU];
static u_long *ipi_hardclock_counts[MAXCPU];
#endif
@@ -575,6 +576,10 @@ cpu_mp_start(void)
setidt(IPI_INVLCACHE, IDTVEC(invlcache),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ /* Install an inter-CPU IPI for lazy pmap release */
+ setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
/* Install an inter-CPU IPI for all-CPU rendezvous */
setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
@@ -1718,6 +1723,8 @@ mp_ipi_intrcnt(void *dummy)
intrcnt_add(buf, &ipi_ast_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i);
intrcnt_add(buf, &ipi_rendezvous_counts[i]);
+ snprintf(buf, sizeof(buf), "cpu%d:lazypmap", i);
+ intrcnt_add(buf, &ipi_lazypmap_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
intrcnt_add(buf, &ipi_hardclock_counts[i]);
}
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index c85f138..d0699b5 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -1900,6 +1900,104 @@ retry:
* Pmap allocation/deallocation routines.
***************************************************/
+#ifdef SMP
+/*
+ * Deal with a SMP shootdown of other users of the pmap that we are
+ * trying to dispose of. This can be a bit hairy.
+ */
+static cpuset_t *lazymask;
+static u_int lazyptd;
+static volatile u_int lazywait;
+
+void pmap_lazyfix_action(void);
+
+void
+pmap_lazyfix_action(void)
+{
+
+#ifdef COUNT_IPIS
+ (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
+#endif
+ if (rcr3() == lazyptd)
+ load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+ CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask);
+ atomic_store_rel_int(&lazywait, 1);
+}
+
+static void
+pmap_lazyfix_self(cpuset_t mymask)
+{
+
+ if (rcr3() == lazyptd)
+ load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+ CPU_NAND_ATOMIC(lazymask, &mymask);
+}
+
+
+static void
+pmap_lazyfix(pmap_t pmap)
+{
+ cpuset_t mymask, mask;
+ u_int spins;
+ int lsb;
+
+ mask = pmap->pm_active;
+ while (!CPU_EMPTY(&mask)) {
+ spins = 50000000;
+
+ /* Find least significant set bit. */
+ lsb = cpusetobj_ffs(&mask);
+ lsb--;
+ CPU_SETOF(lsb, &mask);
+ mtx_lock_spin(&smp_ipi_mtx);
+#ifdef PAE
+ lazyptd = vtophys(pmap->pm_pdpt);
+#else
+ lazyptd = vtophys(pmap->pm_pdir);
+#endif
+ mymask = PCPU_GET(cpumask);
+ if (!CPU_CMP(&mask, &mymask)) {
+ lazymask = &pmap->pm_active;
+ pmap_lazyfix_self(mymask);
+ } else {
+ atomic_store_rel_int((u_int *)&lazymask,
+ (u_int)&pmap->pm_active);
+ atomic_store_rel_int(&lazywait, 0);
+ ipi_selected(mask, IPI_LAZYPMAP);
+ while (lazywait == 0) {
+ ia32_pause();
+ if (--spins == 0)
+ break;
+ }
+ }
+ mtx_unlock_spin(&smp_ipi_mtx);
+ if (spins == 0)
+ printf("pmap_lazyfix: spun for 50000000\n");
+ mask = pmap->pm_active;
+ }
+}
+
+#else /* SMP */
+
+/*
+ * Cleaning up on uniprocessor is easy. For various reasons, we're
+ * unlikely to have to even execute this code, including the fact
+ * that the cleanup is deferred until the parent does a wait(2), which
+ * means that another userland process has run.
+ */
+static void
+pmap_lazyfix(pmap_t pmap)
+{
+ u_int cr3;
+
+ cr3 = vtophys(pmap->pm_pdir);
+ if (cr3 == rcr3()) {
+ load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+ CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active);
+ }
+}
+#endif /* SMP */
+
/*
* Release any resources held by the given physical map.
* Called when a pmap initialized by pmap_pinit is being released.
@@ -1917,6 +2015,7 @@ pmap_release(pmap_t pmap)
KASSERT(pmap->pm_root == NULL,
("pmap_release: pmap has reserved page table page(s)"));
+ pmap_lazyfix(pmap);
mtx_lock_spin(&allpmaps_lock);
LIST_REMOVE(pmap, pm_list);
mtx_unlock_spin(&allpmaps_lock);
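
Review note: the core of the pmap.c change is a small request/acknowledge protocol. The releasing CPU publishes lazyptd/lazymask, sends IPI_LAZYPMAP, and spins (boundedly) on lazywait, while each interrupted CPU reloads a safe %cr3 and clears itself out of pm_active. The fragment below condenses that handshake into one place as a reading aid; requester() and responder() are hypothetical names, and the per-CPU loop, PAE case, and smp_ipi_mtx locking of the real pmap_lazyfix() are omitted.

/* Hypothetical condensation of the lazyfix handshake (not the real code). */
static volatile u_int done;			/* plays the role of lazywait */

static void
requester(cpuset_t mask)			/* cf. pmap_lazyfix() */
{
	u_int spins = 50000000;

	atomic_store_rel_int(&done, 0);		/* arm the acknowledgement */
	ipi_selected(mask, IPI_LAZYPMAP);	/* interrupt the borrowing CPUs */
	while (done == 0 && --spins != 0)
		ia32_pause();			/* bounded wait, as in the real code */
}

static void
responder(void)					/* runs from the IPI, cf. pmap_lazyfix_action() */
{
	load_cr3(PCPU_GET(curpcb)->pcb_cr3);	/* stop borrowing the dying page tables */
	atomic_store_rel_int(&done, 1);		/* release the requester */
}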
diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s
index 6547569..680b032 100644
--- a/sys/i386/i386/swtch.s
+++ b/sys/i386/i386/swtch.s
@@ -174,6 +174,12 @@ ENTRY(cpu_switch)
/* switch address space */
movl PCB_CR3(%edx),%eax
+#ifdef PAE
+ cmpl %eax,IdlePDPT /* Kernel address space? */
+#else
+ cmpl %eax,IdlePTD /* Kernel address space? */
+#endif
+ je sw0
READ_CR3(%ebx) /* The same address space? */
cmpl %ebx,%eax
je sw0
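
Review note: this is the other half of the optimization. cpu_switch() now also skips the %cr3 reload when the incoming thread runs on the kernel-only page directory (IdlePTD, or IdlePDPT under PAE), not just when the new value already matches %cr3 -- which is what lets a kernel thread keep borrowing the previous process's page tables. In C terms the decision is roughly the sketch below; kernel_pd stands in for IdlePTD/IdlePDPT, and the real logic is the assembly above.

/* Sketch of the cpu_switch() address-space decision after this change. */
static void
switch_address_space_sketch(struct pcb *new_pcb, u_int kernel_pd)
{
	if (new_pcb->pcb_cr3 != kernel_pd && new_pcb->pcb_cr3 != rcr3())
		load_cr3(new_pcb->pcb_cr3);
	/* else: keep the current page tables loaded (the "lazy" switch) */
}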
diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h
index cdec9c8..33b2578 100644
--- a/sys/i386/include/smp.h
+++ b/sys/i386/include/smp.h
@@ -42,6 +42,7 @@ extern u_long *ipi_invlrng_counts[MAXCPU];
extern u_long *ipi_invlpg_counts[MAXCPU];
extern u_long *ipi_invlcache_counts[MAXCPU];
extern u_long *ipi_rendezvous_counts[MAXCPU];
+extern u_long *ipi_lazypmap_counts[MAXCPU];
#endif
/* IPI handlers */
@@ -52,7 +53,8 @@ inthand_t
IDTVEC(invlcache), /* Write back and invalidate cache */
IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */
IDTVEC(cpustop), /* CPU stops & waits to be restarted */
- IDTVEC(rendezvous); /* handle CPU rendezvous */
+ IDTVEC(rendezvous), /* handle CPU rendezvous */
+ IDTVEC(lazypmap); /* handle lazy pmap release */
/* functions in mp_machdep.c */
void cpu_add(u_int apic_id, char boot_cpu);
diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c
index 1565859..2d05596 100644
--- a/sys/i386/xen/mp_machdep.c
+++ b/sys/i386/xen/mp_machdep.c
@@ -154,6 +154,7 @@ static cpuset_t hyperthreading_cpus_mask;
extern void Xhypervisor_callback(void);
extern void failsafe_callback(void);
+extern void pmap_lazyfix_action(void);
struct cpu_group *
cpu_topo(void)
@@ -341,16 +342,24 @@ iv_invlcache(uintptr_t a, uintptr_t b)
atomic_add_int(&smp_tlb_wait, 1);
}
+static void
+iv_lazypmap(uintptr_t a, uintptr_t b)
+{
+ pmap_lazyfix_action();
+ atomic_add_int(&smp_tlb_wait, 1);
+}
+
/*
* These start from "IPI offset" APIC_IPI_INTS
*/
-static call_data_func_t *ipi_vectors[5] =
+static call_data_func_t *ipi_vectors[6] =
{
iv_rendezvous,
iv_invltlb,
iv_invlpg,
iv_invlrng,
iv_invlcache,
+ iv_lazypmap,
};
/*
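
Review note: the Xen PV port has no APIC vectors to hook; IPIs arrive as event-channel upcalls and are fanned out through ipi_vectors[], indexed by the vector number relative to APIC_IPI_INTS, so growing the array to six and appending iv_lazypmap keeps IPI_LAZYPMAP in step with the native ordering. A sketch of that dispatch, assuming the index convention stated in the comment above (the actual dispatcher lives elsewhere in this file):

/* Hypothetical dispatcher sketch: route an incoming IPI to its handler. */
static void
dispatch_ipi_sketch(int vector, uintptr_t a, uintptr_t b)
{
	call_data_func_t *func;

	func = ipi_vectors[vector - APIC_IPI_INTS];	/* table starts at APIC_IPI_INTS */
	(*func)(a, b);					/* e.g. iv_lazypmap() for IPI_LAZYPMAP */
}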
diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c
index 74ace61..1149b6f 100644
--- a/sys/i386/xen/pmap.c
+++ b/sys/i386/xen/pmap.c
@@ -1683,6 +1683,104 @@ retry:
* Pmap allocation/deallocation routines.
***************************************************/
+#ifdef SMP
+/*
+ * Deal with a SMP shootdown of other users of the pmap that we are
+ * trying to dispose of. This can be a bit hairy.
+ */
+static cpuset_t *lazymask;
+static u_int lazyptd;
+static volatile u_int lazywait;
+
+void pmap_lazyfix_action(void);
+
+void
+pmap_lazyfix_action(void)
+{
+
+#ifdef COUNT_IPIS
+ (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
+#endif
+ if (rcr3() == lazyptd)
+ load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+ CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask);
+ atomic_store_rel_int(&lazywait, 1);
+}
+
+static void
+pmap_lazyfix_self(cpuset_t mymask)
+{
+
+ if (rcr3() == lazyptd)
+ load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+ CPU_NAND_ATOMIC(lazymask, &mymask);
+}
+
+
+static void
+pmap_lazyfix(pmap_t pmap)
+{
+ cpuset_t mymask, mask;
+ u_int spins;
+ int lsb;
+
+ mask = pmap->pm_active;
+ while (!CPU_EMPTY(&mask)) {
+ spins = 50000000;
+
+ /* Find least significant set bit. */
+ lsb = cpusetobj_ffs(&mask);
+ lsb--;
+ CPU_SETOF(lsb, &mask);
+ mtx_lock_spin(&smp_ipi_mtx);
+#ifdef PAE
+ lazyptd = vtophys(pmap->pm_pdpt);
+#else
+ lazyptd = vtophys(pmap->pm_pdir);
+#endif
+ mymask = PCPU_GET(cpumask);
+ if (!CPU_CMP(&mask, &mymask)) {
+ lazymask = &pmap->pm_active;
+ pmap_lazyfix_self(mymask);
+ } else {
+ atomic_store_rel_int((u_int *)&lazymask,
+ (u_int)&pmap->pm_active);
+ atomic_store_rel_int(&lazywait, 0);
+ ipi_selected(mask, IPI_LAZYPMAP);
+ while (lazywait == 0) {
+ ia32_pause();
+ if (--spins == 0)
+ break;
+ }
+ }
+ mtx_unlock_spin(&smp_ipi_mtx);
+ if (spins == 0)
+ printf("pmap_lazyfix: spun for 50000000\n");
+ mask = pmap->pm_active;
+ }
+}
+
+#else /* SMP */
+
+/*
+ * Cleaning up on uniprocessor is easy. For various reasons, we're
+ * unlikely to have to even execute this code, including the fact
+ * that the cleanup is deferred until the parent does a wait(2), which
+ * means that another userland process has run.
+ */
+static void
+pmap_lazyfix(pmap_t pmap)
+{
+ u_int cr3;
+
+ cr3 = vtophys(pmap->pm_pdir);
+ if (cr3 == rcr3()) {
+ load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+ CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active);
+ }
+}
+#endif /* SMP */
+
/*
* Release any resources held by the given physical map.
* Called when a pmap initialized by pmap_pinit is being released.
@@ -1708,6 +1806,7 @@ pmap_release(pmap_t pmap)
mtx_lock(&createdelete_lock);
#endif
+ pmap_lazyfix(pmap);
mtx_lock_spin(&allpmaps_lock);
LIST_REMOVE(pmap, pm_list);
mtx_unlock_spin(&allpmaps_lock);