author:    kib <kib@FreeBSD.org>  2015-05-09 19:11:01 +0000
committer: kib <kib@FreeBSD.org>  2015-05-09 19:11:01 +0000
commit:    3fb738761ee4e1438402fd537fc893b44ae9312b
tree:      4e5ad64b4a3d4406e2c171bb5e27a1e45fdf9ef0
parent:    332806b9467d4a717a0589454d9756248426fd7c
Rewrite the amd64 PCID implementation to follow the algorithm described
in Vahalia's "Unix Internals", section 15.12 "Other TLB Consistency
Algorithms". The same algorithm is already used by the MIPS pmap to
handle ASIDs.

The PCID for an address space is now allocated per-CPU during the
context switch to a thread using the pmap, when no PCID was ever
allocated for this pmap on that CPU, or when the current PCID has been
invalidated. If the PCID is reused, bit 63 of %cr3 can be set to avoid
a TLB flush.

Each CPU has a generation count for the algorithm, which is saved in
the pmap's per-CPU block when the per-CPU PCID is allocated. On
invalidation, the pmap's generation count is zeroed, which signals to
the context switch code that the already-allocated PCID is no longer
valid. The TLB shootdown for the given CPU/address space is implied by
the allocation of a new PCID.

The pm_save mask no longer has to be tracked, which (significantly)
reduces the targets of the TLB shootdown IPIs. Previously, pm_save was
reset only in pmap_invalidate_all(), so it accumulated the cpuids of
all processors on which the thread was scheduled between full TLB
shootdowns.

Besides reducing the number of TLB shootdowns and removing the atomics
that updated pm_save in the context switch code, the algorithm is much
simpler than the maintenance of pm_save and the selection of the right
address space in the shootdown IPI handler.

Reviewed by:	alc
Tested by:	pho
Sponsored by:	The FreeBSD Foundation
MFC after:	3 weeks
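[Editor's note] As a reading aid, here is a minimal standalone model in C
of the generation-count allocation the message describes; compare
pmap_pcid_alloc() in the sys/amd64/amd64/pmap.c hunk further down. The
struct names, pcid_alloc(), and the main() driver are illustrative only;
the constants mirror PMAP_PCID_KERN and PMAP_PCID_OVERMAX from the patch.

/*
 * Standalone sketch of the per-CPU PCID allocator introduced by this
 * commit.  Everything is simplified for illustration and is not the
 * kernel's API.
 */
#include <stdint.h>
#include <stdio.h>

#define	PCID_KERN	0		/* PCID 0 reserved for the kernel */
#define	PCID_OVERMAX	0x1000		/* PCIDs are 12 bits wide */
#define	CR3_PCID_SAVE	(1ULL << 63)	/* "do not flush" bit in %cr3 */

struct cpu_state {
	uint32_t pcid_next;	/* next PCID to hand out on this CPU */
	uint32_t pcid_gen;	/* bumped when the PCID space rolls over */
};

struct pmap_pcid {
	uint32_t pm_pcid;	/* PCID of this pmap on this CPU */
	uint32_t pm_gen;	/* generation it was allocated under; 0 = invalid */
};

/*
 * Called on context switch.  Returns CR3_PCID_SAVE when the cached
 * PCID is still valid, so the %cr3 load need not flush the TLB.
 */
static uint64_t
pcid_alloc(struct cpu_state *cpu, struct pmap_pcid *pp)
{

	if (pp->pm_gen == cpu->pcid_gen)
		return (CR3_PCID_SAVE);	/* reuse, TLB entries still good */
	if (cpu->pcid_next == PCID_OVERMAX) {
		/*
		 * PCID space exhausted: start a new generation, which
		 * implicitly invalidates every previously issued PCID.
		 */
		if (++cpu->pcid_gen == 0)
			cpu->pcid_gen = 1;
		cpu->pcid_next = PCID_KERN + 1;
	}
	pp->pm_pcid = cpu->pcid_next++;
	pp->pm_gen = cpu->pcid_gen;
	return (0);			/* new PCID: %cr3 load flushes it */
}

int
main(void)
{
	struct cpu_state cpu = { .pcid_next = PCID_KERN + 1, .pcid_gen = 1 };
	struct pmap_pcid a = { 0, 0 }, b = { 0, 0 };

	printf("first switch to a: save=%d pcid=%u\n",
	    pcid_alloc(&cpu, &a) != 0, a.pm_pcid);
	printf("first switch to b: save=%d pcid=%u\n",
	    pcid_alloc(&cpu, &b) != 0, b.pm_pcid);
	a.pm_gen = 0;		/* remote invalidation: zero the generation */
	printf("switch back to a: save=%d pcid=%u\n",
	    pcid_alloc(&cpu, &a) != 0, a.pm_pcid);
	return (0);
}

Zeroing pm_gen is all a remote invalidation has to do: the next context
switch to the pmap on that CPU observes the mismatch and allocates a
fresh PCID, which is why the shootdown IPI no longer needs pm_save.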
Diffstat (limited to 'sys')
-rw-r--r--  sys/amd64/amd64/apic_vector.S  |  18
-rw-r--r--  sys/amd64/amd64/cpu_switch.S   |  75
-rw-r--r--  sys/amd64/amd64/genassym.c     |   2
-rw-r--r--  sys/amd64/amd64/machdep.c      |   1
-rw-r--r--  sys/amd64/amd64/mp_machdep.c   | 254
-rw-r--r--  sys/amd64/amd64/pmap.c         | 341
-rw-r--r--  sys/amd64/amd64/vm_machdep.c   |   2
-rw-r--r--  sys/amd64/include/cpufunc.h    |   5
-rw-r--r--  sys/amd64/include/pcpu.h       |   4
-rw-r--r--  sys/amd64/include/pmap.h       |  15
-rw-r--r--  sys/amd64/include/smp.h        |  19
-rw-r--r--  sys/x86/include/specialreg.h   |   1
-rw-r--r--  sys/x86/xen/xen_apic.c         |  20
13 files changed, 268 insertions, 489 deletions
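[Editor's note] The other half of the scheme is the consumer side: at
context switch, a reused PCID lets the %cr3 load either be skipped
outright (same page-table root) or be performed with bit 63
(CR3_PCID_SAVE) set so the tagged TLB entries survive. Below is a small
hedged sketch of that decision, modeled on pmap_activate_sw() in the
pmap.c hunk; load_cr3()/rcr3() are stand-in stubs and pt_root is a
made-up value, not the kernel's accessors.

#include <stdint.h>
#include <stdio.h>

#define	CR3_PCID_MASK	0xfffULL	/* low 12 bits carry the PCID */
#define	CR3_PCID_SAVE	(1ULL << 63)	/* suppress flush on load */

static uint64_t fake_cr3;		/* stands in for %cr3 */
static int cr3_loads;

static void
load_cr3(uint64_t v)
{

	/* Hardware does not store bit 63; it only controls the flush. */
	fake_cr3 = v & ~CR3_PCID_SAVE;
	cr3_loads++;
}

static uint64_t
rcr3(void)
{

	return (fake_cr3);
}

/*
 * The reload decision: skip the %cr3 write when the PCID is cached and
 * the page-table root did not change; otherwise reload, with
 * CR3_PCID_SAVE set iff the PCID was reused.
 */
static void
activate(uint64_t pm_cr3, uint32_t pm_pcid, uint64_t cached)
{
	uint64_t cr3 = rcr3();

	if (!cached || (cr3 & ~CR3_PCID_MASK) != pm_cr3)
		load_cr3(pm_cr3 | pm_pcid | cached);
}

int
main(void)
{
	uint64_t pt_root = 0x123000;		/* illustrative PML4 address */

	activate(pt_root, 5, 0);		/* fresh PCID: must reload */
	activate(pt_root, 5, CR3_PCID_SAVE);	/* cached, same root: skip */
	printf("%d cr3 load(s)\n", cr3_loads);	/* prints "1 cr3 load(s)" */
	return (0);
}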
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index f23f162..a1279e6 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -203,30 +203,28 @@ invltlb_ret:
jmp doreti_iret
SUPERALIGN_TEXT
+IDTVEC(invltlb)
+ PUSH_FRAME
+
+ call invltlb_handler
+ jmp invltlb_ret
+
IDTVEC(invltlb_pcid)
PUSH_FRAME
call invltlb_pcid_handler
jmp invltlb_ret
-
- SUPERALIGN_TEXT
-IDTVEC(invltlb)
+IDTVEC(invltlb_invpcid)
PUSH_FRAME
- call invltlb_handler
+ call invltlb_invpcid_handler
jmp invltlb_ret
/*
* Single page TLB shootdown
*/
.text
- SUPERALIGN_TEXT
-IDTVEC(invlpg_pcid)
- PUSH_FRAME
-
- call invlpg_pcid_handler
- jmp invltlb_ret
SUPERALIGN_TEXT
IDTVEC(invlpg)
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index eb0ee8b..e292797 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -69,16 +69,10 @@
* %rsi = newtd
*/
ENTRY(cpu_throw)
- movl PCPU(CPUID),%eax
- testq %rdi,%rdi
- jz 1f
- /* release bit from old pm_active */
- movq PCPU(CURPMAP),%rdx
- LK btrl %eax,PM_ACTIVE(%rdx) /* clear old */
-1:
- movq TD_PCB(%rsi),%r8 /* newtd->td_pcb */
- movq PCB_CR3(%r8),%rcx /* new address space */
- jmp swact
+ movq %rsi,%r12
+ movq %rsi,%rdi
+ call pmap_activate_sw
+ jmp sw1
END(cpu_throw)
/*
@@ -132,59 +126,20 @@ ctx_switch_xsave:
xorl %eax,%eax
movq %rax,PCPU(FPCURTHREAD)
3:
-
/* Save is done. Now fire up new thread. Leave old vmspace. */
- movq TD_PCB(%rsi),%r8
-
- /* switch address space */
- movq PCB_CR3(%r8),%rcx
- movq %cr3,%rax
- cmpq %rcx,%rax /* Same address space? */
- jne swinact
- SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */
- jmp sw1
-swinact:
- movl PCPU(CPUID),%eax
- /* Release bit from old pmap->pm_active */
- movq PCPU(CURPMAP),%r12
- LK btrl %eax,PM_ACTIVE(%r12) /* clear old */
- SETLK %rdx,TD_LOCK(%rdi) /* Release the old thread */
-swact:
- /* Set bit in new pmap->pm_active */
- movq TD_PROC(%rsi),%rdx /* newproc */
- movq P_VMSPACE(%rdx), %rdx
- addq $VM_PMAP,%rdx
- cmpl $-1,PM_PCID(%rdx)
- je 1f
- LK btsl %eax,PM_SAVE(%rdx)
- jnc 1f
- btsq $63,%rcx /* CR3_PCID_SAVE */
- incq PCPU(PM_SAVE_CNT)
-1:
- movq %rcx,%cr3 /* new address space */
- LK btsl %eax,PM_ACTIVE(%rdx) /* set new */
- movq %rdx,PCPU(CURPMAP)
-
- /*
- * We might lose the race and other CPU might have changed
- * the pmap after we set our bit in pmap->pm_save. Recheck.
- * Reload %cr3 with CR3_PCID_SAVE bit cleared if pmap was
- * modified, causing TLB flush for this pcid.
- */
- btrq $63,%rcx
- jnc 1f
- LK btsl %eax,PM_SAVE(%rdx)
- jc 1f
- decq PCPU(PM_SAVE_CNT)
- movq %rcx,%cr3
-1:
-
+ movq %rsi,%r12
+ movq %rdi,%r13
+ movq %rdx,%r15
+ movq %rsi,%rdi
+ callq pmap_activate_sw
+ SETLK %r15,TD_LOCK(%r13) /* Release the old thread */
sw1:
+ movq TD_PCB(%r12),%r8
#if defined(SCHED_ULE) && defined(SMP)
/* Wait for the new thread to become unblocked */
movq $blocked_lock, %rdx
1:
- movq TD_LOCK(%rsi),%rcx
+ movq TD_LOCK(%r12),%rcx
cmpq %rcx, %rdx
pause
je 1b
@@ -195,13 +150,13 @@ sw1:
*/
/* Skip loading user fsbase/gsbase for kthreads */
- testl $TDP_KTHREAD,TD_PFLAGS(%rsi)
+ testl $TDP_KTHREAD,TD_PFLAGS(%r12)
jnz do_kthread
/*
* Load ldt register
*/
- movq TD_PROC(%rsi),%rcx
+ movq TD_PROC(%r12),%rcx
cmpq $0, P_MD+MD_LDT(%rcx)
jne do_ldt
xorl %eax,%eax
@@ -238,7 +193,7 @@ done_tss:
movq %r8,PCPU(CURPCB)
/* Update the TSS_RSP0 pointer for the next interrupt */
movq %r8,COMMON_TSS_RSP0(%rdx)
- movq %rsi,PCPU(CURTHREAD) /* into next thread */
+ movq %r12,PCPU(CURTHREAD) /* into next thread */
/* Test if debug registers should be restored. */
testl $PCB_DBREGS,PCB_FLAGS(%r8)
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 3ffefc0..11012c4 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -71,8 +71,6 @@ __FBSDID("$FreeBSD$");
ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
-ASSYM(PM_SAVE, offsetof(struct pmap, pm_save));
-ASSYM(PM_PCID, offsetof(struct pmap, pm_pcid));
ASSYM(P_MD, offsetof(struct proc, p_md));
ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 3230937..7cd58b1 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1718,7 +1718,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0;
- thread0.td_pcb->pcb_cr3 = KPML4phys; /* PCID 0 is reserved for kernel */
thread0.td_frame = &proc0_tf;
env = kern_getenv("kernelname");
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 83ca548..e91e6d5 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -88,12 +88,9 @@ char *doublefault_stack;
char *nmi_stack;
/* Variables needed for SMP tlb shootdown. */
-vm_offset_t smp_tlb_addr2;
-struct invpcid_descr smp_tlb_invpcid;
+static vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
+static pmap_t smp_tlb_pmap;
volatile int smp_tlb_wait;
-uint64_t pcid_cr3;
-pmap_t smp_tlb_pmap;
-extern int invpcid_works;
extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
@@ -139,14 +136,17 @@ cpu_mp_start(void)
/* Install an inter-CPU IPI for TLB invalidation */
if (pmap_pcid_enabled) {
- setidt(IPI_INVLTLB, IDTVEC(invltlb_pcid), SDT_SYSIGT,
- SEL_KPL, 0);
- setidt(IPI_INVLPG, IDTVEC(invlpg_pcid), SDT_SYSIGT,
- SEL_KPL, 0);
+ if (invpcid_works) {
+ setidt(IPI_INVLTLB, IDTVEC(invltlb_invpcid),
+ SDT_SYSIGT, SEL_KPL, 0);
+ } else {
+ setidt(IPI_INVLTLB, IDTVEC(invltlb_pcid), SDT_SYSIGT,
+ SEL_KPL, 0);
+ }
} else {
setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0);
- setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
}
+ setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0);
/* Install an inter-CPU IPI for cache invalidation. */
@@ -242,6 +242,9 @@ init_secondary(void)
pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
GUSERLDT_SEL];
+ pc->pc_curpmap = kernel_pmap;
+ pc->pc_pcid_gen = 1;
+ pc->pc_pcid_next = PMAP_PCID_KERN + 1;
/* Save the per-cpu pointer for use by the NMI handler. */
np->np_pcpu = (register_t) pc;
@@ -407,35 +410,8 @@ start_ap(int apic_id)
}
/*
- * Flush the TLB on all other CPU's
+ * Flush the TLB on other CPU's
*/
-static void
-smp_tlb_shootdown(u_int vector, pmap_t pmap, vm_offset_t addr1,
- vm_offset_t addr2)
-{
- u_int ncpu;
-
- ncpu = mp_ncpus - 1; /* does not shootdown self */
- if (ncpu < 1)
- return; /* no other cpus */
- if (!(read_rflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- smp_tlb_invpcid.addr = addr1;
- if (pmap == NULL) {
- smp_tlb_invpcid.pcid = 0;
- } else {
- smp_tlb_invpcid.pcid = pmap->pm_pcid;
- pcid_cr3 = pmap->pm_cr3;
- }
- smp_tlb_addr2 = addr2;
- smp_tlb_pmap = pmap;
- atomic_store_rel_int(&smp_tlb_wait, 0);
- ipi_all_but_self(vector);
- while (smp_tlb_wait < ncpu)
- ia32_pause();
- mtx_unlock_spin(&smp_ipi_mtx);
-}
static void
smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
@@ -443,7 +419,11 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
{
int cpu, ncpu, othercpus;
- othercpus = mp_ncpus - 1;
+ othercpus = mp_ncpus - 1; /* does not shootdown self */
+
+ /*
+ * Check for other cpus. Return if none.
+ */
if (CPU_ISFULLSET(&mask)) {
if (othercpus < 1)
return;
@@ -452,16 +432,11 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
if (CPU_EMPTY(&mask))
return;
}
+
if (!(read_rflags() & PSL_I))
panic("%s: interrupts disabled", __func__);
mtx_lock_spin(&smp_ipi_mtx);
- smp_tlb_invpcid.addr = addr1;
- if (pmap == NULL) {
- smp_tlb_invpcid.pcid = 0;
- } else {
- smp_tlb_invpcid.pcid = pmap->pm_pcid;
- pcid_cr3 = pmap->pm_cr3;
- }
+ smp_tlb_addr1 = addr1;
smp_tlb_addr2 = addr2;
smp_tlb_pmap = pmap;
atomic_store_rel_int(&smp_tlb_wait, 0);
@@ -485,65 +460,39 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
}
void
-smp_invlpg(pmap_t pmap, vm_offset_t addr)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLPG, pmap, addr, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_page++;
-#endif
- }
-}
-
-void
-smp_invlpg_range(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLRNG, pmap, addr1, addr2);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_range++;
- ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
-#endif
- }
-}
-
-void
smp_masked_invltlb(cpuset_t mask, pmap_t pmap)
{
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
- ipi_masked_global++;
+ ipi_global++;
#endif
}
}
void
-smp_masked_invlpg(cpuset_t mask, pmap_t pmap, vm_offset_t addr)
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
{
if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0);
+ smp_targeted_tlb_shootdown(mask, IPI_INVLPG, NULL, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
- ipi_masked_page++;
+ ipi_page++;
#endif
}
}
void
-smp_masked_invlpg_range(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
- vm_offset_t addr2)
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
{
if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1,
- addr2);
+ smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, NULL,
+ addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
- ipi_masked_range++;
- ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
+ ipi_range++;
+ ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
}
@@ -552,19 +501,9 @@ void
smp_cache_flush(void)
{
- if (smp_started)
- smp_tlb_shootdown(IPI_INVLCACHE, NULL, 0, 0);
-}
-
-void
-smp_invltlb(pmap_t pmap)
-{
-
if (smp_started) {
- smp_tlb_shootdown(IPI_INVLTLB, pmap, 0, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_global++;
-#endif
+ smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL,
+ 0, 0);
}
}
@@ -586,10 +525,10 @@ invltlb_handler(void)
}
void
-invltlb_pcid_handler(void)
+invltlb_invpcid_handler(void)
{
- uint64_t cr3;
- u_int cpuid;
+ struct invpcid_descr d;
+
#ifdef COUNT_XINVLTLB_HITS
xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
@@ -597,49 +536,45 @@ invltlb_pcid_handler(void)
(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- if (smp_tlb_invpcid.pcid != (uint64_t)-1 &&
- smp_tlb_invpcid.pcid != 0) {
- if (invpcid_works) {
- invpcid(&smp_tlb_invpcid, INVPCID_CTX);
- } else {
- /* Otherwise reload %cr3 twice. */
- cr3 = rcr3();
- if (cr3 != pcid_cr3) {
- load_cr3(pcid_cr3);
- cr3 |= CR3_PCID_SAVE;
- }
- load_cr3(cr3);
- }
- } else {
- invltlb_globpcid();
- }
- if (smp_tlb_pmap != NULL) {
- cpuid = PCPU_GET(cpuid);
- if (!CPU_ISSET(cpuid, &smp_tlb_pmap->pm_active))
- CPU_CLR_ATOMIC(cpuid, &smp_tlb_pmap->pm_save);
- }
-
+ d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
+ d.pad = 0;
+ d.addr = 0;
+ invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB :
+ INVPCID_CTX);
atomic_add_int(&smp_tlb_wait, 1);
}
void
-invlpg_handler(void)
+invltlb_pcid_handler(void)
{
#ifdef COUNT_XINVLTLB_HITS
- xhits_pg[PCPU_GET(cpuid)]++;
+ xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
- (*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
+ (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- invlpg(smp_tlb_invpcid.addr);
+ if (smp_tlb_pmap == kernel_pmap) {
+ invltlb_globpcid();
+ } else {
+ /*
+ * The current pmap might not be equal to
+ * smp_tlb_pmap. The clearing of the pm_gen in
+ * pmap_invalidate_all() takes care of TLB
+ * invalidation when switching to the pmap on this
+ * CPU.
+ */
+ if (PCPU_GET(curpmap) == smp_tlb_pmap) {
+ load_cr3(smp_tlb_pmap->pm_cr3 |
+ smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid);
+ }
+ }
atomic_add_int(&smp_tlb_wait, 1);
}
void
-invlpg_pcid_handler(void)
+invlpg_handler(void)
{
- uint64_t cr3;
#ifdef COUNT_XINVLTLB_HITS
xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
@@ -647,45 +582,15 @@ invlpg_pcid_handler(void)
(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- if (smp_tlb_invpcid.pcid == (uint64_t)-1) {
- invltlb_globpcid();
- } else if (smp_tlb_invpcid.pcid == 0) {
- invlpg(smp_tlb_invpcid.addr);
- } else if (invpcid_works) {
- invpcid(&smp_tlb_invpcid, INVPCID_ADDR);
- } else {
- /*
- * PCID supported, but INVPCID is not.
- * Temporarily switch to the target address
- * space and do INVLPG.
- */
- cr3 = rcr3();
- if (cr3 != pcid_cr3)
- load_cr3(pcid_cr3 | CR3_PCID_SAVE);
- invlpg(smp_tlb_invpcid.addr);
- load_cr3(cr3 | CR3_PCID_SAVE);
- }
-
+ invlpg(smp_tlb_addr1);
atomic_add_int(&smp_tlb_wait, 1);
}
-static inline void
-invlpg_range(vm_offset_t start, vm_offset_t end)
-{
-
- do {
- invlpg(start);
- start += PAGE_SIZE;
- } while (start < end);
-}
-
void
invlrng_handler(void)
{
- struct invpcid_descr d;
vm_offset_t addr;
- uint64_t cr3;
- u_int cpuid;
+
#ifdef COUNT_XINVLTLB_HITS
xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
@@ -693,38 +598,11 @@ invlrng_handler(void)
(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- addr = smp_tlb_invpcid.addr;
- if (pmap_pcid_enabled) {
- if (smp_tlb_invpcid.pcid == 0) {
- /*
- * kernel pmap - use invlpg to invalidate
- * global mapping.
- */
- invlpg_range(addr, smp_tlb_addr2);
- } else if (smp_tlb_invpcid.pcid == (uint64_t)-1) {
- invltlb_globpcid();
- if (smp_tlb_pmap != NULL) {
- cpuid = PCPU_GET(cpuid);
- if (!CPU_ISSET(cpuid, &smp_tlb_pmap->pm_active))
- CPU_CLR_ATOMIC(cpuid,
- &smp_tlb_pmap->pm_save);
- }
- } else if (invpcid_works) {
- d = smp_tlb_invpcid;
- do {
- invpcid(&d, INVPCID_ADDR);
- d.addr += PAGE_SIZE;
- } while (d.addr <= smp_tlb_addr2);
- } else {
- cr3 = rcr3();
- if (cr3 != pcid_cr3)
- load_cr3(pcid_cr3 | CR3_PCID_SAVE);
- invlpg_range(addr, smp_tlb_addr2);
- load_cr3(cr3 | CR3_PCID_SAVE);
- }
- } else {
- invlpg_range(addr, smp_tlb_addr2);
- }
+ addr = smp_tlb_addr1;
+ do {
+ invlpg(addr);
+ addr += PAGE_SIZE;
+ } while (addr < smp_tlb_addr2);
atomic_add_int(&smp_tlb_wait, 1);
}
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index dc823fa..e18676a 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -273,6 +273,8 @@ pmap_modified_bit(pmap_t pmap)
return (mask);
}
+extern struct pcpu __pcpu[];
+
#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
@@ -379,8 +381,6 @@ caddr_t CADDR1 = 0;
static int pmap_flags = PMAP_PDE_SUPERPAGE; /* flags for x86 pmaps */
-static struct unrhdr pcid_unr;
-static struct mtx pcid_mtx;
int pmap_pcid_enabled = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
&pmap_pcid_enabled, 0, "Is TLB Context ID enabled ?");
@@ -827,6 +827,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
{
vm_offset_t va;
pt_entry_t *pte;
+ int i;
/*
* Create an initial set of page tables to run the kernel in.
@@ -861,7 +862,6 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
kernel_pmap->pm_cr3 = KPML4phys;
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
- CPU_FILL(&kernel_pmap->pm_save); /* always superset of pm_active */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
kernel_pmap->pm_flags = pmap_flags;
@@ -895,18 +895,28 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
/* Initialize TLB Context Id. */
TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
- load_cr4(rcr4() | CR4_PCIDE);
- mtx_init(&pcid_mtx, "pcid", NULL, MTX_DEF);
- init_unrhdr(&pcid_unr, 1, (1 << 12) - 1, &pcid_mtx);
/* Check for INVPCID support */
invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID)
!= 0;
- kernel_pmap->pm_pcid = 0;
-#ifndef SMP
+ for (i = 0; i < MAXCPU; i++) {
+ kernel_pmap->pm_pcids[i].pm_pcid = PMAP_PCID_KERN;
+ kernel_pmap->pm_pcids[i].pm_gen = 1;
+ }
+ __pcpu[0].pc_pcid_next = PMAP_PCID_KERN + 1;
+ __pcpu[0].pc_pcid_gen = 1;
+ /*
+ * pcpu area for APs is zeroed during AP startup.
+ * pc_pcid_next and pc_pcid_gen are initialized by AP
+ * during pcpu setup.
+ */
+#ifdef SMP
+ load_cr4(rcr4() | CR4_PCIDE);
+#else
pmap_pcid_enabled = 0;
#endif
- } else
+ } else {
pmap_pcid_enabled = 0;
+ }
}
/*
@@ -1277,28 +1287,6 @@ pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde)
}
#ifdef SMP
-static void
-pmap_invalidate_page_pcid(pmap_t pmap, vm_offset_t va)
-{
- struct invpcid_descr d;
- uint64_t cr3;
-
- if (invpcid_works) {
- d.pcid = pmap->pm_pcid;
- d.pad = 0;
- d.addr = va;
- invpcid(&d, INVPCID_ADDR);
- return;
- }
-
- cr3 = rcr3();
- critical_enter();
- load_cr3(pmap->pm_cr3 | CR3_PCID_SAVE);
- invlpg(va);
- load_cr3(cr3 | CR3_PCID_SAVE);
- critical_exit();
-}
-
/*
* For SMP, these functions have to use the IPI mechanism for coherence.
*
@@ -1361,8 +1349,8 @@ pmap_invalidate_ept(pmap_t pmap)
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
- cpuset_t other_cpus;
- u_int cpuid;
+ cpuset_t *mask;
+ u_int cpuid, i;
if (pmap_type_guest(pmap)) {
pmap_invalidate_ept(pmap);
@@ -1373,74 +1361,33 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
("pmap_invalidate_page: invalid type %d", pmap->pm_type));
sched_pin();
- if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
- if (!pmap_pcid_enabled) {
- invlpg(va);
- } else {
- if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
- if (pmap == PCPU_GET(curpmap))
- invlpg(va);
- else
- pmap_invalidate_page_pcid(pmap, va);
- } else {
- invltlb_globpcid();
- }
- }
- smp_invlpg(pmap, va);
+ if (pmap == kernel_pmap) {
+ invlpg(va);
+ mask = &all_cpus;
} else {
cpuid = PCPU_GET(cpuid);
- other_cpus = all_cpus;
- CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active))
+ if (pmap == PCPU_GET(curpmap))
invlpg(va);
- else if (pmap_pcid_enabled) {
- if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0)
- pmap_invalidate_page_pcid(pmap, va);
- else
- invltlb_globpcid();
+ else if (pmap_pcid_enabled)
+ pmap->pm_pcids[cpuid].pm_gen = 0;
+ if (pmap_pcid_enabled) {
+ CPU_FOREACH(i) {
+ if (cpuid != i)
+ pmap->pm_pcids[i].pm_gen = 0;
+ }
}
- if (pmap_pcid_enabled)
- CPU_AND(&other_cpus, &pmap->pm_save);
- else
- CPU_AND(&other_cpus, &pmap->pm_active);
- if (!CPU_EMPTY(&other_cpus))
- smp_masked_invlpg(other_cpus, pmap, va);
+ mask = &pmap->pm_active;
}
+ smp_masked_invlpg(*mask, va);
sched_unpin();
}
-static void
-pmap_invalidate_range_pcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- struct invpcid_descr d;
- uint64_t cr3;
- vm_offset_t addr;
-
- if (invpcid_works) {
- d.pcid = pmap->pm_pcid;
- d.pad = 0;
- for (addr = sva; addr < eva; addr += PAGE_SIZE) {
- d.addr = addr;
- invpcid(&d, INVPCID_ADDR);
- }
- return;
- }
-
- cr3 = rcr3();
- critical_enter();
- load_cr3(pmap->pm_cr3 | CR3_PCID_SAVE);
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
- load_cr3(cr3 | CR3_PCID_SAVE);
- critical_exit();
-}
-
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
- cpuset_t other_cpus;
+ cpuset_t *mask;
vm_offset_t addr;
- u_int cpuid;
+ u_int cpuid, i;
if (pmap_type_guest(pmap)) {
pmap_invalidate_ept(pmap);
@@ -1451,55 +1398,36 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
("pmap_invalidate_range: invalid type %d", pmap->pm_type));
sched_pin();
- if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
- if (!pmap_pcid_enabled) {
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
- } else {
- if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
- if (pmap == PCPU_GET(curpmap)) {
- for (addr = sva; addr < eva;
- addr += PAGE_SIZE)
- invlpg(addr);
- } else {
- pmap_invalidate_range_pcid(pmap,
- sva, eva);
- }
- } else {
- invltlb_globpcid();
- }
- }
- smp_invlpg_range(pmap, sva, eva);
+ cpuid = PCPU_GET(cpuid);
+ if (pmap == kernel_pmap) {
+ for (addr = sva; addr < eva; addr += PAGE_SIZE)
+ invlpg(addr);
+ mask = &all_cpus;
} else {
- cpuid = PCPU_GET(cpuid);
- other_cpus = all_cpus;
- CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active)) {
+ if (pmap == PCPU_GET(curpmap)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
} else if (pmap_pcid_enabled) {
- if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0)
- pmap_invalidate_range_pcid(pmap, sva, eva);
- else
- invltlb_globpcid();
+ pmap->pm_pcids[cpuid].pm_gen = 0;
}
- if (pmap_pcid_enabled)
- CPU_AND(&other_cpus, &pmap->pm_save);
- else
- CPU_AND(&other_cpus, &pmap->pm_active);
- if (!CPU_EMPTY(&other_cpus))
- smp_masked_invlpg_range(other_cpus, pmap, sva, eva);
+ if (pmap_pcid_enabled) {
+ CPU_FOREACH(i) {
+ if (cpuid != i)
+ pmap->pm_pcids[i].pm_gen = 0;
+ }
+ }
+ mask = &pmap->pm_active;
}
+ smp_masked_invlpg_range(*mask, sva, eva);
sched_unpin();
}
void
pmap_invalidate_all(pmap_t pmap)
{
- cpuset_t other_cpus;
+ cpuset_t *mask;
struct invpcid_descr d;
- uint64_t cr3;
- u_int cpuid;
+ u_int cpuid, i;
if (pmap_type_guest(pmap)) {
pmap_invalidate_ept(pmap);
@@ -1510,60 +1438,42 @@ pmap_invalidate_all(pmap_t pmap)
("pmap_invalidate_all: invalid type %d", pmap->pm_type));
sched_pin();
- cpuid = PCPU_GET(cpuid);
- if (pmap == kernel_pmap ||
- (pmap_pcid_enabled && !CPU_CMP(&pmap->pm_save, &all_cpus)) ||
- !CPU_CMP(&pmap->pm_active, &all_cpus)) {
- if (invpcid_works) {
+ if (pmap == kernel_pmap) {
+ if (pmap_pcid_enabled && invpcid_works) {
bzero(&d, sizeof(d));
invpcid(&d, INVPCID_CTXGLOB);
} else {
invltlb_globpcid();
}
- if (!CPU_ISSET(cpuid, &pmap->pm_active))
- CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
- smp_invltlb(pmap);
+ mask = &all_cpus;
} else {
- other_cpus = all_cpus;
- CPU_CLR(cpuid, &other_cpus);
-
- /*
- * This logic is duplicated in the Xinvltlb shootdown
- * IPI handler.
- */
- if (pmap_pcid_enabled) {
- if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
+ cpuid = PCPU_GET(cpuid);
+ if (pmap == PCPU_GET(curpmap)) {
+ if (pmap_pcid_enabled) {
if (invpcid_works) {
- d.pcid = pmap->pm_pcid;
+ d.pcid = pmap->pm_pcids[cpuid].pm_pcid;
d.pad = 0;
d.addr = 0;
invpcid(&d, INVPCID_CTX);
} else {
- cr3 = rcr3();
- critical_enter();
-
- /*
- * Bit 63 is clear, pcid TLB
- * entries are invalidated.
- */
- load_cr3(pmap->pm_cr3);
- load_cr3(cr3 | CR3_PCID_SAVE);
- critical_exit();
+ load_cr3(pmap->pm_cr3 | pmap->pm_pcids
+ [PCPU_GET(cpuid)].pm_pcid);
}
} else {
- invltlb_globpcid();
+ invltlb();
}
- } else if (CPU_ISSET(cpuid, &pmap->pm_active))
- invltlb();
- if (!CPU_ISSET(cpuid, &pmap->pm_active))
- CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
- if (pmap_pcid_enabled)
- CPU_AND(&other_cpus, &pmap->pm_save);
- else
- CPU_AND(&other_cpus, &pmap->pm_active);
- if (!CPU_EMPTY(&other_cpus))
- smp_masked_invltlb(other_cpus, pmap);
+ } else if (pmap_pcid_enabled) {
+ pmap->pm_pcids[cpuid].pm_gen = 0;
+ }
+ if (pmap_pcid_enabled) {
+ CPU_FOREACH(i) {
+ if (cpuid != i)
+ pmap->pm_pcids[i].pm_gen = 0;
+ }
+ }
+ mask = &pmap->pm_active;
}
+ smp_masked_invltlb(*mask, pmap);
sched_unpin();
}
@@ -1627,7 +1537,6 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
active = all_cpus;
else {
active = pmap->pm_active;
- CPU_AND_ATOMIC(&pmap->pm_save, &active);
}
if (CPU_OVERLAP(&active, &other_cpus)) {
act.store = cpuid;
@@ -2205,11 +2114,9 @@ pmap_pinit0(pmap_t pmap)
pmap->pm_cr3 = KPML4phys;
pmap->pm_root.rt_root = 0;
CPU_ZERO(&pmap->pm_active);
- CPU_ZERO(&pmap->pm_save);
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
- pmap->pm_pcid = pmap_pcid_enabled ? 0 : -1;
pmap->pm_flags = pmap_flags;
}
@@ -2233,7 +2140,10 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
pml4phys = VM_PAGE_TO_PHYS(pml4pg);
pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pml4phys);
- pmap->pm_pcid = -1;
+ CPU_FOREACH(i) {
+ pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE;
+ pmap->pm_pcids[i].pm_gen = 0;
+ }
pmap->pm_cr3 = ~0; /* initialize to an invalid value */
if ((pml4pg->flags & PG_ZERO) == 0)
@@ -2260,12 +2170,6 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
/* install self-referential address mapping entry(s) */
pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) |
X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M;
-
- if (pmap_pcid_enabled) {
- pmap->pm_pcid = alloc_unr(&pcid_unr);
- if (pmap->pm_pcid != -1)
- pmap->pm_cr3 |= pmap->pm_pcid;
- }
}
pmap->pm_root.rt_root = 0;
@@ -2274,7 +2178,6 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
pmap->pm_flags = flags;
pmap->pm_eptgen = 0;
- CPU_ZERO(&pmap->pm_save);
return (1);
}
@@ -2535,14 +2438,6 @@ pmap_release(pmap_t pmap)
KASSERT(CPU_EMPTY(&pmap->pm_active),
("releasing active pmap %p", pmap));
- if (pmap_pcid_enabled) {
- /*
- * Invalidate any left TLB entries, to allow the reuse
- * of the pcid.
- */
- pmap_invalidate_all(pmap);
- }
-
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4));
for (i = 0; i < NKPML4E; i++) /* KVA */
@@ -2554,8 +2449,6 @@ pmap_release(pmap_t pmap)
m->wire_count--;
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
vm_page_free_zero(m);
- if (pmap->pm_pcid != -1)
- free_unr(&pcid_unr, pmap->pm_pcid);
}
static int
@@ -6657,28 +6550,84 @@ retry:
return (val);
}
+static uint64_t
+pmap_pcid_alloc(pmap_t pmap, u_int cpuid)
+{
+ uint32_t gen, new_gen, pcid_next;
+
+ CRITICAL_ASSERT(curthread);
+ gen = PCPU_GET(pcid_gen);
+ if (pmap->pm_pcids[cpuid].pm_pcid == PMAP_PCID_KERN ||
+ pmap->pm_pcids[cpuid].pm_gen == gen)
+ return (CR3_PCID_SAVE);
+ pcid_next = PCPU_GET(pcid_next);
+ KASSERT(pcid_next <= PMAP_PCID_OVERMAX, ("cpu %d pcid_next %#x",
+ cpuid, pcid_next));
+ if (pcid_next == PMAP_PCID_OVERMAX) {
+ new_gen = gen + 1;
+ if (new_gen == 0)
+ new_gen = 1;
+ PCPU_SET(pcid_gen, new_gen);
+ pcid_next = PMAP_PCID_KERN + 1;
+ } else {
+ new_gen = gen;
+ }
+ pmap->pm_pcids[cpuid].pm_pcid = pcid_next;
+ pmap->pm_pcids[cpuid].pm_gen = new_gen;
+ PCPU_SET(pcid_next, pcid_next + 1);
+ return (0);
+}
+
void
-pmap_activate(struct thread *td)
+pmap_activate_sw(struct thread *td)
{
- pmap_t pmap, oldpmap;
- u_int cpuid;
+ pmap_t oldpmap, pmap;
+ uint64_t cached, cr3;
+ u_int cpuid;
- critical_enter();
- pmap = vmspace_pmap(td->td_proc->p_vmspace);
oldpmap = PCPU_GET(curpmap);
+ pmap = vmspace_pmap(td->td_proc->p_vmspace);
+ if (oldpmap == pmap)
+ return;
cpuid = PCPU_GET(cpuid);
#ifdef SMP
- CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
- CPU_SET_ATOMIC(cpuid, &pmap->pm_save);
#else
- CPU_CLR(cpuid, &oldpmap->pm_active);
CPU_SET(cpuid, &pmap->pm_active);
- CPU_SET(cpuid, &pmap->pm_save);
#endif
- td->td_pcb->pcb_cr3 = pmap->pm_cr3;
- load_cr3(pmap->pm_cr3);
+ cr3 = rcr3();
+ if (pmap_pcid_enabled) {
+ cached = pmap_pcid_alloc(pmap, cpuid);
+ KASSERT(pmap->pm_pcids[cpuid].pm_pcid >= 0 &&
+ pmap->pm_pcids[cpuid].pm_pcid < PMAP_PCID_OVERMAX,
+ ("pmap %p cpu %d pcid %#x", pmap, cpuid,
+ pmap->pm_pcids[cpuid].pm_pcid));
+ KASSERT(pmap->pm_pcids[cpuid].pm_pcid != PMAP_PCID_KERN || pmap == kernel_pmap,
+ ("non-kernel pmap %p cpu %d pcid %#x", pmap, cpuid,
+ pmap->pm_pcids[cpuid].pm_pcid));
+ if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) {
+ load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid |
+ cached);
+ if (cached)
+ PCPU_INC(pm_save_cnt);
+ }
+ } else if (cr3 != pmap->pm_cr3) {
+ load_cr3(pmap->pm_cr3);
+ }
PCPU_SET(curpmap, pmap);
+#ifdef SMP
+ CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
+#else
+ CPU_CLR(cpuid, &oldpmap->pm_active);
+#endif
+}
+
+void
+pmap_activate(struct thread *td)
+{
+
+ critical_enter();
+ pmap_activate_sw(td);
critical_exit();
}
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 26ac7ab..0d03ed6 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -219,7 +219,6 @@ cpu_fork(td1, p2, td2, flags)
* return address on stack. These are the kernel mode register values.
*/
pmap2 = vmspace_pmap(p2->p_vmspace);
- pcb2->pcb_cr3 = pmap2->pm_cr3;
pcb2->pcb_r12 = (register_t)fork_return; /* fork_trampoline argument */
pcb2->pcb_rbp = 0;
pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *);
@@ -477,7 +476,6 @@ cpu_set_upcall(struct thread *td, struct thread *td0)
pcb2->pcb_rip = (register_t)fork_trampoline;
/*
* If we didn't copy the pcb, we'd need to do the following registers:
- * pcb2->pcb_cr3: cloned above.
* pcb2->pcb_dr*: cloned above.
* pcb2->pcb_savefpu: cloned above.
* pcb2->pcb_onfault: cloned above (always NULL here?).
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index 7ea4bcf..c0c5b0a 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -540,9 +540,8 @@ static __inline void
invpcid(struct invpcid_descr *d, int type)
{
- /* invpcid (%rdx),%rax */
- __asm __volatile(".byte 0x66,0x0f,0x38,0x82,0x02"
- : : "d" (d), "a" ((u_long)type) : "memory");
+ __asm __volatile("invpcid (%0),%1"
+ : : "r" (d), "r" ((u_long)type) : "memory");
}
static __inline u_short
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index fe898e9..91e8fb2 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -63,7 +63,9 @@
uint64_t pc_dbreg[16]; /* ddb debugging regs */ \
int pc_dbreg_cmd; /* ddb debugging reg cmd */ \
u_int pc_vcpu_id; /* Xen vCPU ID */ \
- char __pad[157] /* be divisor of PAGE_SIZE \
+ uint32_t pc_pcid_next; \
+ uint32_t pc_pcid_gen; \
+ char __pad[149] /* be divisor of PAGE_SIZE \
after cache alignment */
#define PC_DBREG_CMD_NONE 0
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index 868db7d..39df87a 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -219,6 +219,10 @@
#define ISA_HOLE_START 0xa0000
#define ISA_HOLE_LENGTH (0x100000-ISA_HOLE_START)
+#define PMAP_PCID_NONE 0xffffffff
+#define PMAP_PCID_KERN 0
+#define PMAP_PCID_OVERMAX 0x1000
+
#ifndef LOCORE
#include <sys/queue.h>
@@ -292,6 +296,11 @@ enum pmap_type {
PT_RVI, /* AMD's nested page tables */
};
+struct pmap_pcids {
+ uint32_t pm_pcid;
+ uint32_t pm_gen;
+};
+
/*
* The kernel virtual address (KVA) of the level 4 page table page is always
* within the direct map (DMAP) region.
@@ -302,13 +311,12 @@ struct pmap {
uint64_t pm_cr3;
TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
cpuset_t pm_active; /* active on cpus */
- cpuset_t pm_save; /* Context valid on cpus mask */
- int pm_pcid; /* context id */
enum pmap_type pm_type; /* regular or nested tables */
struct pmap_statistics pm_stats; /* pmap statistics */
struct vm_radix pm_root; /* spare page table pages */
long pm_eptgen; /* EPT pmap generation id */
int pm_flags;
+ struct pmap_pcids pm_pcids[MAXCPU];
};
/* flags */
@@ -375,6 +383,9 @@ extern vm_paddr_t dmaplimit;
#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0)
#define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz))
+struct thread;
+
+void pmap_activate_sw(struct thread *);
void pmap_bootstrap(vm_paddr_t *);
int pmap_change_attr(vm_offset_t, vm_size_t, int);
void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate);
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index 034a693..4fd6aac 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -46,6 +46,7 @@ extern struct mtx ap_boot_mtx;
extern int cpu_logical;
extern int cpu_cores;
extern int pmap_pcid_enabled;
+extern int invpcid_works;
extern u_int xhits_gbl[];
extern u_int xhits_pg[];
extern u_int xhits_rng[];
@@ -53,10 +54,6 @@ extern u_int ipi_global;
extern u_int ipi_page;
extern u_int ipi_range;
extern u_int ipi_range_size;
-extern u_int ipi_masked_global;
-extern u_int ipi_masked_page;
-extern u_int ipi_masked_range;
-extern u_int ipi_masked_range_size;
extern volatile int smp_tlb_wait;
@@ -78,9 +75,9 @@ extern u_long *ipi_rendezvous_counts[MAXCPU];
/* IPI handlers */
inthand_t
- IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid enabled */
IDTVEC(invltlb), /* TLB shootdowns - global */
- IDTVEC(invlpg_pcid), /* TLB shootdowns - 1 page, pcid enabled */
+ IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid */
+ IDTVEC(invltlb_invpcid),/* TLB shootdowns - global, invpcid */
IDTVEC(invlpg), /* TLB shootdowns - 1 page */
IDTVEC(invlrng), /* TLB shootdowns - page range */
IDTVEC(invlcache), /* Write back and invalidate cache */
@@ -100,8 +97,8 @@ void cpususpend_handler(void);
void init_secondary_tail(void);
void invltlb_handler(void);
void invltlb_pcid_handler(void);
+void invltlb_invpcid_handler(void);
void invlpg_handler(void);
-void invlpg_pcid_handler(void);
void invlrng_handler(void);
void invlcache_handler(void);
void init_secondary(void);
@@ -114,13 +111,9 @@ void ipi_selected(cpuset_t cpus, u_int ipi);
u_int mp_bootaddress(u_int);
void set_interrupt_apic_ids(void);
void smp_cache_flush(void);
-void smp_invlpg(struct pmap *pmap, vm_offset_t addr);
-void smp_masked_invlpg(cpuset_t mask, struct pmap *pmap, vm_offset_t addr);
-void smp_invlpg_range(struct pmap *pmap, vm_offset_t startva,
+void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
+void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
vm_offset_t endva);
-void smp_masked_invlpg_range(cpuset_t mask, struct pmap *pmap,
- vm_offset_t startva, vm_offset_t endva);
-void smp_invltlb(struct pmap *pmap);
void smp_masked_invltlb(cpuset_t mask, struct pmap *pmap);
int native_start_all_aps(void);
void mem_range_AP_init(void);
diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h
index a771fff..6c849ff 100644
--- a/sys/x86/include/specialreg.h
+++ b/sys/x86/include/specialreg.h
@@ -53,6 +53,7 @@
#define CR0_CD 0x40000000 /* Cache Disable */
#define CR3_PCID_SAVE 0x8000000000000000
+#define CR3_PCID_MASK 0xfff
/*
* Bits in PPro special registers
diff --git a/sys/x86/xen/xen_apic.c b/sys/x86/xen/xen_apic.c
index c742920..a65f3b5 100644
--- a/sys/x86/xen/xen_apic.c
+++ b/sys/x86/xen/xen_apic.c
@@ -423,31 +423,29 @@ xen_invltlb(void *arg)
#ifdef __amd64__
static int
-xen_invltlb_pcid(void *arg)
+xen_invltlb_invpcid(void *arg)
{
- invltlb_pcid_handler();
+ invltlb_invpcid_handler();
return (FILTER_HANDLED);
}
-#endif
static int
-xen_invlpg(void *arg)
+xen_invltlb_pcid(void *arg)
{
- invlpg_handler();
+ invltlb_pcid_handler();
return (FILTER_HANDLED);
}
+#endif
-#ifdef __amd64__
static int
-xen_invlpg_pcid(void *arg)
+xen_invlpg(void *arg)
{
- invlpg_pcid_handler();
+ invlpg_handler();
return (FILTER_HANDLED);
}
-#endif
static int
xen_invlrng(void *arg)
@@ -532,8 +530,8 @@ xen_setup_cpus(void)
#ifdef __amd64__
if (pmap_pcid_enabled) {
- xen_ipis[IPI_TO_IDX(IPI_INVLTLB)].filter = xen_invltlb_pcid;
- xen_ipis[IPI_TO_IDX(IPI_INVLPG)].filter = xen_invlpg_pcid;
+ xen_ipis[IPI_TO_IDX(IPI_INVLTLB)].filter = invpcid_works ?
+ xen_invltlb_invpcid : xen_invltlb_pcid;
}
#endif
CPU_FOREACH(i)