Diffstat (limited to 'sys/amd64/amd64')
-rw-r--r--  sys/amd64/amd64/apic_vector.S  241
-rw-r--r--  sys/amd64/amd64/cpu_switch.S    34
-rw-r--r--  sys/amd64/amd64/genassym.c       4
-rw-r--r--  sys/amd64/amd64/machdep.c        2
-rw-r--r--  sys/amd64/amd64/mp_machdep.c    62
-rw-r--r--  sys/amd64/amd64/pmap.c         276
-rw-r--r--  sys/amd64/amd64/vm_machdep.c     2
7 files changed, 514 insertions(+), 107 deletions(-)
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index 7551cc5..e868cf5 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -43,6 +43,12 @@
#include "assym.s"
+#ifdef SMP
+#define LK lock ;
+#else
+#define LK
+#endif
+
/*
* I/O Interrupt Entry Point. Rather than having one entry point for
* each interrupt source, we use one entry point for each 32-bit word
@@ -149,6 +155,38 @@ IDTVEC(xen_intr_upcall)
* Global address space TLB shootdown.
*/
.text
+
+#define NAKE_INTR_CS 24
+
+ SUPERALIGN_TEXT
+global_invltlb:
+ movl %cr4,%eax
+ andl $~0x80,%eax
+ movl %eax,%cr4
+ orl $0x80,%eax
+ movl %eax,%cr4
+invltlb_ret_clear_pm_save:
+ movq smp_tlb_pmap,%rdx
+ testq %rdx,%rdx
+ jz invltlb_ret
+ testb $SEL_RPL_MASK,NAKE_INTR_CS(%rsp)
+ jz 1f
+ swapgs
+1:
+ movl PCPU(CPUID),%eax
+ jz 2f
+ swapgs
+2:
+ LK btcl %eax,PM_SAVE(%rdx)
+ SUPERALIGN_TEXT
+invltlb_ret:
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
+ LK incl smp_tlb_wait
+ popq %rdx
+ popq %rax
+ jmp doreti_iret
+
SUPERALIGN_TEXT
IDTVEC(invltlb)
#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
@@ -165,18 +203,44 @@ IDTVEC(invltlb)
#endif
pushq %rax
+ pushq %rdx
- movq %cr3, %rax /* invalidate the TLB */
- movq %rax, %cr3
-
- movq lapic, %rax
- movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
-
- lock
- incl smp_tlb_wait
-
- popq %rax
- jmp doreti_iret
+ movq %cr3,%rax
+ cmpl $0,pmap_pcid_enabled
+ je 2f
+
+ movq $smp_tlb_invpcid,%rdx
+ cmpl $0,(%rdx)
+ je global_invltlb
+ cmpl $-1,(%rdx)
+ je global_invltlb
+
+ /*
+ * Non-zero smp_tlb_invpcid, only invalidate TLB for entries with
+ * current PCID.
+ */
+ cmpl $0,invpcid_works
+ je 1f
+ /* Use invpcid if available. */
+ movl $1,%eax /* INVPCID_CTX */
+ /* invpcid (%rdx),%rax */
+ .byte 0x66,0x0f,0x38,0x82,0x02
+ jmp invltlb_ret_clear_pm_save
+1:
+ /* Otherwise reload %cr3 twice. */
+ movq pcid_cr3,%rdx
+ cmpq %rax,%rdx
+ je 2f
+ movq %rdx,%cr3 /* Invalidate, bit 63 is zero. */
+ btsq $63,%rax
+
+ /*
+ * Invalidate the TLB if PCID is not enabled.
+ * Restore the old address space.
+ */
+2:
+ movq %rax,%cr3
+ jmp invltlb_ret_clear_pm_save
/*
* Single page TLB shootdown
@@ -198,18 +262,54 @@ IDTVEC(invlpg)
#endif
pushq %rax
-
- movq smp_tlb_addr1, %rax
- invlpg (%rax) /* invalidate single page */
-
- movq lapic, %rax
- movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
-
- lock
- incl smp_tlb_wait
-
- popq %rax
- jmp doreti_iret
+ pushq %rdx
+ movq $smp_tlb_invpcid,%rdx
+ cmpl $0,pmap_pcid_enabled
+ je 3f
+ cmpl $0,invpcid_works
+ jne 2f
+
+ /* kernel pmap - use invlpg to invalidate global mapping */
+ cmpl $0,(%rdx)
+ je 3f
+ cmpl $-1,(%rdx)
+ je global_invltlb
+
+ /*
+ * PCID supported, but INVPCID is not.
+ * Temporarily switch to the target address space and do INVLPG.
+ */
+ pushq %rcx
+ movq %cr3,%rcx
+ movq pcid_cr3,%rax
+ cmp %rcx,%rax
+ je 1f
+ btsq $63,%rax
+ movq %rax,%cr3
+1: movq 8(%rdx),%rax
+ invlpg (%rax)
+ btsq $63,%rcx
+ movq %rcx,%cr3
+ popq %rcx
+ jmp invltlb_ret
+
+ /*
+ * Invalidate the TLB entry using INVPCID_ADDR.
+ */
+2:
+ xorl %eax,%eax
+/* invpcid (%rdx),%rax */
+ .byte 0x66,0x0f,0x38,0x82,0x02
+ jmp invltlb_ret
+
+ /*
+ * PCID is not supported or kernel pmap.
+ * Invalidate single page using INVLPG.
+ */
+3:
+ movq 8(%rdx),%rax
+ invlpg (%rax)
+ jmp invltlb_ret
/*
* Page range TLB shootdown.
@@ -232,23 +332,76 @@ IDTVEC(invlrng)
pushq %rax
pushq %rdx
-
- movq smp_tlb_addr1, %rdx
- movq smp_tlb_addr2, %rax
+ movq $smp_tlb_invpcid,%rdx
+ cmpl $0,pmap_pcid_enabled
+ je invlrng_single_page
+ cmpl $0,invpcid_works
+ jne invlrng_invpcid
+
+ /* kernel pmap - use invlpg to invalidate global mapping */
+ cmpl $0,(%rdx)
+ je invlrng_single_page
+ cmpl $-1,(%rdx)
+ je global_invltlb
+
+ pushq %rcx
+ movq %cr3,%rcx
+ movq pcid_cr3,%rax
+ cmpq %rcx,%rax
+ je 1f
+ btsq $63,%rax
+ movq %rax,%cr3
+1:
+ movq 8(%rdx),%rdx
+ movq smp_tlb_addr2,%rax
+2:
+ invlpg (%rdx)
+ addq $PAGE_SIZE,%rdx
+ cmpq %rax,%rdx
+ jb 2b
+ btsq $63,%rcx
+ movq %rcx,%cr3
+ popq %rcx
+ jmp invltlb_ret
+
+invlrng_invpcid:
+ testb $SEL_RPL_MASK,NAKE_INTR_CS(%rsp)
+ jz 1f
+ swapgs
+1:
+ pushq %rcx
+ movq (%rdx),%rcx
+ movq %rcx,PCPU(INVPCID_DESCR)
+ movq 8(%rdx),%rax
+ movq %rax,PCPU(INVPCID_DESCR)+8
+ movq smp_tlb_addr2,%rcx
+ xorl %eax,%eax
+ movq $PC_INVPCID_DESCR,%rdx
+ gs
+ subq 8(%rdx),%rcx
+ shrq $PAGE_SHIFT,%rcx
+2:
+ gs
+// invpcid (%rdx),%rax
+ .byte 0x66,0x0f,0x38,0x82,0x02
+ gs
+ addq $PAGE_SIZE,8(%rdx)
+ dec %rcx
+ jne 2b
+ popq %rcx
+ testb $SEL_RPL_MASK,NAKE_INTR_CS(%rsp)
+ jz invltlb_ret
+ swapgs
+ jmp invltlb_ret
+
+invlrng_single_page:
+ movq 8(%rdx),%rdx
+ movq smp_tlb_addr2,%rax
1: invlpg (%rdx) /* invalidate single page */
- addq $PAGE_SIZE, %rdx
- cmpq %rax, %rdx
+ addq $PAGE_SIZE,%rdx
+ cmpq %rax,%rdx
jb 1b
-
- movq lapic, %rax
- movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
-
- lock
- incl smp_tlb_wait
-
- popq %rdx
- popq %rax
- jmp doreti_iret
+ jmp invltlb_ret
/*
* Invalidate cache.
@@ -265,17 +418,9 @@ IDTVEC(invlcache)
#endif
pushq %rax
-
+ pushq %rdx
wbinvd
-
- movq lapic, %rax
- movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
-
- lock
- incl smp_tlb_wait
-
- popq %rax
- jmp doreti_iret
+ jmp invltlb_ret
/*
* Handler for IPIs sent via the per-cpu IPI bitmap.
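
The .byte 0x66,0x0f,0x38,0x82,0x02 sequences above hand-encode invpcid (%rdx),%rax for
assemblers that predate the mnemonic: %rax carries the invalidation type and the memory
operand points at a 16-byte descriptor. A minimal C sketch of that descriptor and wrapper,
assuming the layout implied by smp_tlb_invpcid and PCPU(INVPCID_DESCR) in this diff; the
names and the inline-asm helper below are illustrative, not copied from machine/cpufunc.h.

#include <stdint.h>

/* Invalidation types passed in the register operand of INVPCID. */
#define INVPCID_ADDR    0       /* one linear address in one PCID */
#define INVPCID_CTX     1       /* all non-global entries for one PCID */
#define INVPCID_CTXGLOB 2       /* all entries, including global ones */

/* 16-byte descriptor: PCID in bits 0-11, reserved bits, then the linear address. */
struct invpcid_descr_sketch {
    uint64_t pcid:12;
    uint64_t pad:52;
    uint64_t addr;
} __attribute__((aligned(16)));

static inline void
invpcid_sketch(const struct invpcid_descr_sketch *d, unsigned long type)
{
    /* AT&T operand order: memory descriptor first, type register second. */
    __asm__ __volatile__("invpcid (%0),%1" : : "r" (d), "r" (type) : "memory");
}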
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index ed1ccb5..ac30990 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -77,8 +77,7 @@ ENTRY(cpu_throw)
LK btrl %eax,PM_ACTIVE(%rdx) /* clear old */
1:
movq TD_PCB(%rsi),%r8 /* newtd->td_pcb */
- movq PCB_CR3(%r8),%rdx
- movq %rdx,%cr3 /* new address space */
+ movq PCB_CR3(%r8),%rcx /* new address space */
jmp swact
END(cpu_throw)
@@ -145,20 +144,41 @@ ctx_switch_xsave:
SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */
jmp sw1
swinact:
- movq %rcx,%cr3 /* new address space */
- movl PCPU(CPUID), %eax
+ movl PCPU(CPUID),%eax
/* Release bit from old pmap->pm_active */
- movq PCPU(CURPMAP),%rcx
- LK btrl %eax,PM_ACTIVE(%rcx) /* clear old */
- SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */
+ movq PCPU(CURPMAP),%r12
+ LK btrl %eax,PM_ACTIVE(%r12) /* clear old */
+ SETLK %rdx,TD_LOCK(%rdi) /* Release the old thread */
swact:
/* Set bit in new pmap->pm_active */
movq TD_PROC(%rsi),%rdx /* newproc */
movq P_VMSPACE(%rdx), %rdx
addq $VM_PMAP,%rdx
+ cmpl $-1,PM_PCID(%rdx)
+ je 1f
+ LK btsl %eax,PM_SAVE(%rdx)
+ jnc 1f
+ btsq $63,%rcx /* CR3_PCID_SAVE */
+ incq PCPU(PM_SAVE_CNT)
+1:
+ movq %rcx,%cr3 /* new address space */
LK btsl %eax,PM_ACTIVE(%rdx) /* set new */
movq %rdx,PCPU(CURPMAP)
+ /*
+ * We might lose the race and another CPU might have changed
+ * the pmap after we set our bit in pmap->pm_save. Recheck.
+ * Reload %cr3 with CR3_PCID_SAVE bit cleared if pmap was
+ * modified, causing TLB flush for this pcid.
+ */
+ btrq $63,%rcx
+ jnc 1f
+ LK btsl %eax,PM_SAVE(%rdx)
+ jc 1f
+ decq PCPU(PM_SAVE_CNT)
+ movq %rcx,%cr3
+1:
+
sw1:
#if defined(SCHED_ULE) && defined(SMP)
/* Wait for the new thread to become unblocked */
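
The swact changes above are easier to follow in C. The sketch below models pm_save and
pm_active as plain 64-bit masks and pm_save_cnt as a single global rather than a per-CPU
counter; the real code is the locked btsl/btrl assembly in the hunk, and the recheck exists
because another CPU can invalidate the pmap between our pm_save update and the %cr3 load.

#include <stdbool.h>
#include <stdint.h>

#define CR3_PCID_SAVE   (1ULL << 63)    /* keep this PCID's TLB entries on load */

struct pmap_sketch {
    int      pm_pcid;       /* -1: PCID not used for this pmap */
    uint64_t pm_save;       /* CPUs that may still cache this PCID */
    uint64_t pm_active;     /* CPUs currently running on this pmap */
};

static uint64_t pm_save_cnt_sketch;

static void load_cr3_sketch(uint64_t cr3) { (void)cr3; }    /* stands in for mov %rcx,%cr3 */

static bool
test_and_set_bit(volatile uint64_t *mask, unsigned bit)
{
    return ((__atomic_fetch_or(mask, 1ULL << bit, __ATOMIC_SEQ_CST) >> bit) & 1);
}

static void
swact_pcid_sketch(struct pmap_sketch *pmap, uint64_t new_cr3, unsigned cpu)
{
    if (pmap->pm_pcid != -1 && test_and_set_bit(&pmap->pm_save, cpu)) {
        /* This CPU may still hold valid TLB entries tagged with the PCID. */
        new_cr3 |= CR3_PCID_SAVE;
        pm_save_cnt_sketch++;
    }
    load_cr3_sketch(new_cr3);
    test_and_set_bit(&pmap->pm_active, cpu);

    /*
     * Recheck: a shootdown on another CPU may have cleared our pm_save
     * bit after we set it.  If so, reload %cr3 without CR3_PCID_SAVE so
     * the stale PCID-tagged entries are flushed.
     */
    if ((new_cr3 & CR3_PCID_SAVE) != 0 &&
        !test_and_set_bit(&pmap->pm_save, cpu)) {
        pm_save_cnt_sketch--;
        load_cr3_sketch(new_cr3 & ~CR3_PCID_SAVE);
    }
}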
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 3043bb5..62017e7 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -76,6 +76,8 @@ __FBSDID("$FreeBSD$");
ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
+ASSYM(PM_SAVE, offsetof(struct pmap, pm_save));
+ASSYM(PM_PCID, offsetof(struct pmap, pm_pcid));
ASSYM(P_MD, offsetof(struct proc, p_md));
ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));
@@ -225,6 +227,8 @@ ASSYM(PC_GS32P, offsetof(struct pcpu, pc_gs32p));
ASSYM(PC_LDT, offsetof(struct pcpu, pc_ldt));
ASSYM(PC_COMMONTSSP, offsetof(struct pcpu, pc_commontssp));
ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss));
+ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt));
+ASSYM(PC_INVPCID_DESCR, offsetof(struct pcpu, pc_invpcid_descr));
ASSYM(LA_VER, offsetof(struct LAPIC, version));
ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 7f7e54a..f3969d3 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1909,7 +1909,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0;
- thread0.td_pcb->pcb_cr3 = KPML4phys;
+ thread0.td_pcb->pcb_cr3 = KPML4phys; /* PCID 0 is reserved for kernel */
thread0.td_frame = &proc0_tf;
env = getenv("kernelname");
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 267b933..530aa61 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -107,9 +107,11 @@ struct pcb stoppcbs[MAXCPU];
struct pcb **susppcbs;
/* Variables needed for SMP tlb shootdown. */
-vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
+struct invpcid_descr smp_tlb_invpcid;
volatile int smp_tlb_wait;
+uint64_t pcid_cr3;
+pmap_t smp_tlb_pmap;
#ifdef COUNT_IPIS
/* Interrupt counts. */
@@ -603,6 +605,8 @@ cpu_mp_announce(void)
}
}
+extern int pmap_pcid_enabled;
+
/*
* AP CPU's call this to initialize themselves.
*/
@@ -768,6 +772,8 @@ init_secondary(void)
*/
load_cr4(rcr4() | CR4_PGE);
+ if (pmap_pcid_enabled)
+ load_cr4(rcr4() | CR4_PCIDE);
load_ds(_udatasel);
load_es(_udatasel);
load_fs(_ufssel);
@@ -1119,7 +1125,8 @@ ipi_send_cpu(int cpu, u_int ipi)
* Flush the TLB on all other CPU's
*/
static void
-smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_tlb_shootdown(u_int vector, pmap_t pmap, vm_offset_t addr1,
+ vm_offset_t addr2)
{
u_int ncpu;
@@ -1129,8 +1136,16 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
if (!(read_rflags() & PSL_I))
panic("%s: interrupts disabled", __func__);
mtx_lock_spin(&smp_ipi_mtx);
- smp_tlb_addr1 = addr1;
+ smp_tlb_invpcid.addr = addr1;
+ if (pmap == NULL) {
+ smp_tlb_invpcid.pcid = 0;
+ } else {
+ smp_tlb_invpcid.pcid = pmap->pm_pcid;
+ pcid_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) |
+ (pmap->pm_pcid == -1 ? 0 : pmap->pm_pcid);
+ }
smp_tlb_addr2 = addr2;
+ smp_tlb_pmap = pmap;
atomic_store_rel_int(&smp_tlb_wait, 0);
ipi_all_but_self(vector);
while (smp_tlb_wait < ncpu)
@@ -1139,7 +1154,8 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
}
static void
-smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
+ vm_offset_t addr1, vm_offset_t addr2)
{
int cpu, ncpu, othercpus;
@@ -1155,8 +1171,16 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_of
if (!(read_rflags() & PSL_I))
panic("%s: interrupts disabled", __func__);
mtx_lock_spin(&smp_ipi_mtx);
- smp_tlb_addr1 = addr1;
+ smp_tlb_invpcid.addr = addr1;
+ if (pmap == NULL) {
+ smp_tlb_invpcid.pcid = 0;
+ } else {
+ smp_tlb_invpcid.pcid = pmap->pm_pcid;
+ pcid_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) |
+ (pmap->pm_pcid == -1 ? 0 : pmap->pm_pcid);
+ }
smp_tlb_addr2 = addr2;
+ smp_tlb_pmap = pmap;
atomic_store_rel_int(&smp_tlb_wait, 0);
if (CPU_ISFULLSET(&mask)) {
ncpu = othercpus;
@@ -1182,15 +1206,15 @@ smp_cache_flush(void)
{
if (smp_started)
- smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
+ smp_tlb_shootdown(IPI_INVLCACHE, NULL, 0, 0);
}
void
-smp_invltlb(void)
+smp_invltlb(pmap_t pmap)
{
if (smp_started) {
- smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
+ smp_tlb_shootdown(IPI_INVLTLB, pmap, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_global++;
#endif
@@ -1198,11 +1222,11 @@ smp_invltlb(void)
}
void
-smp_invlpg(vm_offset_t addr)
+smp_invlpg(pmap_t pmap, vm_offset_t addr)
{
if (smp_started) {
- smp_tlb_shootdown(IPI_INVLPG, addr, 0);
+ smp_tlb_shootdown(IPI_INVLPG, pmap, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_page++;
#endif
@@ -1210,11 +1234,11 @@ smp_invlpg(vm_offset_t addr)
}
void
-smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
+smp_invlpg_range(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2)
{
if (smp_started) {
- smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
+ smp_tlb_shootdown(IPI_INVLRNG, pmap, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_range++;
ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
@@ -1223,11 +1247,11 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
}
void
-smp_masked_invltlb(cpuset_t mask)
+smp_masked_invltlb(cpuset_t mask, pmap_t pmap)
{
if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
+ smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, NULL, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_global++;
#endif
@@ -1235,11 +1259,11 @@ smp_masked_invltlb(cpuset_t mask)
}
void
-smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
+smp_masked_invlpg(cpuset_t mask, pmap_t pmap, vm_offset_t addr)
{
if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
+ smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_page++;
#endif
@@ -1247,11 +1271,13 @@ smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
}
void
-smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
+smp_masked_invlpg_range(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
+ vm_offset_t addr2)
{
if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
+ smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1,
+ addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_range++;
ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
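
smp_tlb_shootdown() above publishes two values for the IPI handlers: the invpcid
descriptor and pcid_cr3, the %cr3 image for the target pmap. The sketch below mirrors
that pcid_cr3 computation, assuming the standard CR3 layout with CR4.PCIDE set (PCID in
bits 0-11, page-table root above it); the handler itself sets bit 63 (CR3_PCID_SAVE) when
it wants to switch address spaces without flushing, as the btsq $63 instructions in
apic_vector.S show.

#include <stdint.h>

#define CR3_PCID_MASK   0xfffULL        /* bits 0-11: PCID */
#define CR3_PCID_SAVE   (1ULL << 63)    /* on MOV to %cr3: keep this PCID's entries */

/* PML4 physical address ORed with the pmap's PCID (0 when the pmap has none). */
static inline uint64_t
make_pcid_cr3(uint64_t pml4_phys, int pm_pcid)
{
    return ((pml4_phys & ~CR3_PCID_MASK) |
        (pm_pcid == -1 ? 0 : (uint64_t)pm_pcid));
}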
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 851f92a..bca40f0 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -116,11 +116,8 @@ __FBSDID("$FreeBSD$");
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
-#ifdef SMP
+#include <sys/_unrhdr.h>
#include <sys/smp.h>
-#else
-#include <sys/cpuset.h>
-#endif
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -250,6 +247,53 @@ static struct md_page *pv_table;
pt_entry_t *CMAP1 = 0;
caddr_t CADDR1 = 0;
+static struct unrhdr pcid_unr;
+static struct mtx pcid_mtx;
+int pmap_pcid_enabled = 1;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN, &pmap_pcid_enabled,
+ 0, "Is TLB Context ID enabled ?");
+int invpcid_works = 0;
+
+/*
+ * Perform the guaranteed invalidation of all TLB entries. This
+ * includes the global entries, and entries in all PCIDs, not only the
+ * current context. The function works both on non-PCID CPUs and CPUs
+ * with the PCID turned off or on. See IA-32 SDM Vol. 3a 4.10.4.1
+ * Operations that Invalidate TLBs and Paging-Structure Caches.
+ */
+static __inline void
+invltlb_globpcid(void)
+{
+ uint64_t cr4;
+
+ cr4 = rcr4();
+ load_cr4(cr4 & ~CR4_PGE);
+ /*
+ * Although preemption at this point could be detrimental to
+ * performance, it would not lead to an error. PG_G is simply
+ * ignored if CR4.PGE is clear. Moreover, in case this block
+ * is re-entered, the load_cr4() either above or below will
+ * modify CR4.PGE flushing the TLB.
+ */
+ load_cr4(cr4 | CR4_PGE);
+}
+
+static int
+pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS)
+{
+ int i;
+ uint64_t res;
+
+ res = 0;
+ CPU_FOREACH(i) {
+ res += cpuid_to_pcpu[i]->pc_pm_save_cnt;
+ }
+ return (sysctl_handle_64(oidp, &res, 0, req));
+}
+SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE_U64 | CTLFLAG_RW |
+ CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
+ "Count of saved TLB context on switch");
+
/*
* Crashdump maps.
*/
@@ -685,6 +729,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
PMAP_LOCK_INIT(kernel_pmap);
kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
+ CPU_ZERO(&kernel_pmap->pm_save);
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
/*
@@ -716,6 +761,21 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
/* Initialize the PAT MSR. */
pmap_init_pat();
+
+#ifdef SMP
+ /* Initialize TLB Context Id. */
+ TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
+ if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
+ load_cr4(rcr4() | CR4_PCIDE);
+ mtx_init(&pcid_mtx, "pcid", NULL, MTX_DEF);
+ init_unrhdr(&pcid_unr, 1, (1 << 12) - 1, &pcid_mtx);
+ /* Check for INVPCID support */
+ invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID)
+ != 0;
+ kernel_pmap->pm_pcid = 0;
+ } else
+#endif
+ pmap_pcid_enabled = 0;
}
/*
@@ -952,7 +1012,6 @@ pmap_cache_bits(int mode, boolean_t is_pde)
static void
pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
{
- u_long cr4;
if ((newpde & PG_PS) == 0)
/* Demotion: flush a specific 2MB page mapping. */
@@ -968,19 +1027,34 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
* Promotion: flush every 4KB page mapping from the TLB,
* including any global (PG_G) mappings.
*/
- cr4 = rcr4();
- load_cr4(cr4 & ~CR4_PGE);
- /*
- * Although preemption at this point could be detrimental to
- * performance, it would not lead to an error. PG_G is simply
- * ignored if CR4.PGE is clear. Moreover, in case this block
- * is re-entered, the load_cr4() either above or below will
- * modify CR4.PGE flushing the TLB.
- */
- load_cr4(cr4 | CR4_PGE);
+ invltlb_globpcid();
}
}
#ifdef SMP
+
+static void
+pmap_invalidate_page_pcid(pmap_t pmap, vm_offset_t va)
+{
+ struct invpcid_descr d;
+ uint64_t cr3;
+
+ if (invpcid_works) {
+ d.pcid = pmap->pm_pcid;
+ d.pad = 0;
+ d.addr = va;
+ invpcid(&d, INVPCID_ADDR);
+ return;
+ }
+
+ cr3 = rcr3();
+ critical_enter();
+ load_cr3(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) | pmap->pm_pcid |
+ CR3_PCID_SAVE);
+ invlpg(va);
+ load_cr3(cr3 | CR3_PCID_SAVE);
+ critical_exit();
+}
+
/*
* For SMP, these functions have to use the IPI mechanism for coherence.
*
@@ -1008,21 +1082,68 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
sched_pin();
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
- invlpg(va);
- smp_invlpg(va);
+ if (!pmap_pcid_enabled) {
+ invlpg(va);
+ } else {
+ if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
+ if (pmap == PCPU_GET(curpmap))
+ invlpg(va);
+ else
+ pmap_invalidate_page_pcid(pmap, va);
+ } else {
+ invltlb_globpcid();
+ }
+ }
+ smp_invlpg(pmap, va);
} else {
cpuid = PCPU_GET(cpuid);
other_cpus = all_cpus;
CPU_CLR(cpuid, &other_cpus);
if (CPU_ISSET(cpuid, &pmap->pm_active))
invlpg(va);
- CPU_AND(&other_cpus, &pmap->pm_active);
+ else if (pmap_pcid_enabled) {
+ if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0)
+ pmap_invalidate_page_pcid(pmap, va);
+ else
+ invltlb_globpcid();
+ }
+ if (pmap_pcid_enabled)
+ CPU_AND(&other_cpus, &pmap->pm_save);
+ else
+ CPU_AND(&other_cpus, &pmap->pm_active);
if (!CPU_EMPTY(&other_cpus))
- smp_masked_invlpg(other_cpus, va);
+ smp_masked_invlpg(other_cpus, pmap, va);
}
sched_unpin();
}
+static void
+pmap_invalidate_range_pcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ struct invpcid_descr d;
+ uint64_t cr3;
+ vm_offset_t addr;
+
+ if (invpcid_works) {
+ d.pcid = pmap->pm_pcid;
+ d.pad = 0;
+ for (addr = sva; addr < eva; addr += PAGE_SIZE) {
+ d.addr = addr;
+ invpcid(&d, INVPCID_ADDR);
+ }
+ return;
+ }
+
+ cr3 = rcr3();
+ critical_enter();
+ load_cr3(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) | pmap->pm_pcid |
+ CR3_PCID_SAVE);
+ for (addr = sva; addr < eva; addr += PAGE_SIZE)
+ invlpg(addr);
+ load_cr3(cr3 | CR3_PCID_SAVE);
+ critical_exit();
+}
+
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
@@ -1032,19 +1153,43 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
sched_pin();
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
- smp_invlpg_range(sva, eva);
+ if (!pmap_pcid_enabled) {
+ for (addr = sva; addr < eva; addr += PAGE_SIZE)
+ invlpg(addr);
+ } else {
+ if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
+ if (pmap == PCPU_GET(curpmap)) {
+ for (addr = sva; addr < eva;
+ addr += PAGE_SIZE)
+ invlpg(addr);
+ } else {
+ pmap_invalidate_range_pcid(pmap,
+ sva, eva);
+ }
+ } else {
+ invltlb_globpcid();
+ }
+ }
+ smp_invlpg_range(pmap, sva, eva);
} else {
cpuid = PCPU_GET(cpuid);
other_cpus = all_cpus;
CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active))
+ if (CPU_ISSET(cpuid, &pmap->pm_active)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
- CPU_AND(&other_cpus, &pmap->pm_active);
+ } else if (pmap_pcid_enabled) {
+ if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0)
+ pmap_invalidate_range_pcid(pmap, sva, eva);
+ else
+ invltlb_globpcid();
+ }
+ if (pmap_pcid_enabled)
+ CPU_AND(&other_cpus, &pmap->pm_save);
+ else
+ CPU_AND(&other_cpus, &pmap->pm_active);
if (!CPU_EMPTY(&other_cpus))
- smp_masked_invlpg_range(other_cpus, sva, eva);
+ smp_masked_invlpg_range(other_cpus, pmap, sva, eva);
}
sched_unpin();
}
@@ -1053,21 +1198,63 @@ void
pmap_invalidate_all(pmap_t pmap)
{
cpuset_t other_cpus;
+ struct invpcid_descr d;
+ uint64_t cr3;
u_int cpuid;
sched_pin();
- if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
- invltlb();
- smp_invltlb();
+ cpuid = PCPU_GET(cpuid);
+ if (pmap == kernel_pmap ||
+ (pmap_pcid_enabled && !CPU_CMP(&pmap->pm_save, &all_cpus)) ||
+ !CPU_CMP(&pmap->pm_active, &all_cpus)) {
+ if (invpcid_works) {
+ bzero(&d, sizeof(d));
+ invpcid(&d, INVPCID_CTXGLOB);
+ } else {
+ invltlb_globpcid();
+ }
+ CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
+ smp_invltlb(pmap);
} else {
- cpuid = PCPU_GET(cpuid);
other_cpus = all_cpus;
CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active))
+
+ /*
+ * This logic is duplicated in the Xinvltlb shootdown
+ * IPI handler.
+ */
+ if (pmap_pcid_enabled) {
+ if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
+ if (invpcid_works) {
+ d.pcid = pmap->pm_pcid;
+ d.pad = 0;
+ d.addr = 0;
+ invpcid(&d, INVPCID_CTX);
+ } else {
+ cr3 = rcr3();
+ critical_enter();
+
+ /*
+ * Bit 63 is clear, pcid TLB
+ * entries are invalidated.
+ */
+ load_cr3(DMAP_TO_PHYS((vm_offset_t)
+ pmap->pm_pml4) | pmap->pm_pcid);
+ load_cr3(cr3 | CR3_PCID_SAVE);
+ critical_exit();
+ }
+ } else {
+ invltlb_globpcid();
+ }
+ } else if (CPU_ISSET(cpuid, &pmap->pm_active))
invltlb();
- CPU_AND(&other_cpus, &pmap->pm_active);
+ CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
+ if (pmap_pcid_enabled)
+ CPU_AND(&other_cpus, &pmap->pm_save);
+ else
+ CPU_AND(&other_cpus, &pmap->pm_active);
if (!CPU_EMPTY(&other_cpus))
- smp_masked_invltlb(other_cpus);
+ smp_masked_invltlb(other_cpus, pmap);
}
sched_unpin();
}
@@ -1129,8 +1316,10 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
CPU_CLR(cpuid, &other_cpus);
if (pmap == kernel_pmap)
active = all_cpus;
- else
+ else {
active = pmap->pm_active;
+ CPU_AND_ATOMIC(&pmap->pm_save, &active);
+ }
if (CPU_OVERLAP(&active, &other_cpus)) {
act.store = cpuid;
act.invalidate = active;
@@ -1193,6 +1382,8 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
pde_store(pde, newpde);
if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
pmap_update_pde_invalidate(va, newpde);
+ else
+ CPU_ZERO(&pmap->pm_save);
}
#endif /* !SMP */
@@ -1675,6 +1866,8 @@ pmap_pinit0(pmap_t pmap)
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+ pmap->pm_pcid = pmap_pcid_enabled ? 0 : -1;
+ CPU_ZERO(&pmap->pm_save);
}
/*
@@ -1716,6 +1909,8 @@ pmap_pinit(pmap_t pmap)
CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+ pmap->pm_pcid = pmap_pcid_enabled ? alloc_unr(&pcid_unr) : -1;
+ CPU_ZERO(&pmap->pm_save);
return (1);
}
@@ -1957,6 +2152,14 @@ pmap_release(pmap_t pmap)
KASSERT(vm_radix_is_empty(&pmap->pm_root),
("pmap_release: pmap has reserved page table page(s)"));
+ if (pmap_pcid_enabled) {
+ /*
+ * Invalidate any remaining TLB entries, to allow the reuse
+ * of the pcid.
+ */
+ pmap_invalidate_all(pmap);
+ }
+
m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME);
for (i = 0; i < NKPML4E; i++) /* KVA */
@@ -1968,6 +2171,8 @@ pmap_release(pmap_t pmap)
m->wire_count--;
atomic_subtract_int(&cnt.v_wire_count, 1);
vm_page_free_zero(m);
+ if (pmap->pm_pcid != -1)
+ free_unr(&pcid_unr, pmap->pm_pcid);
}
static int
@@ -5734,15 +5939,20 @@ pmap_activate(struct thread *td)
critical_enter();
pmap = vmspace_pmap(td->td_proc->p_vmspace);
oldpmap = PCPU_GET(curpmap);
+ CPU_ZERO(&pmap->pm_save);
cpuid = PCPU_GET(cpuid);
#ifdef SMP
CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
+ CPU_SET_ATOMIC(cpuid, &pmap->pm_save);
#else
CPU_CLR(cpuid, &oldpmap->pm_active);
CPU_SET(cpuid, &pmap->pm_active);
+ CPU_SET(cpuid, &pmap->pm_save);
#endif
cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4);
+ if (pmap->pm_pcid != -1)
+ cr3 |= pmap->pm_pcid;
td->td_pcb->pcb_cr3 = cr3;
load_cr3(cr3);
PCPU_SET(curpmap, pmap);
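
One consequence of PCID runs through all of the pmap_invalidate_*() changes above: a CPU
that has switched away from a pmap may still hold TLB entries tagged with its PCID, so
the shootdown targets are taken from pm_save rather than pm_active whenever PCID is
enabled. A small sketch of that target selection, with cpuset_t modelled as a 64-bit mask
(names here are illustrative):

#include <stdint.h>

typedef uint64_t cpumask_sketch_t;      /* stand-in for cpuset_t */

static cpumask_sketch_t
shootdown_targets(cpumask_sketch_t pm_active, cpumask_sketch_t pm_save,
    cpumask_sketch_t all_cpus, unsigned self, int pcid_enabled)
{
    cpumask_sketch_t other_cpus;

    other_cpus = all_cpus & ~((cpumask_sketch_t)1 << self);    /* local CPU handled directly */
    /* Stale PCID-tagged entries can outlive pm_active membership. */
    other_cpus &= pcid_enabled ? pm_save : pm_active;
    return (other_cpus);
}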
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index ed0e7e9..3764f72 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -221,6 +221,8 @@ cpu_fork(td1, p2, td2, flags)
*/
pmap2 = vmspace_pmap(p2->p_vmspace);
pcb2->pcb_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap2->pm_pml4);
+ if (pmap2->pm_pcid != -1)
+ pcb2->pcb_cr3 |= pmap2->pm_pcid;
pcb2->pcb_r12 = (register_t)fork_return; /* fork_trampoline argument */
pcb2->pcb_rbp = 0;
pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *);