summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r-- sys/amd64/amd64/apic_vector.S 110
-rw-r--r-- sys/amd64/amd64/genassym.c 1
-rw-r--r-- sys/amd64/amd64/mp_machdep.c 21
-rw-r--r-- sys/amd64/amd64/pmap.c 31
-rw-r--r-- sys/amd64/amd64/vm_machdep.c 4
-rw-r--r-- sys/amd64/include/pcpu.h 1
-rw-r--r-- sys/amd64/include/pmap.h 1
-rw-r--r-- sys/amd64/include/smp.h 2
8 files changed, 98 insertions, 73 deletions
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index 79ec5ed..d002b4d 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -168,7 +168,7 @@ global_invltlb:
invltlb_ret_clear_pm_save:
movq smp_tlb_pmap,%rdx
testq %rdx,%rdx
- jz invltlb_ret
+ jz invltlb_ret_rdx
testb $SEL_RPL_MASK,NAKE_INTR_CS(%rsp)
jz 1f
swapgs
@@ -179,16 +179,17 @@ invltlb_ret_clear_pm_save:
2:
LK btcl %eax,PM_SAVE(%rdx)
SUPERALIGN_TEXT
-invltlb_ret:
+invltlb_ret_rdx: /* exit for handlers that pushed %rax and then %rdx */
+ popq %rdx /* restore %rdx, then fall through to the %rax-only exit */
+invltlb_ret_rax: /* exit for handlers that pushed only %rax */
movq lapic, %rax
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
LK incl smp_tlb_wait
- popq %rdx
popq %rax
jmp doreti_iret
SUPERALIGN_TEXT
-IDTVEC(invltlb)
+IDTVEC(invltlb_pcid)
#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
PUSH_FRAME
movl PCPU(CPUID), %eax
@@ -206,8 +207,6 @@ IDTVEC(invltlb)
pushq %rdx
movq %cr3,%rax
- cmpl $0,pmap_pcid_enabled
- je 2f
movq $smp_tlb_invpcid,%rdx
cmpl $0,(%rdx)
@@ -216,8 +215,7 @@ IDTVEC(invltlb)
je global_invltlb
/*
- * Non-zero smp_tlb_invpcid, only invalidate TLB for entries with
- * current PCID.
+ * Only invalidate TLB for entries with current PCID.
*/
cmpl $0,invpcid_works
je 1f
@@ -233,21 +231,36 @@ IDTVEC(invltlb)
je 2f
movq %rdx,%cr3 /* Invalidate, bit 63 is zero. */
btsq $63,%rax
-
- /*
- * Invalidate the TLB if PCID is not enabled.
- * Restore the old address space.
- */
2:
movq %rax,%cr3
jmp invltlb_ret_clear_pm_save
+ SUPERALIGN_TEXT
+IDTVEC(invltlb) /* installed for IPI_INVLTLB when pmap_pcid_enabled is 0 */
+#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
+ PUSH_FRAME
+ movl PCPU(CPUID), %eax /* CPU id indexes the counter arrays below */
+#ifdef COUNT_XINVLTLB_HITS
+ incl xhits_gbl(,%rax,4)
+#endif
+#ifdef COUNT_IPIS
+ movq ipi_invltlb_counts(,%rax,8),%rax
+ incq (%rax)
+#endif
+ POP_FRAME
+#endif
+
+ pushq %rax /* only %rax is saved; popped at invltlb_ret_rax */
+ movq %cr3, %rax /* invalidate the TLB */
+ movq %rax, %cr3 /* write back the value just read from %cr3 */
+ jmp invltlb_ret_rax /* shared tail: EOI, incl smp_tlb_wait, popq %rax */
+
/*
* Single page TLB shootdown
*/
.text
SUPERALIGN_TEXT
-IDTVEC(invlpg)
+IDTVEC(invlpg_pcid)
#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
PUSH_FRAME
movl PCPU(CPUID), %eax
@@ -264,8 +277,6 @@ IDTVEC(invlpg)
pushq %rax
pushq %rdx
movq $smp_tlb_invpcid,%rdx
- cmpl $0,pmap_pcid_enabled
- je 3f
cmpl $0,invpcid_works
jne 2f
@@ -291,7 +302,7 @@ IDTVEC(invlpg)
btsq $63,%rcx
movq %rcx,%cr3
popq %rcx
- jmp invltlb_ret
+ jmp invltlb_ret_rdx
/*
* Invalidate the TLB entry using INVPCID_ADDR.
@@ -300,7 +311,7 @@ IDTVEC(invlpg)
xorl %eax,%eax
/* invpcid (%rdx),%rax */
.byte 0x66,0x0f,0x38,0x82,0x02
- jmp invltlb_ret
+ jmp invltlb_ret_rdx
/*
* PCID is not supported or kernel pmap.
@@ -309,7 +320,27 @@ IDTVEC(invlpg)
3:
movq 8(%rdx),%rax
invlpg (%rax)
- jmp invltlb_ret
+ jmp invltlb_ret_rdx
+
+ SUPERALIGN_TEXT
+IDTVEC(invlpg) /* installed for IPI_INVLPG when pmap_pcid_enabled is 0 */
+#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
+ PUSH_FRAME
+ movl PCPU(CPUID), %eax /* CPU id indexes the counter arrays below */
+#ifdef COUNT_XINVLTLB_HITS
+ incl xhits_pg(,%rax,4)
+#endif
+#ifdef COUNT_IPIS
+ movq ipi_invlpg_counts(,%rax,8),%rax
+ incq (%rax)
+#endif
+ POP_FRAME
+#endif
+
+ pushq %rax /* only %rax is saved; popped at invltlb_ret_rax */
+ movq smp_tlb_invpcid+8,%rax /* load the quadword at offset 8 of smp_tlb_invpcid as the address */
+ invlpg (%rax) /* invalidate single page */
+ jmp invltlb_ret_rax /* shared tail: EOI, incl smp_tlb_wait, popq %rax */
/*
* Page range TLB shootdown.
@@ -334,15 +365,15 @@ IDTVEC(invlrng)
pushq %rdx
movq $smp_tlb_invpcid,%rdx
cmpl $0,pmap_pcid_enabled
- jne invlrng_single_page
- cmpl $0,invpcid_works
- jne invlrng_invpcid
+ je invlrng_single_page
/* kernel pmap - use invlpg to invalidate global mapping */
cmpl $0,(%rdx)
je invlrng_single_page
cmpl $-1,(%rdx)
je global_invltlb
+ cmpl $0,invpcid_works
+ jne invlrng_invpcid
pushq %rcx
movq %cr3,%rcx
@@ -362,37 +393,27 @@ IDTVEC(invlrng)
btsq $63,%rcx
movq %rcx,%cr3
popq %rcx
- jmp invltlb_ret
+ jmp invltlb_ret_rdx
invlrng_invpcid:
- testb $SEL_RPL_MASK,NAKE_INTR_CS(%rsp)
- jz 1f
- swapgs
-1:
pushq %rcx
+ subq $16,%rsp /* reserve 16 bytes of stack for a private copy of the descriptor */
movq (%rdx),%rcx
- movq %rcx,PCPU(INVPCID_DESCR)
+ movq %rcx,(%rsp) /* copy first quadword of the descriptor read via %rdx */
movq 8(%rdx),%rax
- movq %rax,PCPU(INVPCID_DESCR)+8
+ movq %rax,8(%rsp) /* copy second quadword (the start address) */
movq smp_tlb_addr2,%rcx
- xorl %eax,%eax
- movq $PC_INVPCID_DESCR,%rdx
- gs
- subq 8(%rdx),%rcx
+ subq %rax,%rcx /* byte length of the range; NOTE(review): %rax keeps the start address, the old xorl %eax,%eax before invpcid is gone - confirm the type operand */
shrq $PAGE_SHIFT,%rcx
-2:
- gs
+1: /* loop once per page; %rcx counts the remaining pages */
// invpcid (%rdx),%rax
.byte 0x66,0x0f,0x38,0x82,0x02
- gs
- addq $PAGE_SIZE,8(%rdx)
+ addq $PAGE_SIZE,8(%rsp) /* advance the stack copy; NOTE(review): the invpcid above still addresses (%rdx), which looks like smp_tlb_invpcid rather than (%rsp) - confirm */
dec %rcx
- jne 2b
+ jne 1b
+ addq $16,%rsp /* release the stack descriptor copy */
popq %rcx
- testb $SEL_RPL_MASK,NAKE_INTR_CS(%rsp)
- jz invltlb_ret
- swapgs
- jmp invltlb_ret
+ jmp invltlb_ret_rdx /* shared tail pops %rdx, then %rax */
invlrng_single_page:
movq 8(%rdx),%rdx
@@ -401,7 +422,7 @@ invlrng_single_page:
addq $PAGE_SIZE,%rdx
cmpq %rax,%rdx
jb 1b
- jmp invltlb_ret
+ jmp invltlb_ret_rdx
/*
* Invalidate cache.
@@ -418,9 +439,8 @@ IDTVEC(invlcache)
#endif
pushq %rax
- pushq %rdx
wbinvd
- jmp invltlb_ret
+ jmp invltlb_ret_rax
/*
* Handler for IPIs sent via the per-cpu IPI bitmap.
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 62017e7..028a2cd 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -228,7 +228,6 @@ ASSYM(PC_LDT, offsetof(struct pcpu, pc_ldt));
ASSYM(PC_COMMONTSSP, offsetof(struct pcpu, pc_commontssp));
ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss));
ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt));
-ASSYM(PC_INVPCID_DESCR, offsetof(struct pcpu, pc_invpcid_descr));
ASSYM(LA_VER, offsetof(struct LAPIC, version));
ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 530aa61..3addd43 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -127,6 +127,8 @@ static u_long *ipi_hardclock_counts[MAXCPU];
extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
+extern int pmap_pcid_enabled;
+
/*
* Local data and functions.
*/
@@ -524,8 +526,15 @@ cpu_mp_start(void)
}
/* Install an inter-CPU IPI for TLB invalidation */
- setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0);
- setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
+ if (pmap_pcid_enabled) {
+ setidt(IPI_INVLTLB, IDTVEC(invltlb_pcid), SDT_SYSIGT,
+ SEL_KPL, 0);
+ setidt(IPI_INVLPG, IDTVEC(invlpg_pcid), SDT_SYSIGT,
+ SEL_KPL, 0);
+ } else {
+ setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
+ }
setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0);
/* Install an inter-CPU IPI for cache invalidation. */
@@ -605,8 +614,6 @@ cpu_mp_announce(void)
}
}
-extern int pmap_pcid_enabled;
-
/*
* AP CPU's call this to initialize themselves.
*/
@@ -1141,8 +1148,7 @@ smp_tlb_shootdown(u_int vector, pmap_t pmap, vm_offset_t addr1,
smp_tlb_invpcid.pcid = 0;
} else {
smp_tlb_invpcid.pcid = pmap->pm_pcid;
- pcid_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) |
- (pmap->pm_pcid == -1 ? 0 : pmap->pm_pcid);
+ pcid_cr3 = pmap->pm_cr3;
}
smp_tlb_addr2 = addr2;
smp_tlb_pmap = pmap;
@@ -1176,8 +1182,7 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
smp_tlb_invpcid.pcid = 0;
} else {
smp_tlb_invpcid.pcid = pmap->pm_pcid;
- pcid_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) |
- (pmap->pm_pcid == -1 ? 0 : pmap->pm_pcid);
+ pcid_cr3 = pmap->pm_cr3;
}
smp_tlb_addr2 = addr2;
smp_tlb_pmap = pmap;
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index d905961..cecd92d 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -728,6 +728,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
*/
PMAP_LOCK_INIT(kernel_pmap);
kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
+ kernel_pmap->pm_cr3 = KPML4phys;
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
CPU_ZERO(&kernel_pmap->pm_save);
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
@@ -1049,8 +1050,7 @@ pmap_invalidate_page_pcid(pmap_t pmap, vm_offset_t va)
cr3 = rcr3();
critical_enter();
- load_cr3(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) | pmap->pm_pcid |
- CR3_PCID_SAVE);
+ load_cr3(pmap->pm_cr3 | CR3_PCID_SAVE);
invlpg(va);
load_cr3(cr3 | CR3_PCID_SAVE);
critical_exit();
@@ -1137,8 +1137,7 @@ pmap_invalidate_range_pcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
cr3 = rcr3();
critical_enter();
- load_cr3(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) | pmap->pm_pcid |
- CR3_PCID_SAVE);
+ load_cr3(pmap->pm_cr3 | CR3_PCID_SAVE);
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
load_cr3(cr3 | CR3_PCID_SAVE);
@@ -1239,8 +1238,7 @@ pmap_invalidate_all(pmap_t pmap)
* Bit 63 is clear, pcid TLB
* entries are invalidated.
*/
- load_cr3(DMAP_TO_PHYS((vm_offset_t)
- pmap->pm_pml4) | pmap->pm_pcid);
+ load_cr3(pmap->pm_cr3);
load_cr3(cr3 | CR3_PCID_SAVE);
critical_exit();
}
@@ -1862,6 +1860,7 @@ pmap_pinit0(pmap_t pmap)
PMAP_LOCK_INIT(pmap);
pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
+ pmap->pm_cr3 = KPML4phys;
pmap->pm_root.rt_root = 0;
CPU_ZERO(&pmap->pm_active);
CPU_ZERO(&pmap->pm_save);
@@ -1869,7 +1868,6 @@ pmap_pinit0(pmap_t pmap)
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
pmap->pm_pcid = pmap_pcid_enabled ? 0 : -1;
- CPU_ZERO(&pmap->pm_save);
}
/*
@@ -1889,7 +1887,8 @@ pmap_pinit(pmap_t pmap)
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
VM_WAIT;
- pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg));
+ pmap->pm_cr3 = VM_PAGE_TO_PHYS(pml4pg);
+ pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pmap->pm_cr3);
if ((pml4pg->flags & PG_ZERO) == 0)
pagezero(pmap->pm_pml4);
@@ -1911,7 +1910,13 @@ pmap_pinit(pmap_t pmap)
CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
- pmap->pm_pcid = pmap_pcid_enabled ? alloc_unr(&pcid_unr) : -1;
+ if (pmap_pcid_enabled) {
+ pmap->pm_pcid = alloc_unr(&pcid_unr);
+ if (pmap->pm_pcid != -1)
+ pmap->pm_cr3 |= pmap->pm_pcid;
+ } else {
+ pmap->pm_pcid = -1;
+ }
CPU_ZERO(&pmap->pm_save);
return (1);
@@ -5936,7 +5941,6 @@ pmap_activate(struct thread *td)
{
pmap_t pmap, oldpmap;
u_int cpuid;
- u_int64_t cr3;
critical_enter();
pmap = vmspace_pmap(td->td_proc->p_vmspace);
@@ -5951,11 +5955,8 @@ pmap_activate(struct thread *td)
CPU_SET(cpuid, &pmap->pm_active);
CPU_SET(cpuid, &pmap->pm_save);
#endif
- cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4);
- if (pmap->pm_pcid != -1)
- cr3 |= pmap->pm_pcid;
- td->td_pcb->pcb_cr3 = cr3;
- load_cr3(cr3);
+ td->td_pcb->pcb_cr3 = pmap->pm_cr3;
+ load_cr3(pmap->pm_cr3);
PCPU_SET(curpmap, pmap);
critical_exit();
}
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 3764f72..3e961e9 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -220,9 +220,7 @@ cpu_fork(td1, p2, td2, flags)
* return address on stack. These are the kernel mode register values.
*/
pmap2 = vmspace_pmap(p2->p_vmspace);
- pcb2->pcb_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap2->pm_pml4);
- if (pmap2->pm_pcid != -1)
- pcb2->pcb_cr3 |= pmap2->pm_pcid;
+ pcb2->pcb_cr3 = pmap2->pm_cr3;
pcb2->pcb_r12 = (register_t)fork_return; /* fork_trampoline argument */
pcb2->pcb_rbp = 0;
pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *);
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index 0e11975..387df1a 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -68,7 +68,6 @@
/* Pointer to the CPU TSS descriptor */ \
struct system_segment_descriptor *pc_tss; \
uint64_t pc_pm_save_cnt; \
- char pc_invpcid_descr[16]; \
u_int pc_cmci_mask; /* MCx banks for CMCI */ \
uint64_t pc_dbreg[16]; /* ddb debugging regs */ \
int pc_dbreg_cmd; /* ddb debugging reg cmd */ \
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index fa42389..b570cb7 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -238,6 +238,7 @@ struct md_page {
struct pmap {
struct mtx pm_mtx;
pml4_entry_t *pm_pml4; /* KVA of level 4 page table */
+ uint64_t pm_cr3;
TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
cpuset_t pm_active; /* active on cpus */
cpuset_t pm_save; /* Context valid on cpus mask */
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index d6cd476..d1b366b 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -45,7 +45,9 @@ extern u_long *ipi_rendezvous_counts[MAXCPU];
/* IPI handlers */
inthand_t
+ IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid enabled */
IDTVEC(invltlb), /* TLB shootdowns - global */
+ IDTVEC(invlpg_pcid), /* TLB shootdowns - 1 page, pcid enabled */
IDTVEC(invlpg), /* TLB shootdowns - 1 page */
IDTVEC(invlrng), /* TLB shootdowns - page range */
IDTVEC(invlcache), /* Write back and invalidate cache */
OpenPOWER on IntegriCloud