diff options
Diffstat (limited to 'sys')
-rw-r--r-- | sys/amd64/amd64/apic_vector.S | 110 | ||||
-rw-r--r-- | sys/amd64/amd64/genassym.c | 1 | ||||
-rw-r--r-- | sys/amd64/amd64/mp_machdep.c | 21 | ||||
-rw-r--r-- | sys/amd64/amd64/pmap.c | 31 | ||||
-rw-r--r-- | sys/amd64/amd64/vm_machdep.c | 4 | ||||
-rw-r--r-- | sys/amd64/include/pcpu.h | 1 | ||||
-rw-r--r-- | sys/amd64/include/pmap.h | 1 | ||||
-rw-r--r-- | sys/amd64/include/smp.h | 2 |
8 files changed, 98 insertions, 73 deletions
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index 79ec5ed..d002b4d 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -168,7 +168,7 @@ global_invltlb: invltlb_ret_clear_pm_save: movq smp_tlb_pmap,%rdx testq %rdx,%rdx - jz invltlb_ret + jz invltlb_ret_rdx testb $SEL_RPL_MASK,NAKE_INTR_CS(%rsp) jz 1f swapgs @@ -179,16 +179,17 @@ invltlb_ret_clear_pm_save: 2: LK btcl %eax,PM_SAVE(%rdx) SUPERALIGN_TEXT -invltlb_ret: +invltlb_ret_rdx: + popq %rdx +invltlb_ret_rax: movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ LK incl smp_tlb_wait - popq %rdx popq %rax jmp doreti_iret SUPERALIGN_TEXT -IDTVEC(invltlb) +IDTVEC(invltlb_pcid) #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) PUSH_FRAME movl PCPU(CPUID), %eax @@ -206,8 +207,6 @@ IDTVEC(invltlb) pushq %rdx movq %cr3,%rax - cmpl $0,pmap_pcid_enabled - je 2f movq $smp_tlb_invpcid,%rdx cmpl $0,(%rdx) @@ -216,8 +215,7 @@ IDTVEC(invltlb) je global_invltlb /* - * Non-zero smp_tlb_invpcid, only invalidate TLB for entries with - * current PCID. + * Only invalidate TLB for entries with current PCID. */ cmpl $0,invpcid_works je 1f @@ -233,21 +231,36 @@ IDTVEC(invltlb) je 2f movq %rdx,%cr3 /* Invalidate, bit 63 is zero. */ btsq $63,%rax - - /* - * Invalidate the TLB if PCID is not enabled. - * Restore the old address space. - */ 2: movq %rax,%cr3 jmp invltlb_ret_clear_pm_save + SUPERALIGN_TEXT +IDTVEC(invltlb) +#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) + PUSH_FRAME + movl PCPU(CPUID), %eax +#ifdef COUNT_XINVLTLB_HITS + incl xhits_gbl(,%rax,4) +#endif +#ifdef COUNT_IPIS + movq ipi_invltlb_counts(,%rax,8),%rax + incq (%rax) +#endif + POP_FRAME +#endif + + pushq %rax + movq %cr3, %rax /* invalidate the TLB */ + movq %rax, %cr3 + jmp invltlb_ret_rax + /* * Single page TLB shootdown */ .text SUPERALIGN_TEXT -IDTVEC(invlpg) +IDTVEC(invlpg_pcid) #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) PUSH_FRAME movl PCPU(CPUID), %eax @@ -264,8 +277,6 @@ IDTVEC(invlpg) pushq %rax pushq %rdx movq $smp_tlb_invpcid,%rdx - cmpl $0,pmap_pcid_enabled - je 3f cmpl $0,invpcid_works jne 2f @@ -291,7 +302,7 @@ IDTVEC(invlpg) btsq $63,%rcx movq %rcx,%cr3 popq %rcx - jmp invltlb_ret + jmp invltlb_ret_rdx /* * Invalidate the TLB entry using INVPCID_ADDR. @@ -300,7 +311,7 @@ IDTVEC(invlpg) xorl %eax,%eax /* invpcid (%rdx),%rax */ .byte 0x66,0x0f,0x38,0x82,0x02 - jmp invltlb_ret + jmp invltlb_ret_rdx /* * PCID is not supported or kernel pmap. @@ -309,7 +320,27 @@ IDTVEC(invlpg) 3: movq 8(%rdx),%rax invlpg (%rax) - jmp invltlb_ret + jmp invltlb_ret_rdx + + SUPERALIGN_TEXT +IDTVEC(invlpg) +#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) + PUSH_FRAME + movl PCPU(CPUID), %eax +#ifdef COUNT_XINVLTLB_HITS + incl xhits_pg(,%rax,4) +#endif +#ifdef COUNT_IPIS + movq ipi_invlpg_counts(,%rax,8),%rax + incq (%rax) +#endif + POP_FRAME +#endif + + pushq %rax + movq smp_tlb_invpcid+8,%rax + invlpg (%rax) /* invalidate single page */ + jmp invltlb_ret_rax /* * Page range TLB shootdown. @@ -334,15 +365,15 @@ IDTVEC(invlrng) pushq %rdx movq $smp_tlb_invpcid,%rdx cmpl $0,pmap_pcid_enabled - jne invlrng_single_page - cmpl $0,invpcid_works - jne invlrng_invpcid + je invlrng_single_page /* kernel pmap - use invlpg to invalidate global mapping */ cmpl $0,(%rdx) je invlrng_single_page cmpl $-1,(%rdx) je global_invltlb + cmpl $0,invpcid_works + jne invlrng_invpcid pushq %rcx movq %cr3,%rcx @@ -362,37 +393,27 @@ IDTVEC(invlrng) btsq $63,%rcx movq %rcx,%cr3 popq %rcx - jmp invltlb_ret + jmp invltlb_ret_rdx invlrng_invpcid: - testb $SEL_RPL_MASK,NAKE_INTR_CS(%rsp) - jz 1f - swapgs -1: pushq %rcx + subq $16,%rsp movq (%rdx),%rcx - movq %rcx,PCPU(INVPCID_DESCR) + movq %rcx,(%rsp) movq 8(%rdx),%rax - movq %rax,PCPU(INVPCID_DESCR)+8 + movq %rax,8(%rsp) movq smp_tlb_addr2,%rcx - xorl %eax,%eax - movq $PC_INVPCID_DESCR,%rdx - gs - subq 8(%rdx),%rcx + subq %rax,%rcx shrq $PAGE_SHIFT,%rcx -2: - gs +1: // invpcid (%rdx),%rax .byte 0x66,0x0f,0x38,0x82,0x02 - gs - addq $PAGE_SIZE,8(%rdx) + addq $PAGE_SIZE,8(%rsp) dec %rcx - jne 2b + jne 1b + addq $16,%rsp popq %rcx - testb $SEL_RPL_MASK,NAKE_INTR_CS(%rsp) - jz invltlb_ret - swapgs - jmp invltlb_ret + jmp invltlb_ret_rdx invlrng_single_page: movq 8(%rdx),%rdx @@ -401,7 +422,7 @@ invlrng_single_page: addq $PAGE_SIZE,%rdx cmpq %rax,%rdx jb 1b - jmp invltlb_ret + jmp invltlb_ret_rdx /* * Invalidate cache. @@ -418,9 +439,8 @@ IDTVEC(invlcache) #endif pushq %rax - pushq %rdx wbinvd - jmp invltlb_ret + jmp invltlb_ret_rax /* * Handler for IPIs sent via the per-cpu IPI bitmap. diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index 62017e7..028a2cd 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -228,7 +228,6 @@ ASSYM(PC_LDT, offsetof(struct pcpu, pc_ldt)); ASSYM(PC_COMMONTSSP, offsetof(struct pcpu, pc_commontssp)); ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss)); ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt)); -ASSYM(PC_INVPCID_DESCR, offsetof(struct pcpu, pc_invpcid_descr)); ASSYM(LA_VER, offsetof(struct LAPIC, version)); ASSYM(LA_TPR, offsetof(struct LAPIC, tpr)); diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 530aa61..3addd43 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -127,6 +127,8 @@ static u_long *ipi_hardclock_counts[MAXCPU]; extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); +extern int pmap_pcid_enabled; + /* * Local data and functions. */ @@ -524,8 +526,15 @@ cpu_mp_start(void) } /* Install an inter-CPU IPI for TLB invalidation */ - setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0); - setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0); + if (pmap_pcid_enabled) { + setidt(IPI_INVLTLB, IDTVEC(invltlb_pcid), SDT_SYSIGT, + SEL_KPL, 0); + setidt(IPI_INVLPG, IDTVEC(invlpg_pcid), SDT_SYSIGT, + SEL_KPL, 0); + } else { + setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0); + setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0); + } setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for cache invalidation. */ @@ -605,8 +614,6 @@ cpu_mp_announce(void) } } -extern int pmap_pcid_enabled; - /* * AP CPU's call this to initialize themselves. */ @@ -1141,8 +1148,7 @@ smp_tlb_shootdown(u_int vector, pmap_t pmap, vm_offset_t addr1, smp_tlb_invpcid.pcid = 0; } else { smp_tlb_invpcid.pcid = pmap->pm_pcid; - pcid_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) | - (pmap->pm_pcid == -1 ? 0 : pmap->pm_pcid); + pcid_cr3 = pmap->pm_cr3; } smp_tlb_addr2 = addr2; smp_tlb_pmap = pmap; @@ -1176,8 +1182,7 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap, smp_tlb_invpcid.pcid = 0; } else { smp_tlb_invpcid.pcid = pmap->pm_pcid; - pcid_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) | - (pmap->pm_pcid == -1 ? 0 : pmap->pm_pcid); + pcid_cr3 = pmap->pm_cr3; } smp_tlb_addr2 = addr2; smp_tlb_pmap = pmap; diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index d905961..cecd92d 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -728,6 +728,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) */ PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys); + kernel_pmap->pm_cr3 = KPML4phys; CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ CPU_ZERO(&kernel_pmap->pm_save); TAILQ_INIT(&kernel_pmap->pm_pvchunk); @@ -1049,8 +1050,7 @@ pmap_invalidate_page_pcid(pmap_t pmap, vm_offset_t va) cr3 = rcr3(); critical_enter(); - load_cr3(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) | pmap->pm_pcid | - CR3_PCID_SAVE); + load_cr3(pmap->pm_cr3 | CR3_PCID_SAVE); invlpg(va); load_cr3(cr3 | CR3_PCID_SAVE); critical_exit(); @@ -1137,8 +1137,7 @@ pmap_invalidate_range_pcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) cr3 = rcr3(); critical_enter(); - load_cr3(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) | pmap->pm_pcid | - CR3_PCID_SAVE); + load_cr3(pmap->pm_cr3 | CR3_PCID_SAVE); for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); load_cr3(cr3 | CR3_PCID_SAVE); @@ -1239,8 +1238,7 @@ pmap_invalidate_all(pmap_t pmap) * Bit 63 is clear, pcid TLB * entries are invalidated. */ - load_cr3(DMAP_TO_PHYS((vm_offset_t) - pmap->pm_pml4) | pmap->pm_pcid); + load_cr3(pmap->pm_cr3); load_cr3(cr3 | CR3_PCID_SAVE); critical_exit(); } @@ -1862,6 +1860,7 @@ pmap_pinit0(pmap_t pmap) PMAP_LOCK_INIT(pmap); pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); + pmap->pm_cr3 = KPML4phys; pmap->pm_root.rt_root = 0; CPU_ZERO(&pmap->pm_active); CPU_ZERO(&pmap->pm_save); @@ -1869,7 +1868,6 @@ pmap_pinit0(pmap_t pmap) TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); pmap->pm_pcid = pmap_pcid_enabled ? 0 : -1; - CPU_ZERO(&pmap->pm_save); } /* @@ -1889,7 +1887,8 @@ pmap_pinit(pmap_t pmap) VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) VM_WAIT; - pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg)); + pmap->pm_cr3 = VM_PAGE_TO_PHYS(pml4pg); + pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pmap->pm_cr3); if ((pml4pg->flags & PG_ZERO) == 0) pagezero(pmap->pm_pml4); @@ -1911,7 +1910,13 @@ pmap_pinit(pmap_t pmap) CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); - pmap->pm_pcid = pmap_pcid_enabled ? alloc_unr(&pcid_unr) : -1; + if (pmap_pcid_enabled) { + pmap->pm_pcid = alloc_unr(&pcid_unr); + if (pmap->pm_pcid != -1) + pmap->pm_cr3 |= pmap->pm_pcid; + } else { + pmap->pm_pcid = -1; + } CPU_ZERO(&pmap->pm_save); return (1); @@ -5936,7 +5941,6 @@ pmap_activate(struct thread *td) { pmap_t pmap, oldpmap; u_int cpuid; - u_int64_t cr3; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); @@ -5951,11 +5955,8 @@ pmap_activate(struct thread *td) CPU_SET(cpuid, &pmap->pm_active); CPU_SET(cpuid, &pmap->pm_save); #endif - cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4); - if (pmap->pm_pcid != -1) - cr3 |= pmap->pm_pcid; - td->td_pcb->pcb_cr3 = cr3; - load_cr3(cr3); + td->td_pcb->pcb_cr3 = pmap->pm_cr3; + load_cr3(pmap->pm_cr3); PCPU_SET(curpmap, pmap); critical_exit(); } diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 3764f72..3e961e9 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -220,9 +220,7 @@ cpu_fork(td1, p2, td2, flags) * return address on stack. These are the kernel mode register values. */ pmap2 = vmspace_pmap(p2->p_vmspace); - pcb2->pcb_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap2->pm_pml4); - if (pmap2->pm_pcid != -1) - pcb2->pcb_cr3 |= pmap2->pm_pcid; + pcb2->pcb_cr3 = pmap2->pm_cr3; pcb2->pcb_r12 = (register_t)fork_return; /* fork_trampoline argument */ pcb2->pcb_rbp = 0; pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *); diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h index 0e11975..387df1a 100644 --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -68,7 +68,6 @@ /* Pointer to the CPU TSS descriptor */ \ struct system_segment_descriptor *pc_tss; \ uint64_t pc_pm_save_cnt; \ - char pc_invpcid_descr[16]; \ u_int pc_cmci_mask; /* MCx banks for CMCI */ \ uint64_t pc_dbreg[16]; /* ddb debugging regs */ \ int pc_dbreg_cmd; /* ddb debugging reg cmd */ \ diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index fa42389..b570cb7 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -238,6 +238,7 @@ struct md_page { struct pmap { struct mtx pm_mtx; pml4_entry_t *pm_pml4; /* KVA of level 4 page table */ + uint64_t pm_cr3; TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ cpuset_t pm_active; /* active on cpus */ cpuset_t pm_save; /* Context valid on cpus mask */ diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index d6cd476..d1b366b 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -45,7 +45,9 @@ extern u_long *ipi_rendezvous_counts[MAXCPU]; /* IPI handlers */ inthand_t + IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid enabled */ IDTVEC(invltlb), /* TLB shootdowns - global */ + IDTVEC(invlpg_pcid), /* TLB shootdowns - 1 page, pcid enabled */ IDTVEC(invlpg), /* TLB shootdowns - 1 page */ IDTVEC(invlrng), /* TLB shootdowns - page range */ IDTVEC(invlcache), /* Write back and invalidate cache */ |