From 27026f4f2afc8cf9ffb8d965b2b70a91fd318239 Mon Sep 17 00:00:00 2001 From: royger Date: Tue, 11 Mar 2014 10:03:29 +0000 Subject: amd64/i386: switch IPI handlers to C code. Move asm IPIs handlers to C code, so both Xen and native IPI handlers share the same code. Reviewed by: jhb Approved by: gibbs Sponsored by: Citrix Systems R&D amd64/amd64/apic_vector.S: i386/i386/apic_vector.s: - Remove asm coded IPI handlers and instead call the newly introduced C variants. amd64/amd64/mp_machdep.c: i386/i386/mp_machdep.c: - Add C coded clones to the asm IPI handlers (moved from x86/xen/hvm.c). i386/include/smp.h: amd64/include/smp.h: - Add prototypes for the C IPI handlers. x86/xen/hvm.c: - Move the C IPI handlers to mp_machdep and call those in the Xen IPI handlers. i386/xen/mp_machdep.c: - Add dummy IPI handlers to the i386 Xen PV port (this port doesn't support SMP). --- sys/amd64/amd64/apic_vector.S | 252 +++--------------------------------------- sys/amd64/amd64/mp_machdep.c | 170 ++++++++++++++++++++++++++++ sys/amd64/include/smp.h | 6 + 3 files changed, 191 insertions(+), 237 deletions(-) (limited to 'sys/amd64') diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index d002b4d..4c87b4e 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -159,101 +159,26 @@ IDTVEC(xen_intr_upcall) #define NAKE_INTR_CS 24 SUPERALIGN_TEXT -global_invltlb: - movq %cr4,%rax - andq $~0x80,%rax /* PGE */ - movq %rax,%cr4 - orq $0x80,%rax - movq %rax,%cr4 -invltlb_ret_clear_pm_save: - movq smp_tlb_pmap,%rdx - testq %rdx,%rdx - jz invltlb_ret_rdx - testb $SEL_RPL_MASK,NAKE_INTR_CS(%rsp) - jz 1f - swapgs -1: - movl PCPU(CPUID),%eax - jz 2f - swapgs -2: - LK btcl %eax,PM_SAVE(%rdx) - SUPERALIGN_TEXT -invltlb_ret_rdx: - popq %rdx -invltlb_ret_rax: +invltlb_ret: movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ - LK incl smp_tlb_wait - popq %rax + POP_FRAME jmp doreti_iret SUPERALIGN_TEXT IDTVEC(invltlb_pcid) -#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) PUSH_FRAME - movl PCPU(CPUID), %eax -#ifdef COUNT_XINVLTLB_HITS - incl xhits_gbl(,%rax,4) -#endif -#ifdef COUNT_IPIS - movq ipi_invltlb_counts(,%rax,8),%rax - incq (%rax) -#endif - POP_FRAME -#endif - pushq %rax - pushq %rdx - - movq %cr3,%rax - - movq $smp_tlb_invpcid,%rdx - cmpl $0,(%rdx) - je global_invltlb - cmpl $-1,(%rdx) - je global_invltlb - - /* - * Only invalidate TLB for entries with current PCID. - */ - cmpl $0,invpcid_works - je 1f - /* Use invpcid if available. */ - movl $1,%eax /* INVPCID_CTX */ - /* invpcid (%rdx),%rax */ - .byte 0x66,0x0f,0x38,0x82,0x02 - jmp invltlb_ret_clear_pm_save -1: - /* Otherwise reload %cr3 twice. */ - movq pcid_cr3,%rdx - cmpq %rax,%rdx - je 2f - movq %rdx,%cr3 /* Invalidate, bit 63 is zero. */ - btsq $63,%rax -2: - movq %rax,%cr3 - jmp invltlb_ret_clear_pm_save + call invltlb_pcid_handler + jmp invltlb_ret + SUPERALIGN_TEXT IDTVEC(invltlb) -#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) PUSH_FRAME - movl PCPU(CPUID), %eax -#ifdef COUNT_XINVLTLB_HITS - incl xhits_gbl(,%rax,4) -#endif -#ifdef COUNT_IPIS - movq ipi_invltlb_counts(,%rax,8),%rax - incq (%rax) -#endif - POP_FRAME -#endif - pushq %rax - movq %cr3, %rax /* invalidate the TLB */ - movq %rax, %cr3 - jmp invltlb_ret_rax + call invltlb_handler + jmp invltlb_ret /* * Single page TLB shootdown @@ -261,86 +186,17 @@ IDTVEC(invltlb) .text SUPERALIGN_TEXT IDTVEC(invlpg_pcid) -#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) PUSH_FRAME - movl PCPU(CPUID), %eax -#ifdef COUNT_XINVLTLB_HITS - incl xhits_pg(,%rax,4) -#endif -#ifdef COUNT_IPIS - movq ipi_invlpg_counts(,%rax,8),%rax - incq (%rax) -#endif - POP_FRAME -#endif - pushq %rax - pushq %rdx - movq $smp_tlb_invpcid,%rdx - cmpl $0,invpcid_works - jne 2f - - /* kernel pmap - use invlpg to invalidate global mapping */ - cmpl $0,(%rdx) - je 3f - cmpl $-1,(%rdx) - je global_invltlb - - /* - * PCID supported, but INVPCID is not. - * Temporarily switch to the target address space and do INVLPG. - */ - pushq %rcx - movq %cr3,%rcx - movq pcid_cr3,%rax - cmp %rcx,%rax - je 1f - btsq $63,%rax - movq %rax,%cr3 -1: movq 8(%rdx),%rax - invlpg (%rax) - btsq $63,%rcx - movq %rcx,%cr3 - popq %rcx - jmp invltlb_ret_rdx - - /* - * Invalidate the TLB entry using INVPCID_ADDR. - */ -2: - xorl %eax,%eax -/* invpcid (%rdx),%rax */ - .byte 0x66,0x0f,0x38,0x82,0x02 - jmp invltlb_ret_rdx - - /* - * PCID is not supported or kernel pmap. - * Invalidate single page using INVLPG. - */ -3: - movq 8(%rdx),%rax - invlpg (%rax) - jmp invltlb_ret_rdx + call invlpg_pcid_handler + jmp invltlb_ret SUPERALIGN_TEXT IDTVEC(invlpg) -#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) PUSH_FRAME - movl PCPU(CPUID), %eax -#ifdef COUNT_XINVLTLB_HITS - incl xhits_pg(,%rax,4) -#endif -#ifdef COUNT_IPIS - movq ipi_invlpg_counts(,%rax,8),%rax - incq (%rax) -#endif - POP_FRAME -#endif - pushq %rax - movq smp_tlb_invpcid+8,%rax - invlpg (%rax) /* invalidate single page */ - jmp invltlb_ret_rax + call invlpg_handler + jmp invltlb_ret /* * Page range TLB shootdown. @@ -348,81 +204,10 @@ IDTVEC(invlpg) .text SUPERALIGN_TEXT IDTVEC(invlrng) -#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) PUSH_FRAME - movl PCPU(CPUID), %eax -#ifdef COUNT_XINVLTLB_HITS - incl xhits_rng(,%rax,4) -#endif -#ifdef COUNT_IPIS - movq ipi_invlrng_counts(,%rax,8),%rax - incq (%rax) -#endif - POP_FRAME -#endif - pushq %rax - pushq %rdx - movq $smp_tlb_invpcid,%rdx - cmpl $0,pmap_pcid_enabled - je invlrng_single_page - - /* kernel pmap - use invlpg to invalidate global mapping */ - cmpl $0,(%rdx) - je invlrng_single_page - cmpl $-1,(%rdx) - je global_invltlb - cmpl $0,invpcid_works - jne invlrng_invpcid - - pushq %rcx - movq %cr3,%rcx - movq pcid_cr3,%rax - cmpq %rcx,%rax - je 1f - btsq $63,%rax - movq %rax,%cr3 -1: - movq 8(%rdx),%rdx - movq smp_tlb_addr2,%rax -2: - invlpg (%rdx) - addq $PAGE_SIZE,%rdx - cmpq %rax,%rdx - jb 2b - btsq $63,%rcx - movq %rcx,%cr3 - popq %rcx - jmp invltlb_ret_rdx - -invlrng_invpcid: - pushq %rcx - subq $16,%rsp - movq (%rdx),%rcx - movq %rcx,(%rsp) - movq 8(%rdx),%rax - movq %rax,8(%rsp) - movq smp_tlb_addr2,%rcx - subq %rax,%rcx - shrq $PAGE_SHIFT,%rcx -1: -// invpcid (%rdx),%rax - .byte 0x66,0x0f,0x38,0x82,0x02 - addq $PAGE_SIZE,8(%rsp) - dec %rcx - jne 1b - addq $16,%rsp - popq %rcx - jmp invltlb_ret_rdx - -invlrng_single_page: - movq 8(%rdx),%rdx - movq smp_tlb_addr2,%rax -1: invlpg (%rdx) /* invalidate single page */ - addq $PAGE_SIZE,%rdx - cmpq %rax,%rdx - jb 1b - jmp invltlb_ret_rdx + call invlrng_handler + jmp invltlb_ret /* * Invalidate cache. @@ -430,17 +215,10 @@ invlrng_single_page: .text SUPERALIGN_TEXT IDTVEC(invlcache) -#ifdef COUNT_IPIS PUSH_FRAME - movl PCPU(CPUID), %eax - movq ipi_invlcache_counts(,%rax,8),%rax - incq (%rax) - POP_FRAME -#endif - pushq %rax - wbinvd - jmp invltlb_ret_rax + call invlcache_handler + jmp invltlb_ret /* * Handler for IPIs sent via the per-cpu IPI bitmap. diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 1b38f60..9f03e4c 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -109,6 +109,7 @@ struct invpcid_descr smp_tlb_invpcid; volatile int smp_tlb_wait; uint64_t pcid_cr3; pmap_t smp_tlb_pmap; +extern int invpcid_works; #ifdef COUNT_IPIS /* Interrupt counts. */ @@ -1496,6 +1497,175 @@ cpususpend_handler(void) } /* + * Handlers for TLB related IPIs + */ +void +invltlb_handler(void) +{ +#ifdef COUNT_XINVLTLB_HITS + xhits_gbl[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + invltlb(); + atomic_add_int(&smp_tlb_wait, 1); +} + +void +invltlb_pcid_handler(void) +{ + uint64_t cr3; +#ifdef COUNT_XINVLTLB_HITS + xhits_gbl[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + cr3 = rcr3(); + if (smp_tlb_invpcid.pcid != (uint64_t)-1 && + smp_tlb_invpcid.pcid != 0) { + + if (invpcid_works) { + invpcid(&smp_tlb_invpcid, INVPCID_CTX); + } else { + /* Otherwise reload %cr3 twice. */ + if (cr3 != pcid_cr3) { + load_cr3(pcid_cr3); + cr3 |= CR3_PCID_SAVE; + } + load_cr3(cr3); + } + } else { + invltlb_globpcid(); + } + if (smp_tlb_pmap != NULL) + CPU_CLR_ATOMIC(PCPU_GET(cpuid), &smp_tlb_pmap->pm_save); + + atomic_add_int(&smp_tlb_wait, 1); +} + +void +invlpg_handler(void) +{ +#ifdef COUNT_XINVLTLB_HITS + xhits_pg[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + invlpg(smp_tlb_invpcid.addr); + atomic_add_int(&smp_tlb_wait, 1); +} + +void +invlpg_pcid_handler(void) +{ +#ifdef COUNT_XINVLTLB_HITS + xhits_pg[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + if (invpcid_works) { + invpcid(&smp_tlb_invpcid, INVPCID_ADDR); + } else if (smp_tlb_invpcid.pcid == 0) { + invlpg(smp_tlb_invpcid.addr); + } else if (smp_tlb_invpcid.pcid == (uint64_t)-1) { + invltlb_globpcid(); + } else { + uint64_t cr3; + + /* + * PCID supported, but INVPCID is not. + * Temporarily switch to the target address + * space and do INVLPG. + */ + cr3 = rcr3(); + if (cr3 != pcid_cr3) + load_cr3(pcid_cr3 | CR3_PCID_SAVE); + invlpg(smp_tlb_invpcid.addr); + load_cr3(cr3 | CR3_PCID_SAVE); + } + + atomic_add_int(&smp_tlb_wait, 1); +} + +static inline void +invlpg_range(vm_offset_t start, vm_offset_t end) +{ + + do { + invlpg(start); + start += PAGE_SIZE; + } while (start < end); +} + +void +invlrng_handler(void) +{ + vm_offset_t addr; +#ifdef COUNT_XINVLTLB_HITS + xhits_rng[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invlrng_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + addr = smp_tlb_invpcid.addr; + if (pmap_pcid_enabled) { + if (invpcid_works) { + struct invpcid_descr d; + + d = smp_tlb_invpcid; + do { + invpcid(&d, INVPCID_ADDR); + d.addr += PAGE_SIZE; + } while (d.addr < smp_tlb_addr2); + } else if (smp_tlb_invpcid.pcid == 0) { + /* + * kernel pmap - use invlpg to invalidate + * global mapping. + */ + invlpg_range(addr, smp_tlb_addr2); + } else if (smp_tlb_invpcid.pcid == (uint64_t)-1) { + invltlb_globpcid(); + if (smp_tlb_pmap != NULL) { + CPU_CLR_ATOMIC(PCPU_GET(cpuid), + &smp_tlb_pmap->pm_save); + } + } else { + uint64_t cr3; + + cr3 = rcr3(); + if (cr3 != pcid_cr3) + load_cr3(pcid_cr3 | CR3_PCID_SAVE); + invlpg_range(addr, smp_tlb_addr2); + load_cr3(cr3 | CR3_PCID_SAVE); + } + } else { + invlpg_range(addr, smp_tlb_addr2); + } + + atomic_add_int(&smp_tlb_wait, 1); +} + +void +invlcache_handler(void) +{ +#ifdef COUNT_IPIS + (*ipi_invlcache_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + wbinvd(); + atomic_add_int(&smp_tlb_wait, 1); +} + +/* * This is called once the rest of the system is up and running and we're * ready to let the AP's out of the pen. */ diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index 2be7da1..1b8a2cf 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -62,6 +62,12 @@ struct pmap; void cpu_add(u_int apic_id, char boot_cpu); void cpustop_handler(void); void cpususpend_handler(void); +void invltlb_handler(void); +void invltlb_pcid_handler(void); +void invlpg_handler(void); +void invlpg_pcid_handler(void); +void invlrng_handler(void); +void invlcache_handler(void); void init_secondary(void); void ipi_startup(int apic_id, int vector); void ipi_all_but_self(u_int ipi); -- cgit v1.1