Diffstat (limited to 'sys/amd64')
-rw-r--r--   sys/amd64/amd64/initcpu.c       | 13
-rw-r--r--   sys/amd64/amd64/pmap.c          | 89
-rw-r--r--   sys/amd64/include/cpufunc.h     |  7
-rw-r--r--   sys/amd64/linux/linux_sysvec.c  | 15
-rw-r--r--   sys/amd64/vmm/vmm_dev.c         |  2
5 files changed, 90 insertions, 36 deletions
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index 66e465e..1b358bc 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -253,12 +253,17 @@ initializecpucache(void)
          * CPUID_SS feature even though the native CPU supports it.
          */
         TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
-        if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1)
+        if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) {
                 cpu_feature &= ~CPUID_CLFSH;
+                cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+        }
+
         /*
-         * Allow to disable CLFLUSH feature manually by
-         * hw.clflush_disable tunable.
+         * The kernel's use of CLFLUSH{,OPT} can be disabled manually
+         * by setting the hw.clflush_disable tunable.
          */
-        if (hw_clflush_disable == 1)
+        if (hw_clflush_disable == 1) {
                 cpu_feature &= ~CPUID_CLFSH;
+                cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+        }
 }
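
Note (not part of the diff): the initcpu.c change above clears CPUID_STDEXT_CLFLUSHOPT together with CPUID_CLFSH, so hw.clflush_disable=1 (or the VM-guest default of -1) now disables the kernel's use of both CLFLUSH and CLFLUSHOPT. As a rough illustration only, the same two CPUID bits can be probed from user space as sketched below; the macro names mirror the kernel's, the values follow the SDM encodings (leaf 1 EDX bit 19 for CLFLUSH, leaf 7 subleaf 0 EBX bit 23 for CLFLUSHOPT), and __get_cpuid_count() requires a reasonably recent GCC or Clang <cpuid.h>.

#include <cpuid.h>
#include <stdio.h>

#define CPUID_CLFSH             0x00080000      /* leaf 1, %edx bit 19 */
#define CPUID_STDEXT_CLFLUSHOPT 0x00800000      /* leaf 7/0, %ebx bit 23 */

int
main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* Basic feature flags: CPUID leaf 1. */
        if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                printf("CLFLUSH:    %s\n",
                    (edx & CPUID_CLFSH) != 0 ? "yes" : "no");

        /* Structured extended feature flags: CPUID leaf 7, subleaf 0. */
        if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
                printf("CLFLUSHOPT: %s\n",
                    (ebx & CPUID_STDEXT_CLFLUSHOPT) != 0 ? "yes" : "no");
        return (0);
}
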
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index d7c5213..1f6aaf5 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -912,7 +912,12 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 
         virtual_avail = va;
 
-        /* Initialize the PAT MSR. */
+        /*
+         * Initialize the PAT MSR.
+         * pmap_init_pat() clears and sets CR4_PGE, which, as a
+         * side-effect, invalidates stale PG_G TLB entries that might
+         * have been created in our pre-boot environment.
+         */
         pmap_init_pat();
 
         /* Initialize TLB Context Id. */
@@ -1789,9 +1794,8 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
 
         if ((cpu_feature & CPUID_SS) != 0 && !force)
                 ; /* If "Self Snoop" is supported and allowed, do nothing. */
-        else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+        else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
             eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-
                 /*
                  * XXX: Some CPUs fault, hang, or trash the local APIC
                  * registers if we use CLFLUSH on the local APIC
@@ -1802,16 +1806,29 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
                         return;
 
                 /*
-                 * Otherwise, do per-cache line flush.  Use the mfence
+                 * Otherwise, do per-cache line flush.  Use the sfence
                  * instruction to insure that previous stores are
                  * included in the write-back.  The processor
                  * propagates flush to other processors in the cache
                  * coherence domain.
                  */
-                mfence();
+                sfence();
+                for (; sva < eva; sva += cpu_clflush_line_size)
+                        clflushopt(sva);
+                sfence();
+        } else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+            eva - sva < PMAP_CLFLUSH_THRESHOLD) {
+                if (pmap_kextract(sva) == lapic_paddr)
+                        return;
+                /*
+                 * Writes are ordered by CLFLUSH on Intel CPUs.
+                 */
+                if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                        mfence();
                 for (; sva < eva; sva += cpu_clflush_line_size)
                         clflush(sva);
-                mfence();
+                if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                        mfence();
         } else {
 
                 /*
@@ -1835,19 +1852,31 @@ pmap_invalidate_cache_pages(vm_page_t *pages, int count)
 {
         vm_offset_t daddr, eva;
         int i;
+        bool useclflushopt;
 
+        useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0;
         if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE ||
-            (cpu_feature & CPUID_CLFSH) == 0)
+            ((cpu_feature & CPUID_CLFSH) == 0 && !useclflushopt))
                 pmap_invalidate_cache();
         else {
-                mfence();
+                if (useclflushopt)
+                        sfence();
+                else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                        mfence();
                 for (i = 0; i < count; i++) {
                         daddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pages[i]));
                         eva = daddr + PAGE_SIZE;
-                        for (; daddr < eva; daddr += cpu_clflush_line_size)
-                                clflush(daddr);
+                        for (; daddr < eva; daddr += cpu_clflush_line_size) {
+                                if (useclflushopt)
+                                        clflushopt(daddr);
+                                else
+                                        clflush(daddr);
+                        }
                 }
-                mfence();
+                if (useclflushopt)
+                        sfence();
+                else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                        mfence();
         }
 }
@@ -3348,6 +3377,7 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
         vm_paddr_t mptepa;
         vm_page_t mpte;
         struct spglist free;
+        vm_offset_t sva;
         int PG_PTE_CACHE;
 
         PG_G = pmap_global_bit(pmap);
@@ -3386,9 +3416,9 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
             DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
             VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
                         SLIST_INIT(&free);
-                        pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free,
-                            lockp);
-                        pmap_invalidate_page(pmap, trunc_2mpage(va));
+                        sva = trunc_2mpage(va);
+                        pmap_remove_pde(pmap, pde, sva, &free, lockp);
+                        pmap_invalidate_range(pmap, sva, sva + NBPDR - 1);
                         pmap_free_zero_pages(&free);
                         CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx"
                             " in pmap %p", va, pmap);
@@ -3531,11 +3561,23 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
                 pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE;
 
         /*
-         * Machines that don't support invlpg, also don't support
-         * PG_G.
+         * When workaround_erratum383 is false, a promotion to a 2M
+         * page mapping does not invalidate the 512 4K page mappings
+         * from the TLB.  Consequently, at this point, the TLB may
+         * hold both 4K and 2M page mappings.  Therefore, the entire
+         * range of addresses must be invalidated here.  In contrast,
+         * when workaround_erratum383 is true, a promotion does
+         * invalidate the 512 4K page mappings, and so a single INVLPG
+         * suffices to invalidate the 2M page mapping.
          */
-        if (oldpde & PG_G)
-                pmap_invalidate_page(kernel_pmap, sva);
+        if ((oldpde & PG_G) != 0) {
+                if (workaround_erratum383)
+                        pmap_invalidate_page(kernel_pmap, sva);
+                else
+                        pmap_invalidate_range(kernel_pmap, sva,
+                            sva + NBPDR - 1);
+        }
+
         pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE);
         if (oldpde & PG_MANAGED) {
                 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, oldpde & PG_PS_FRAME);
@@ -3890,9 +3932,14 @@ retry:
         if (newpde != oldpde) {
                 if (!atomic_cmpset_long(pde, oldpde, newpde))
                         goto retry;
-                if (oldpde & PG_G)
-                        pmap_invalidate_page(pmap, sva);
-                else
+                if (oldpde & PG_G) {
+                        /* See pmap_remove_pde() for explanation. */
+                        if (workaround_erratum383)
+                                pmap_invalidate_page(kernel_pmap, sva);
+                        else
+                                pmap_invalidate_range(kernel_pmap, sva,
+                                    sva + NBPDR - 1);
+                } else
                         anychanged = TRUE;
         }
         return (anychanged);
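
Note (not part of the diff): the pmap_invalidate_cache_range() and pmap_invalidate_cache_pages() hunks above pick between two per-line flush sequences: CLFLUSHOPT bracketed by SFENCE, or plain CLFLUSH, where the new comment notes that writes are already ordered on Intel CPUs, so MFENCE is issued only for other vendors. The sketch below mirrors that selection logic in stand-alone user-space form; flush_range(), CACHE_LINE, have_clflushopt, and is_intel are illustrative placeholders for the kernel's loops, cpu_clflush_line_size, cpu_stdext_feature, and cpu_vendor_id checks. CLFLUSHOPT is emitted as a 0x66-prefixed CLFLUSH (its SDM encoding), so no special compiler flag is needed.

#include <stddef.h>
#include <stdlib.h>

#define CACHE_LINE      64      /* stand-in for cpu_clflush_line_size */

static inline void
sfence(void)
{
        __asm __volatile("sfence" : : : "memory");
}

static inline void
mfence(void)
{
        __asm __volatile("mfence" : : : "memory");
}

static inline void
clflush(void *p)
{
        __asm __volatile("clflush %0" : : "m" (*(volatile char *)p));
}

static inline void
clflushopt(void *p)
{
        /* CLFLUSHOPT is CLFLUSH with a 0x66 prefix (66 0F AE /7). */
        __asm __volatile(".byte 0x66; clflush %0" : :
            "m" (*(volatile char *)p));
}

/*
 * Write the cache lines covering [start, start + len) back to memory,
 * mirroring the selection logic of pmap_invalidate_cache_range().
 * "start" is assumed cache-line aligned; have_clflushopt and is_intel
 * stand in for the kernel's feature and vendor checks.
 */
static void
flush_range(char *start, size_t len, int have_clflushopt, int is_intel)
{
        char *p;

        if (have_clflushopt) {
                /* CLFLUSHOPT is only ordered by fencing; fence both sides. */
                sfence();
                for (p = start; p < start + len; p += CACHE_LINE)
                        clflushopt(p);
                sfence();
        } else {
                /* Writes are ordered by CLFLUSH on Intel CPUs. */
                if (!is_intel)
                        mfence();
                for (p = start; p < start + len; p += CACHE_LINE)
                        clflush(p);
                if (!is_intel)
                        mfence();
        }
}

int
main(void)
{
        char *buf;

        if ((buf = aligned_alloc(CACHE_LINE, 4096)) == NULL)
                return (1);
        /* Use the plain-CLFLUSH path, pretending the CPU is Intel. */
        flush_range(buf, 4096, 0, 1);
        free(buf);
        return (0);
}

The kernel additionally falls back to a full pmap_invalidate_cache() when the range exceeds PMAP_CLFLUSH_THRESHOLD; that policy is omitted from the sketch.
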
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index a3d82e8..dacfe32 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -327,6 +327,13 @@ mfence(void)
 }
 
 static __inline void
+sfence(void)
+{
+
+        __asm __volatile("sfence" : : : "memory");
+}
+
+static __inline void
 ia32_pause(void)
 {
         __asm __volatile("pause");
diff --git a/sys/amd64/linux/linux_sysvec.c b/sys/amd64/linux/linux_sysvec.c
index 13f9199..dba213b 100644
--- a/sys/amd64/linux/linux_sysvec.c
+++ b/sys/amd64/linux/linux_sysvec.c
@@ -718,7 +718,7 @@ exec_linux_imgact_try(struct image_params *imgp)
 {
         const char *head = (const char *)imgp->image_header;
         char *rpath;
-        int error = -1, len;
+        int error = -1;
 
         /*
          * The interpreter for shell scripts run from a linux binary needs
@@ -736,17 +736,12 @@ exec_linux_imgact_try(struct image_params *imgp)
                         linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
                             imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
                             AT_FDCWD);
-                        if (rpath != NULL) {
-                                len = strlen(rpath) + 1;
-
-                                if (len <= MAXSHELLCMDLEN)
-                                        memcpy(imgp->interpreter_name,
-                                            rpath, len);
-                                free(rpath, M_TEMP);
-                        }
+                        if (rpath != NULL)
+                                imgp->args->fname_buf =
+                                    imgp->interpreter_name = rpath;
                 }
         }
-        return(error);
+        return (error);
 }
 
 #define LINUX_VSYSCALL_START            (-10UL << 20)
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
index 5cb4150..53a8bdc 100644
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -258,7 +258,7 @@ alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
         if (VM_MEMSEG_NAME(mseg)) {
                 sysmem = false;
                 name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK);
-                error = copystr(VM_MEMSEG_NAME(mseg), name, SPECNAMELEN + 1, 0);
+                error = copystr(mseg->name, name, SPECNAMELEN + 1, 0);
                 if (error)
                         goto done;
         }