Diffstat (limited to 'sys/amd64')
-rw-r--r--  sys/amd64/amd64/initcpu.c       | 13
-rw-r--r--  sys/amd64/amd64/pmap.c          | 89
-rw-r--r--  sys/amd64/include/cpufunc.h     |  7
-rw-r--r--  sys/amd64/linux/linux_sysvec.c  | 15
-rw-r--r--  sys/amd64/vmm/vmm_dev.c         |  2
5 files changed, 90 insertions(+), 36 deletions(-)
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index 66e465e..1b358bc 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -253,12 +253,17 @@ initializecpucache(void)
* CPUID_SS feature even though the native CPU supports it.
*/
TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
- if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1)
+ if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) {
cpu_feature &= ~CPUID_CLFSH;
+ cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+ }
+
/*
- * Allow to disable CLFLUSH feature manually by
- * hw.clflush_disable tunable.
+ * The kernel's use of CLFLUSH{,OPT} can be disabled manually
+ * by setting the hw.clflush_disable tunable.
*/
- if (hw_clflush_disable == 1)
+ if (hw_clflush_disable == 1) {
cpu_feature &= ~CPUID_CLFSH;
+ cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+ }
}
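
A minimal sketch of how a caller might honor both feature masks cleared above; flush_line_or_all() is a hypothetical helper, not part of this change, and assumes the clflush()/clflushopt()/wbinvd() inlines from <machine/cpufunc.h>:

static void
flush_line_or_all(vm_offset_t va)
{

	if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0)
		clflushopt(va);		/* preferred: cheaper ordering requirements */
	else if ((cpu_feature & CPUID_CLFSH) != 0)
		clflush(va);		/* fall back to the original instruction */
	else
		wbinvd();		/* last resort: write back and invalidate all caches */
}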
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index d7c5213..1f6aaf5 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -912,7 +912,12 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
virtual_avail = va;
- /* Initialize the PAT MSR. */
+ /*
+ * Initialize the PAT MSR.
+ * pmap_init_pat() clears and sets CR4_PGE, which, as a
+ * side-effect, invalidates stale PG_G TLB entries that might
+ * have been created in our pre-boot environment.
+ */
pmap_init_pat();
/* Initialize TLB Context Id. */
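
For reference, a sketch of the CR4.PGE side effect the new comment relies on, assuming the rcr4()/load_cr4() inlines and CR4_PGE from the usual headers; flush_global_tlb_entries() is a hypothetical name, not code from this patch:

static void
flush_global_tlb_entries(void)
{
	u_long cr4;

	cr4 = rcr4();
	load_cr4(cr4 & ~CR4_PGE);	/* flushes the entire TLB, PG_G entries included */
	load_cr4(cr4);			/* re-enable global pages */
}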
@@ -1789,9 +1794,8 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
if ((cpu_feature & CPUID_SS) != 0 && !force)
; /* If "Self Snoop" is supported and allowed, do nothing. */
- else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+ else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-
/*
* XXX: Some CPUs fault, hang, or trash the local APIC
* registers if we use CLFLUSH on the local APIC
@@ -1802,16 +1806,29 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
return;
/*
- * Otherwise, do per-cache line flush. Use the mfence
+ * Otherwise, do per-cache line flush. Use the sfence
* instruction to insure that previous stores are
* included in the write-back. The processor
* propagates flush to other processors in the cache
* coherence domain.
*/
- mfence();
+ sfence();
+ for (; sva < eva; sva += cpu_clflush_line_size)
+ clflushopt(sva);
+ sfence();
+ } else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+ eva - sva < PMAP_CLFLUSH_THRESHOLD) {
+ if (pmap_kextract(sva) == lapic_paddr)
+ return;
+ /*
+ * Writes are ordered by CLFLUSH on Intel CPUs.
+ */
+ if (cpu_vendor_id != CPU_VENDOR_INTEL)
+ mfence();
for (; sva < eva; sva += cpu_clflush_line_size)
clflush(sva);
- mfence();
+ if (cpu_vendor_id != CPU_VENDOR_INTEL)
+ mfence();
} else {
/*
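
The CLFLUSHOPT path above, shown in isolation as a hypothetical standalone helper: CLFLUSHOPT is weakly ordered, so the loop is bracketed by SFENCE to order it against earlier stores and against whatever runs after the flush is assumed complete:

static void
flush_range_clflushopt(vm_offset_t sva, vm_offset_t eva)
{

	sfence();	/* make earlier stores part of the write-back */
	for (; sva < eva; sva += cpu_clflush_line_size)
		clflushopt(sva);
	sfence();	/* order the flushes before subsequent code */
}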
@@ -1835,19 +1852,31 @@ pmap_invalidate_cache_pages(vm_page_t *pages, int count)
{
vm_offset_t daddr, eva;
int i;
+ bool useclflushopt;
+ useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0;
if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE ||
- (cpu_feature & CPUID_CLFSH) == 0)
+ ((cpu_feature & CPUID_CLFSH) == 0 && !useclflushopt))
pmap_invalidate_cache();
else {
- mfence();
+ if (useclflushopt)
+ sfence();
+ else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+ mfence();
for (i = 0; i < count; i++) {
daddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pages[i]));
eva = daddr + PAGE_SIZE;
- for (; daddr < eva; daddr += cpu_clflush_line_size)
- clflush(daddr);
+ for (; daddr < eva; daddr += cpu_clflush_line_size) {
+ if (useclflushopt)
+ clflushopt(daddr);
+ else
+ clflush(daddr);
+ }
}
- mfence();
+ if (useclflushopt)
+ sfence();
+ else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+ mfence();
}
}
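
The fence choice made twice above can be read as one hypothetical helper: SFENCE suffices for CLFLUSHOPT, while plain CLFLUSH is already ordered with respect to writes on Intel CPUs, so MFENCE is only issued for other vendors:

static void
cache_flush_fence(bool useclflushopt)
{

	if (useclflushopt)
		sfence();
	else if (cpu_vendor_id != CPU_VENDOR_INTEL)
		mfence();
}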
@@ -3348,6 +3377,7 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
vm_paddr_t mptepa;
vm_page_t mpte;
struct spglist free;
+ vm_offset_t sva;
int PG_PTE_CACHE;
PG_G = pmap_global_bit(pmap);
@@ -3386,9 +3416,9 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
SLIST_INIT(&free);
- pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free,
- lockp);
- pmap_invalidate_page(pmap, trunc_2mpage(va));
+ sva = trunc_2mpage(va);
+ pmap_remove_pde(pmap, pde, sva, &free, lockp);
+ pmap_invalidate_range(pmap, sva, sva + NBPDR - 1);
pmap_free_zero_pages(&free);
CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
@@ -3531,11 +3561,23 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE;
/*
- * Machines that don't support invlpg, also don't support
- * PG_G.
+ * When workaround_erratum383 is false, a promotion to a 2M
+ * page mapping does not invalidate the 512 4K page mappings
+ * from the TLB. Consequently, at this point, the TLB may
+ * hold both 4K and 2M page mappings. Therefore, the entire
+ * range of addresses must be invalidated here. In contrast,
+ * when workaround_erratum383 is true, a promotion does
+ * invalidate the 512 4K page mappings, and so a single INVLPG
+ * suffices to invalidate the 2M page mapping.
*/
- if (oldpde & PG_G)
- pmap_invalidate_page(kernel_pmap, sva);
+ if ((oldpde & PG_G) != 0) {
+ if (workaround_erratum383)
+ pmap_invalidate_page(kernel_pmap, sva);
+ else
+ pmap_invalidate_range(kernel_pmap, sva,
+ sva + NBPDR - 1);
+ }
+
pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE);
if (oldpde & PG_MANAGED) {
CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, oldpde & PG_PS_FRAME);
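
A sketch of the invalidation policy the comment describes, as a hypothetical helper rather than code from the patch: without the erratum 383 workaround the TLB may still hold the 512 4K entries that existed before promotion, so the whole 2M range is invalidated; with the workaround a single INVLPG on the 2M mapping is enough:

static void
invalidate_2m_mapping(pmap_t pmap, vm_offset_t sva)
{

	if (workaround_erratum383)
		pmap_invalidate_page(pmap, sva);	/* promotion already dropped the 4K entries */
	else
		pmap_invalidate_range(pmap, sva,
		    sva + NBPDR - 1);			/* 4K and 2M entries may coexist */
}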
@@ -3890,9 +3932,14 @@ retry:
if (newpde != oldpde) {
if (!atomic_cmpset_long(pde, oldpde, newpde))
goto retry;
- if (oldpde & PG_G)
- pmap_invalidate_page(pmap, sva);
- else
+ if (oldpde & PG_G) {
+ /* See pmap_remove_pde() for explanation. */
+ if (workaround_erratum383)
+ pmap_invalidate_page(kernel_pmap, sva);
+ else
+ pmap_invalidate_range(kernel_pmap, sva,
+ sva + NBPDR - 1);
+ } else
anychanged = TRUE;
}
return (anychanged);
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index a3d82e8..dacfe32 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -327,6 +327,13 @@ mfence(void)
}
static __inline void
+sfence(void)
+{
+
+ __asm __volatile("sfence" : : : "memory");
+}
+
+static __inline void
ia32_pause(void)
{
__asm __volatile("pause");
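
An illustrative, hypothetical use of the new primitive: sfence() orders stores only, which is all CLFLUSHOPT needs, whereas mfence() would also serialize loads:

static void
publish_and_flush(volatile u_long *p, u_long v)
{

	*p = v;			/* store that must reach memory */
	sfence();		/* order the store before the flush */
	clflushopt((vm_offset_t)p);
	sfence();		/* flush is ordered before later code */
}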
diff --git a/sys/amd64/linux/linux_sysvec.c b/sys/amd64/linux/linux_sysvec.c
index 13f9199..dba213b 100644
--- a/sys/amd64/linux/linux_sysvec.c
+++ b/sys/amd64/linux/linux_sysvec.c
@@ -718,7 +718,7 @@ exec_linux_imgact_try(struct image_params *imgp)
{
const char *head = (const char *)imgp->image_header;
char *rpath;
- int error = -1, len;
+ int error = -1;
/*
* The interpreter for shell scripts run from a linux binary needs
@@ -736,17 +736,12 @@ exec_linux_imgact_try(struct image_params *imgp)
linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
imgp->interpreter_name, UIO_SYSSPACE,
&rpath, 0, AT_FDCWD);
- if (rpath != NULL) {
- len = strlen(rpath) + 1;
-
- if (len <= MAXSHELLCMDLEN)
- memcpy(imgp->interpreter_name,
- rpath, len);
- free(rpath, M_TEMP);
- }
+ if (rpath != NULL)
+ imgp->args->fname_buf =
+ imgp->interpreter_name = rpath;
}
}
- return(error);
+ return (error);
}
#define LINUX_VSYSCALL_START (-10UL << 20)
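
The change above replaces a bounded copy, which silently ignored translated interpreter paths longer than MAXSHELLCMDLEN, with a pointer handoff: the malloc'ed path is stored in imgp->args->fname_buf so it is freed with the rest of the exec arguments. A simplified userland sketch of the two approaches, with all names hypothetical:

#include <stdlib.h>
#include <string.h>

struct exec_args {
	char	*fname_buf;	/* freed by the consumer when the args are released */
};

/* Old approach: bounded copy; paths longer than dstlen are dropped. */
static void
set_interp_copy(char *dst, size_t dstlen, char *rpath)
{
	size_t len;

	len = strlen(rpath) + 1;
	if (len <= dstlen)
		memcpy(dst, rpath, len);
	free(rpath);
}

/* New approach: hand the allocated string over; no length limit. */
static void
set_interp_handoff(struct exec_args *args, char **interp, char *rpath)
{

	args->fname_buf = *interp = rpath;
}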
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
index 5cb4150..53a8bdc 100644
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -258,7 +258,7 @@ alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
if (VM_MEMSEG_NAME(mseg)) {
sysmem = false;
name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK);
- error = copystr(VM_MEMSEG_NAME(mseg), name, SPECNAMELEN + 1, 0);
+ error = copystr(mseg->name, name, SPECNAMELEN + 1, 0);
if (error)
goto done;
}