summaryrefslogtreecommitdiffstats
path: root/sys/amd64
diff options
context:
space:
mode:
authorkib <kib@FreeBSD.org>2009-07-22 14:32:38 +0000
committerkib <kib@FreeBSD.org>2009-07-22 14:32:38 +0000
commitf8feb430b0a4639f63593e2ba2bb08a7aeae8ff8 (patch)
treec5c978d2492a738d39967d6c7d8761440339bfef /sys/amd64
parentce2a12d7ea97ed65c71fda5adaa5d168fbdaf9e3 (diff)
downloadFreeBSD-src-f8feb430b0a4639f63593e2ba2bb08a7aeae8ff8.zip
FreeBSD-src-f8feb430b0a4639f63593e2ba2bb08a7aeae8ff8.tar.gz
When the page caching attributes are changed, after the new mapping is
established, the OS shall flush the caches on all processors that may have used the mapping previously. This operation is not needed if the processors support self-snooping. If they do not, but the clflush instruction is implemented on the CPU, a series of clflush instructions can be used on the mapping region. Otherwise, we have to flush the whole cache. The latter operation is very expensive, and AMD-made CPUs do not have self-snooping. Implement cache flush for the remapped region by using clflush for amd64, when supported by the CPU. Proposed and reviewed by: alc Approved by: re (kensmith)
Diffstat (limited to 'sys/amd64')
-rw-r--r--sys/amd64/amd64/initcpu.c9
-rw-r--r--sys/amd64/amd64/pmap.c56
-rw-r--r--sys/amd64/include/cpufunc.h14
-rw-r--r--sys/amd64/include/md_var.h1
4 files changed, 66 insertions, 14 deletions
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index edf1a98..c293c1a 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -65,6 +65,7 @@ char cpu_vendor[20]; /* CPU Origin code */
u_int cpu_vendor_id; /* CPU vendor ID */
u_int cpu_fxsr; /* SSE enabled */
u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */
+u_int cpu_clflush_line_size = 32;
SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD,
&via_feature_rng, 0, "VIA C3/C7 RNG feature available in CPU");
@@ -156,4 +157,12 @@ initializecpu(void)
AMD64_CPU_FAMILY(cpu_id) == 0x6 &&
AMD64_CPU_MODEL(cpu_id) >= 0xf)
init_via();
+
+ /*
+ * CPUID with %eax = 1, %ebx returns
+ * Bits 15-8: CLFLUSH line size
+ * (Value * 8 = cache line size in bytes)
+ */
+ if ((cpu_feature & CPUID_CLFSH) != 0)
+ cpu_clflush_line_size = ((cpu_procinfo >> 8) & 0xff) * 8;
}
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 265d77d..e6497c7 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -231,6 +231,7 @@ static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
@@ -921,6 +922,40 @@ pmap_invalidate_cache(void)
}
#endif /* !SMP */
+/*
+ * Flush the CPU caches for the virtual address range [sva, eva) after
+ * the caching attributes of its mapping have been changed.  Both bounds
+ * must be page-aligned (asserted below).
+ *
+ * The cheapest method the CPU offers is used:
+ *  - CPUID_SS set in cpu_feature: nothing needs to be done;
+ *  - CPUID_CLFSH set: clflush is issued for every cache line in the
+ *    range, stepping by cpu_clflush_line_size;
+ *  - otherwise: pmap_invalidate_cache() is called for the whole cache.
+ */
+static void
+pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
+{
+
+	KASSERT((sva & PAGE_MASK) == 0,
+	    ("pmap_invalidate_cache_range: sva not page-aligned"));
+	KASSERT((eva & PAGE_MASK) == 0,
+	    ("pmap_invalidate_cache_range: eva not page-aligned"));
+
+	if (cpu_feature & CPUID_SS)
+		; /* If "Self Snoop" is supported, do nothing. */
+	else if (cpu_feature & CPUID_CLFSH) {
+
+		/*
+		 * Otherwise, do per-cache line flush.  Use the mfence
+		 * instruction to ensure that previous stores are
+		 * included in the write-back.  The processor
+		 * propagates flush to other processors in the cache
+		 * coherence domain.
+		 */
+		mfence();
+		/* Walk forward from the start of the range to its end. */
+		for (; sva < eva; sva += cpu_clflush_line_size)
+			clflush(sva);
+		mfence();
+	} else {
+
+		/*
+		 * No targeted cache flush methods are supported by CPU,
+		 * globally invalidate cache as a last resort.
+		 */
+		pmap_invalidate_cache();
+	}
+}
+
/*
* Are we current address space or kernel?
*/
@@ -4256,7 +4291,8 @@ pmap_pde_attr(pd_entry_t *pde, int cache_bits)
void *
pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
{
- vm_offset_t va, tmpva, offset;
+ vm_offset_t va, offset;
+ vm_size_t tmpsize;
/*
* If the specified range of physical addresses fits within the direct
@@ -4273,16 +4309,10 @@ pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
if (!va)
panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
pa = trunc_page(pa);
- for (tmpva = va; size > 0; ) {
- pmap_kenter_attr(tmpva, pa, mode);
- size -= PAGE_SIZE;
- tmpva += PAGE_SIZE;
- pa += PAGE_SIZE;
- }
- pmap_invalidate_range(kernel_pmap, va, tmpva);
- /* If "Self Snoop" is supported, do nothing. */
- if (!(cpu_feature & CPUID_SS))
- pmap_invalidate_cache();
+ for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
+ pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
+ pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
+ pmap_invalidate_cache_range(va, va + tmpsize);
return ((void *)(va + offset));
}
@@ -4624,9 +4654,7 @@ pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
*/
if (changed) {
pmap_invalidate_range(kernel_pmap, base, tmpva);
- /* If "Self Snoop" is supported, do nothing. */
- if (!(cpu_feature & CPUID_SS))
- pmap_invalidate_cache();
+ pmap_invalidate_cache_range(base, tmpva);
}
return (error);
}
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index 15fc0e9..eb264ae 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -100,6 +100,13 @@ bsrq(u_long mask)
}
static __inline void
+clflush(u_long addr)
+{
+ /* Execute the clflush instruction with the byte at address addr as its memory operand. */
+ __asm __volatile("clflush %0" : : "m" (*(char *)addr));
+}
+
+static __inline void
disable_intr(void)
{
__asm __volatile("cli" : : : "memory");
@@ -267,6 +274,13 @@ outw(u_int port, u_short data)
}
static __inline void
+mfence(void)
+{
+ /* Execute the mfence instruction; the "memory" clobber also keeps the compiler from moving memory accesses across it. */
+ __asm__ __volatile("mfence" : : : "memory");
+}
+
+static __inline void
ia32_pause(void)
{
__asm __volatile("pause");
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index 892e19d..c66fc9f 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -47,6 +47,7 @@ extern u_int amd_feature2;
extern u_int amd_pminfo;
extern u_int via_feature_rng;
extern u_int via_feature_xcrypt;
+extern u_int cpu_clflush_line_size;
extern u_int cpu_fxsr;
extern u_int cpu_high;
extern u_int cpu_id;
OpenPOWER on IntegriCloud