author      kib <kib@FreeBSD.org>    2009-07-29 08:49:58 +0000
committer   kib <kib@FreeBSD.org>    2009-07-29 08:49:58 +0000
commit      7b17971146d9d04f6fdfc56456eda87bddfb201f (patch)
tree        a9270d20c2533ced81403eaadb92f28a6d20d577
parent      6b019307f44e0dbb389ad68649826526b790a7cc (diff)
download    FreeBSD-src-7b17971146d9d04f6fdfc56456eda87bddfb201f.zip
            FreeBSD-src-7b17971146d9d04f6fdfc56456eda87bddfb201f.tar.gz
As was done in r195820 for amd64, use clflush on i386 to flush cache
lines when a memory page's caching attributes change and the CPU does
not support self-snoop but does implement clflush.

Take care of a possible existing mapping of the page by an sf buffer:
reuse that mapping for the clflush, otherwise map the page transiently.
(Amd64 used the direct map.)

Proposed and reviewed by:  alc
Approved by:  re (kensmith)
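A condensed, standalone C sketch of the flushing policy this change introduces
in pmap_invalidate_cache_range(): do nothing when the CPU self-snoops, flush
per cache line with CLFLUSH bracketed by MFENCE when CLFLUSH is available, and
fall back to invalidating the whole cache otherwise. This is not the kernel
code itself; the feature-flag variables and flush_whole_cache() are stand-ins
for what the kernel detects at boot and for wbinvd.

#include <stdint.h>

#define CPUID_CLFSH  0x00080000u  /* CPUID.1:EDX bit 19, CLFLUSH implemented */
#define CPUID_SS     0x08000000u  /* CPUID.1:EDX bit 27, self snoop */

static unsigned cpu_feature;                /* CPUID.1:EDX, detected at startup */
static unsigned cpu_clflush_line_size = 32; /* CPUID.1:EBX bits 15:8, times 8 */

static void flush_whole_cache(void) { /* the kernel uses wbinvd here */ }

static void
flush_cache_range(uintptr_t sva, uintptr_t eva)
{
	if (cpu_feature & CPUID_SS)
		return;                 /* self snoop keeps caches coherent */
	if (cpu_feature & CPUID_CLFSH) {
		/* Fence so earlier stores are part of the write-back. */
		__asm__ __volatile__("mfence" ::: "memory");
		for (; sva < eva; sva += cpu_clflush_line_size)
			__asm__ __volatile__("clflush %0" : : "m" (*(char *)sva));
		__asm__ __volatile__("mfence" ::: "memory");
	} else
		flush_whole_cache();    /* last resort: drop the whole cache */
}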
-rw-r--r--  sys/i386/i386/initcpu.c      |  9
-rw-r--r--  sys/i386/i386/pmap.c         | 98
-rw-r--r--  sys/i386/i386/vm_machdep.c   | 38
-rw-r--r--  sys/i386/include/cpufunc.h   | 14
-rw-r--r--  sys/i386/include/md_var.h    |  1
-rw-r--r--  sys/i386/include/pmap.h      |  2
-rw-r--r--  sys/i386/include/sf_buf.h    |  2
7 files changed, 143 insertions(+), 21 deletions(-)
diff --git a/sys/i386/i386/initcpu.c b/sys/i386/i386/initcpu.c
index e1c38ba..1ecff1c 100644
--- a/sys/i386/i386/initcpu.c
+++ b/sys/i386/i386/initcpu.c
@@ -91,6 +91,7 @@ u_int cpu_procinfo = 0; /* HyperThreading Info / Brand Index / CLFUSH */
u_int cpu_procinfo2 = 0; /* Multicore info */
char cpu_vendor[20] = ""; /* CPU Origin code */
u_int cpu_vendor_id = 0; /* CPU vendor ID */
+u_int cpu_clflush_line_size = 32;
SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD,
&via_feature_rng, 0, "VIA C3/C7 RNG feature available in CPU");
@@ -709,6 +710,14 @@ initializecpu(void)
}
enable_sse();
+ /*
+ * CPUID with %eax = 1, %ebx returns
+ * Bits 15-8: CLFLUSH line size
+ * (Value * 8 = cache line size in bytes)
+ */
+ if ((cpu_feature & CPUID_CLFSH) != 0)
+ cpu_clflush_line_size = ((cpu_procinfo >> 8) & 0xff) * 8;
+
#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE)
/*
* OS should flush L1 cache by itself because no PC-98 supports
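For reference, a small user-space sketch (assuming GCC or Clang's <cpuid.h>;
not part of the patch) of the detection the hunk above performs at boot:
CPUID leaf 1 reports the CLFLUSH line size in EBX bits 15:8 in units of 8
bytes, and EDX bit 19 indicates whether CLFLUSH is implemented at all.

#include <cpuid.h>
#include <stdio.h>

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return (1);                       /* leaf 1 not supported */
	if (edx & (1u << 19))                     /* CPUID_CLFSH */
		printf("clflush line size: %u bytes\n", ((ebx >> 8) & 0xff) * 8);
	else
		printf("no clflush; the kernel would fall back to wbinvd\n");
	return (0);
}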
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index c8d6025..c6f1d5f 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -119,6 +119,7 @@ __FBSDID("$FreeBSD$");
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/sf_buf.h>
#include <sys/sx.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
@@ -732,7 +733,7 @@ SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD,
* Determine the appropriate bits to set in a PTE or PDE for a specified
* caching mode.
*/
-static int
+int
pmap_cache_bits(int mode, boolean_t is_pde)
{
int pat_flag, pat_index, cache_bits;
@@ -945,6 +946,40 @@ pmap_invalidate_cache(void)
}
#endif /* !SMP */
+void
+pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
+{
+
+ KASSERT((sva & PAGE_MASK) == 0,
+ ("pmap_invalidate_cache_range: sva not page-aligned"));
+ KASSERT((eva & PAGE_MASK) == 0,
+ ("pmap_invalidate_cache_range: eva not page-aligned"));
+
+ if (cpu_feature & CPUID_SS)
+ ; /* If "Self Snoop" is supported, do nothing. */
+ else if (cpu_feature & CPUID_CLFSH) {
+
+ /*
+ * Otherwise, do per-cache line flush. Use the mfence
+ * instruction to insure that previous stores are
+ * included in the write-back. The processor
+ * propagates flush to other processors in the cache
+ * coherence domain.
+ */
+ mfence();
+ for (; sva < eva; sva += cpu_clflush_line_size)
+ clflush(sva);
+ mfence();
+ } else {
+
+ /*
+ * No targeted cache flush methods are supported by CPU,
+ * globally invalidate cache as a last resort.
+ */
+ pmap_invalidate_cache();
+ }
+}
+
/*
* Are we current address space or kernel? N.B. We return FALSE when
* a pmap's page table is in use because a kernel thread is borrowing
@@ -4400,7 +4435,8 @@ pmap_clear_reference(vm_page_t m)
void *
pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
{
- vm_offset_t va, tmpva, offset;
+ vm_offset_t va, offset;
+ vm_size_t tmpsize;
offset = pa & PAGE_MASK;
size = roundup(offset + size, PAGE_SIZE);
@@ -4413,16 +4449,10 @@ pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
if (!va)
panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
- for (tmpva = va; size > 0; ) {
- pmap_kenter_attr(tmpva, pa, mode);
- size -= PAGE_SIZE;
- tmpva += PAGE_SIZE;
- pa += PAGE_SIZE;
- }
- pmap_invalidate_range(kernel_pmap, va, tmpva);
- /* If "Self Snoop" is supported, do nothing. */
- if (!(cpu_feature & CPUID_SS))
- pmap_invalidate_cache();
+ for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
+ pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
+ pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
+ pmap_invalidate_cache_range(va, va + size);
return ((void *)(va + offset));
}
@@ -4462,16 +4492,48 @@ pmap_unmapdev(vm_offset_t va, vm_size_t size)
void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{
+ struct sysmaps *sysmaps;
+ vm_offset_t sva, eva;
m->md.pat_mode = ma;
+ if ((m->flags & PG_FICTITIOUS) != 0)
+ return;
/*
* If "m" is a normal page, flush it from the cache.
+ * See pmap_invalidate_cache_range().
+ *
+ * First, try to find an existing mapping of the page by sf
+ * buffer. sf_buf_invalidate_cache() modifies mapping and
+ * flushes the cache.
*/
- if ((m->flags & PG_FICTITIOUS) == 0) {
- /* If "Self Snoop" is supported, do nothing. */
- if (!(cpu_feature & CPUID_SS))
- pmap_invalidate_cache();
+ if (sf_buf_invalidate_cache(m))
+ return;
+
+ /*
+ * If page is not mapped by sf buffer, but CPU does not
+ * support self snoop, map the page transient and do
+ * invalidation. In the worst case, whole cache is flushed by
+ * pmap_invalidate_cache_range().
+ */
+ if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) {
+ sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
+ mtx_lock(&sysmaps->lock);
+ if (*sysmaps->CMAP2)
+ panic("pmap_page_set_memattr: CMAP2 busy");
+ sched_pin();
+ *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) |
+ PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0);
+ invlcaddr(sysmaps->CADDR2);
+ sva = (vm_offset_t)sysmaps->CADDR2;
+ eva = sva + PAGE_SIZE;
+ } else
+ sva = eva = 0; /* gcc */
+ pmap_invalidate_cache_range(sva, eva);
+ if (sva != 0) {
+ *sysmaps->CMAP2 = 0;
+ sched_unpin();
+ mtx_unlock(&sysmaps->lock);
}
}
@@ -4537,9 +4599,7 @@ pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
*/
if (changed) {
pmap_invalidate_range(kernel_pmap, base, tmpva);
- /* If "Self Snoop" is supported, do nothing. */
- if (!(cpu_feature & CPUID_SS))
- pmap_invalidate_cache();
+ pmap_invalidate_cache_range(base, tmpva);
}
return (0);
}
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index e06a9a2..b6fd4b6 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -720,6 +720,39 @@ sf_buf_init(void *arg)
}
/*
+ * Invalidate the cache lines that may belong to the page, if
+ * (possibly old) mapping of the page by sf buffer exists. Returns
+ * TRUE when mapping was found and cache invalidated.
+ */
+boolean_t
+sf_buf_invalidate_cache(vm_page_t m)
+{
+ struct sf_head *hash_list;
+ struct sf_buf *sf;
+ boolean_t ret;
+
+ hash_list = &sf_buf_active[SF_BUF_HASH(m)];
+ ret = FALSE;
+ mtx_lock(&sf_buf_lock);
+ LIST_FOREACH(sf, hash_list, list_entry) {
+ if (sf->m == m) {
+ /*
+ * Use pmap_qenter to update the pte for
+ * existing mapping, in particular, the PAT
+ * settings are recalculated.
+ */
+ pmap_qenter(sf->kva, &m, 1);
+ pmap_invalidate_cache_range(sf->kva, sf->kva +
+ PAGE_SIZE);
+ ret = TRUE;
+ break;
+ }
+ }
+ mtx_unlock(&sf_buf_lock);
+ return (ret);
+}
+
+/*
* Get an sf_buf from the freelist. May block if none are available.
*/
struct sf_buf *
@@ -787,9 +820,10 @@ sf_buf_alloc(struct vm_page *m, int flags)
opte = *ptep;
#ifdef XEN
PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(m)) | pgeflag
- | PG_RW | PG_V);
+ | PG_RW | PG_V | pmap_cache_bits(m->md.pat_mode, 0));
#else
- *ptep = VM_PAGE_TO_PHYS(m) | pgeflag | PG_RW | PG_V;
+ *ptep = VM_PAGE_TO_PHYS(m) | pgeflag | PG_RW | PG_V |
+ pmap_cache_bits(m->md.pat_mode, 0);
#endif
/*
diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h
index 6139a1f..147040b 100644
--- a/sys/i386/include/cpufunc.h
+++ b/sys/i386/include/cpufunc.h
@@ -90,6 +90,13 @@ bsrl(u_int mask)
}
static __inline void
+clflush(u_long addr)
+{
+
+ __asm __volatile("clflush %0" : : "m" (*(char *)addr));
+}
+
+static __inline void
disable_intr(void)
{
#ifdef XEN
@@ -138,6 +145,13 @@ cpu_mwait(int extensions, int hints)
__asm __volatile("mwait;" : :"a" (hints), "c" (extensions));
}
+static __inline void
+mfence(void)
+{
+
+ __asm __volatile("mfence" : : : "memory");
+}
+
#ifdef _KERNEL
#define HAVE_INLINE_FFS
diff --git a/sys/i386/include/md_var.h b/sys/i386/include/md_var.h
index aca90cd..e2968e9 100644
--- a/sys/i386/include/md_var.h
+++ b/sys/i386/include/md_var.h
@@ -52,6 +52,7 @@ extern u_int amd_feature2;
extern u_int amd_pminfo;
extern u_int via_feature_rng;
extern u_int via_feature_xcrypt;
+extern u_int cpu_clflush_line_size;
extern u_int cpu_fxsr;
extern u_int cpu_high;
extern u_int cpu_id;
diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h
index 42c5d2e..c90f947 100644
--- a/sys/i386/include/pmap.h
+++ b/sys/i386/include/pmap.h
@@ -463,6 +463,7 @@ extern vm_offset_t virtual_end;
#define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz))
void pmap_bootstrap(vm_paddr_t);
+int pmap_cache_bits(int mode, boolean_t is_pde);
int pmap_change_attr(vm_offset_t, vm_size_t, int);
void pmap_init_pat(void);
void pmap_kenter(vm_offset_t va, vm_paddr_t pa);
@@ -480,6 +481,7 @@ void pmap_invalidate_page(pmap_t, vm_offset_t);
void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
void pmap_invalidate_all(pmap_t);
void pmap_invalidate_cache(void);
+void pmap_invalidate_cache_range(vm_offset_t, vm_offset_t);
#endif /* _KERNEL */
diff --git a/sys/i386/include/sf_buf.h b/sys/i386/include/sf_buf.h
index 9076260..7bc1095 100644
--- a/sys/i386/include/sf_buf.h
+++ b/sys/i386/include/sf_buf.h
@@ -58,4 +58,6 @@ sf_buf_page(struct sf_buf *sf)
return (sf->m);
}
+boolean_t sf_buf_invalidate_cache(vm_page_t m);
+
#endif /* !_MACHINE_SF_BUF_H_ */
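As a usage note, a hypothetical caller fragment (not from this commit; fb_pa
and FB_SIZE are placeholders, and the PAT_WRITE_COMBINING mode constant is
assumed from the i386 pmap headers of this era): mapping device memory with a
non-default caching mode goes through pmap_mapdev_attr(), which after this
change calls pmap_invalidate_cache_range(), so CPUs with CLFLUSH but without
self-snoop flush only the affected lines instead of the entire cache.

	void *fb_va;

	/* Map a framebuffer write-combining; triggers the targeted flush. */
	fb_va = pmap_mapdev_attr(fb_pa, FB_SIZE, PAT_WRITE_COMBINING);
	/* ... use the mapping ... */
	pmap_unmapdev((vm_offset_t)fb_va, FB_SIZE);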