x86, amd: Avoid cache aliasing penalties on AMD family 15h

This patch provides performance tuning for the "Bulldozer" CPU. With its shared instruction cache there is a chance of generating an excessive number of cache cross-invalidates when running specific workloads on the cores of a compute module. This excessive amount of cross-invalidations can be observed if cache lines backed by shared physical memory alias in bits [14:12] of their virtual addresses, as those bits are used for the index generation. This patch addresses the issue by clearing all the bits in the [14:12] slice of the file mapping's virtual address at generation time, thus forcing those bits the same for all mappings of a single shared library across processes and, in doing so, avoids instruction cache aliases. It also adds the command line option "align_va_addr=(32|64|on|off)" with which virtual address alignment can be enabled for 32-bit or 64-bit x86 individually, or both, or be completely disabled. This change leaves virtual region address allocation on other families and/or vendors unaffected. Signed-off-by: Borislav Petkov <borislav.petkov@amd.com> Link: http://lkml.kernel.org/r/1312550110-24160-2-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
author: Borislav Petkov <borislav.petkov@amd.com> 2011-08-05 15:15:08 +0200
committer: H. Peter Anvin <hpa@linux.intel.com> 2011-08-05 12:26:44 -0700
commit: dfb09f9b7ab03fd367740e541a5caf830ed56726 (patch)
tree: 8bd8fdbbf3fb67f7d0aed73a1e8e1c7034ed2d54 /arch/x86/kernel
parent: 13f9a3737c903ace57d8aaebe81a3bbaeb0aa0a2 (diff)
download: op-kernel-dev-dfb09f9b7ab03fd367740e541a5caf830ed56726.zip
op-kernel-dev-dfb09f9b7ab03fd367740e541a5caf830ed56726.tar.gz
2 files changed, 91 insertions, 3 deletions
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b13ed39..b0234bc 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -458,6 +458,19 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 					"with P0 frequency!\n");
 		}
 	}
+
+	if (c->x86 == 0x15) {
+		unsigned long upperbit;
+		u32 cpuid, assoc;
+
+		cpuid	 = cpuid_edx(0x80000005);
+		assoc	 = cpuid >> 16 & 0xff;
+		upperbit = ((cpuid >> 24) << 10) / assoc;
+
+		va_align.mask	  = (upperbit - 1) & PAGE_MASK;
+		va_align.flags    = ALIGN_VA_32 | ALIGN_VA_64;
+
+	}
 }
 
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index ff14a50..aaa8d09 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -18,6 +18,72 @@
 #include <asm/ia32.h>
 #include <asm/syscalls.h>
 
+struct __read_mostly va_alignment va_align = {
+	.flags = -1,
+};
+
+/*
+ * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
+ *
+ * @flags denotes the allocation direction - bottomup or topdown -
+ * or vDSO; see call sites below.
+ */
+unsigned long align_addr(unsigned long addr, struct file *filp,
+			 enum align_flags flags)
+{
+	unsigned long tmp_addr;
+
+	/* handle 32- and 64-bit case with a single conditional */
+	if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
+		return addr;
+
+	if (!(current->flags & PF_RANDOMIZE))
+		return addr;
+
+	if (!((flags & ALIGN_VDSO) || filp))
+		return addr;
+
+	tmp_addr = addr;
+
+	/*
+	 * We need an address which is <= than the original
+	 * one only when in topdown direction.
+	 */
+	if (!(flags & ALIGN_TOPDOWN))
+		tmp_addr += va_align.mask;
+
+	tmp_addr &= ~va_align.mask;
+
+	return tmp_addr;
+}
+
+static int __init control_va_addr_alignment(char *str)
+{
+	/* guard against enabling this on other CPU families */
+	if (va_align.flags < 0)
+		return 1;
+
+	if (*str == 0)
+		return 1;
+
+	if (*str == '=')
+		str++;
+
+	if (!strcmp(str, "32"))
+		va_align.flags = ALIGN_VA_32;
+	else if (!strcmp(str, "64"))
+		va_align.flags = ALIGN_VA_64;
+	else if (!strcmp(str, "off"))
+		va_align.flags = 0;
+	else if (!strcmp(str, "on"))
+		va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
+	else
+		return 0;
+
+	return 1;
+}
+__setup("align_va_addr", control_va_addr_alignment);
+
 SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
 		unsigned long, prot, unsigned long, flags,
 		unsigned long, fd, unsigned long, off)
@@ -92,6 +158,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	start_addr = addr;
 
 full_search:
+
+	addr = align_addr(addr, filp, 0);
+
 	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
 		/* At this point:  (!vma || addr < vma->vm_end). */
 		if (end - len < addr) {
@@ -117,6 +186,7 @@ full_search:
 			mm->cached_hole_size = vma->vm_start - addr;
 
 		addr = vma->vm_end;
+		addr = align_addr(addr, filp, 0);
 	}
 }
 
@@ -161,10 +231,13 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 	/* make sure it can fit in the remaining address space */
 	if (addr > len) {
-		vma = find_vma(mm, addr-len);
-		if (!vma || addr <= vma->vm_start)
+		unsigned long tmp_addr = align_addr(addr - len, filp,
+						    ALIGN_TOPDOWN);
+
+		vma = find_vma(mm, tmp_addr);
+		if (!vma || tmp_addr + len <= vma->vm_start)
 			/* remember the address as a hint for next time */
-			return mm->free_area_cache = addr-len;
+			return mm->free_area_cache = tmp_addr;
 	}
 
 	if (mm->mmap_base < len)
@@ -173,6 +246,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	addr = mm->mmap_base-len;
 
 	do {
+		addr = align_addr(addr, filp, ALIGN_TOPDOWN);
+
 		/*
 		 * Lookup failure means no vma is above this address,
 		 * else if new region fits below vma->vm_start,
author	Borislav Petkov <borislav.petkov@amd.com>	2011-08-05 15:15:08 +0200
committer	H. Peter Anvin <hpa@linux.intel.com>	2011-08-05 12:26:44 -0700
commit	dfb09f9b7ab03fd367740e541a5caf830ed56726 (patch)
tree	8bd8fdbbf3fb67f7d0aed73a1e8e1c7034ed2d54 /arch/x86/kernel
parent	13f9a3737c903ace57d8aaebe81a3bbaeb0aa0a2 (diff)
download	op-kernel-dev-dfb09f9b7ab03fd367740e541a5caf830ed56726.zip op-kernel-dev-dfb09f9b7ab03fd367740e541a5caf830ed56726.tar.gz