From 049f3e84d34bfc37f25c91715f2e24a90fb8665e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 29 Apr 2013 16:06:10 +0100
Subject: ARM: 7705/1: use optimized do_div only for EABI

In OABI configurations, some uses of the do_div function
cause gcc to run out of registers. To work around that,
we can force the use of the out-of-line version for
configurations that build a OABI kernel.

Without this patch, building netx_defconfig results in:

net/core/pktgen.c: In function 'pktgen_if_show':
net/core/pktgen.c:682:2775: error: can't find a register in class 'GENERAL_REGS' while reloading 'asm'
net/core/pktgen.c:682:3153: error: can't find a register in class 'GENERAL_REGS' while reloading 'asm'
net/core/pktgen.c:682:2775: error: 'asm' operand has impossible constraints
net/core/pktgen.c:682:3153: error: 'asm' operand has impossible constraints

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/div64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h
index fe92ccf..191ada6 100644
--- a/arch/arm/include/asm/div64.h
+++ b/arch/arm/include/asm/div64.h
@@ -46,7 +46,7 @@
 	__rem;							\
 })
 
-#if __GNUC__ < 4
+#if __GNUC__ < 4 || !defined(CONFIG_AEABI)
 
 /*
  * gcc versions earlier than 4.0 are simply too problematic for the
-- 
cgit v1.1


From 9b97173e785a54c5df0aa23d1e1f680f61e36e43 Mon Sep 17 00:00:00 2001
From: Laura Abbott <lauraa@codeaurora.org>
Date: Thu, 16 May 2013 19:40:22 +0100
Subject: ARM: 7728/1: mm: Use phys_addr_t properly for ioremap functions

Several of the ioremap functions use unsigned long in places
resulting in truncation if physical addresses greater than
4G are passed in. Change the types of the functions and the
callers accordingly.

Cc: Krzysztof Halasa <khc@pm.waw.pl>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/io.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 652b560..d070741 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -130,16 +130,16 @@ static inline u32 __raw_readl(const volatile void __iomem *addr)
  */
 extern void __iomem *__arm_ioremap_pfn_caller(unsigned long, unsigned long,
 	size_t, unsigned int, void *);
-extern void __iomem *__arm_ioremap_caller(unsigned long, size_t, unsigned int,
+extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int,
 	void *);
 
 extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int);
-extern void __iomem *__arm_ioremap(unsigned long, size_t, unsigned int);
-extern void __iomem *__arm_ioremap_exec(unsigned long, size_t, bool cached);
+extern void __iomem *__arm_ioremap(phys_addr_t, size_t, unsigned int);
+extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached);
 extern void __iounmap(volatile void __iomem *addr);
 extern void __arm_iounmap(volatile void __iomem *addr);
 
-extern void __iomem * (*arch_ioremap_caller)(unsigned long, size_t,
+extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t,
 	unsigned int, void *);
 extern void (*arch_iounmap)(volatile void __iomem *);
 
-- 
cgit v1.1


From 926edcc747e2efb3c9add7ed4dbc4e7a3a959d02 Mon Sep 17 00:00:00 2001
From: Cyril Chemparathy <cyril@ti.com>
Date: Sun, 22 Jul 2012 13:40:38 -0400
Subject: ARM: LPAE: use signed arithmetic for mask definitions

This patch applies to PAGE_MASK, PMD_MASK, and PGDIR_MASK, where forcing
unsigned long math truncates the mask at the 32-bits.  This clearly does bad
things on PAE systems.

This patch fixes this problem by defining these masks as signed quantities.
We then rely on sign extension to do the right thing.

Signed-off-by: Cyril Chemparathy <cyril@ti.com>
Signed-off-by: Vitaly Andrianov <vitalya@ti.com>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Subash Patel <subash.rp@samsung.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/page.h           | 2 +-
 arch/arm/include/asm/pgtable-3level.h | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 812a494..6363f3d 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -13,7 +13,7 @@
 /* PAGE_SHIFT determines the page size */
 #define PAGE_SHIFT		12
 #define PAGE_SIZE		(_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK		(~(PAGE_SIZE-1))
+#define PAGE_MASK		(~((1 << PAGE_SHIFT) - 1))
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 86b8fe3..5b85b21 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -48,16 +48,16 @@
 #define PMD_SHIFT		21
 
 #define PMD_SIZE		(1UL << PMD_SHIFT)
-#define PMD_MASK		(~(PMD_SIZE-1))
+#define PMD_MASK		(~((1 << PMD_SHIFT) - 1))
 #define PGDIR_SIZE		(1UL << PGDIR_SHIFT)
-#define PGDIR_MASK		(~(PGDIR_SIZE-1))
+#define PGDIR_MASK		(~((1 << PGDIR_SHIFT) - 1))
 
 /*
  * section address mask and size definitions.
  */
 #define SECTION_SHIFT		21
 #define SECTION_SIZE		(1UL << SECTION_SHIFT)
-#define SECTION_MASK		(~(SECTION_SIZE-1))
+#define SECTION_MASK		(~((1 << SECTION_SHIFT) - 1))
 
 #define USER_PTRS_PER_PGD	(PAGE_OFFSET / PGDIR_SIZE)
 
-- 
cgit v1.1


From 13f659b0f363114282679d06094337c5efa12fa8 Mon Sep 17 00:00:00 2001
From: Cyril Chemparathy <cyril@ti.com>
Date: Mon, 16 Jul 2012 15:37:06 -0400
Subject: ARM: LPAE: use phys_addr_t in switch_mm()

This patch modifies the switch_mm() processor functions to use phys_addr_t.
On LPAE systems, we now honor the upper 32-bits of the physical address that
is being passed in, and program these into TTBR as expected.

Signed-off-by: Cyril Chemparathy <cyril@ti.com>
Signed-off-by: Vitaly Andrianov <vitalya@ti.com>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Subash Patel <subash.rp@samsung.com>
[will: fixed up conflict in 3-level switch_mm with big-endian changes]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/proc-fns.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index f3628fb..75b5f14 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -60,7 +60,7 @@ extern struct processor {
 	/*
 	 * Set the page table
 	 */
-	void (*switch_mm)(unsigned long pgd_phys, struct mm_struct *mm);
+	void (*switch_mm)(phys_addr_t pgd_phys, struct mm_struct *mm);
 	/*
 	 * Set a possibly extended PTE.  Non-extended PTEs should
 	 * ignore 'ext'.
@@ -82,7 +82,7 @@ extern void cpu_proc_init(void);
 extern void cpu_proc_fin(void);
 extern int cpu_do_idle(void);
 extern void cpu_dcache_clean_area(void *, int);
-extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
+extern void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
 #ifdef CONFIG_ARM_LPAE
 extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte);
 #else
-- 
cgit v1.1


From 1fc84ae84b5153e32a4b6ace507f9663e10b0cb2 Mon Sep 17 00:00:00 2001
From: Cyril Chemparathy <cyril@ti.com>
Date: Mon, 16 Jul 2012 17:20:17 -0400
Subject: ARM: LPAE: use 64-bit accessors for TTBR registers

This patch adds TTBR accessor macros, and modifies cpu_get_pgd() and
the LPAE version of cpu_set_reserved_ttbr0() to use these instead.

In the process, we also fix these functions to correctly handle cases
where the physical address lies beyond the 4G limit of 32-bit addressing.

Signed-off-by: Cyril Chemparathy <cyril@ti.com>
Signed-off-by: Vitaly Andrianov <vitalya@ti.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Subash Patel <subash.rp@samsung.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/proc-fns.h | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index 75b5f14..1c3cf94 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -116,13 +116,25 @@ extern void cpu_resume(void);
 #define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm)
 
 #ifdef CONFIG_ARM_LPAE
+
+#define cpu_get_ttbr(nr)					\
+	({							\
+		u64 ttbr;					\
+		__asm__("mrrc	p15, " #nr ", %Q0, %R0, c2"	\
+			: "=r" (ttbr));				\
+		ttbr;						\
+	})
+
+#define cpu_set_ttbr(nr, val)					\
+	do {							\
+		u64 ttbr = val;					\
+		__asm__("mcrr	p15, " #nr ", %Q0, %R0, c2"	\
+			: : "r" (ttbr));			\
+	} while (0)
+
 #define cpu_get_pgd()	\
 	({						\
-		unsigned long pg, pg2;			\
-		__asm__("mrrc	p15, 0, %0, %1, c2"	\
-			: "=r" (pg), "=r" (pg2)		\
-			:				\
-			: "cc");			\
+		u64 pg = cpu_get_ttbr(0);		\
 		pg &= ~(PTRS_PER_PGD*sizeof(pgd_t)-1);	\
 		(pgd_t *)phys_to_virt(pg);		\
 	})
-- 
cgit v1.1


From a7fbc0d62a4d46e642af889e7288fede5078bc46 Mon Sep 17 00:00:00 2001
From: Cyril Chemparathy <cyril@ti.com>
Date: Sat, 21 Jul 2012 19:47:52 -0400
Subject: ARM: LPAE: factor out T1SZ and TTBR1 computations

This patch moves the TTBR1 offset calculation and the T1SZ calculation out
of the TTB setup assembly code.  This should not affect functionality in
any way, but improves code readability as well as readability of subsequent
patches in this series.

Signed-off-by: Cyril Chemparathy <cyril@ti.com>
Signed-off-by: Vitaly Andrianov <vitalya@ti.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Subash Patel <subash.rp@samsung.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/pgtable-3level-hwdef.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 18f5cef..c6c6e6d 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -79,4 +79,24 @@
 #define PHYS_MASK_SHIFT		(40)
 #define PHYS_MASK		((1ULL << PHYS_MASK_SHIFT) - 1)
 
+/*
+ * TTBR0/TTBR1 split (PAGE_OFFSET):
+ *   0x40000000: T0SZ = 2, T1SZ = 0 (not used)
+ *   0x80000000: T0SZ = 0, T1SZ = 1
+ *   0xc0000000: T0SZ = 0, T1SZ = 2
+ *
+ * Only use this feature if PHYS_OFFSET <= PAGE_OFFSET, otherwise
+ * booting secondary CPUs would end up using TTBR1 for the identity
+ * mapping set up in TTBR0.
+ */
+#if defined CONFIG_VMSPLIT_2G
+#define TTBR1_OFFSET	16			/* skip two L1 entries */
+#elif defined CONFIG_VMSPLIT_3G
+#define TTBR1_OFFSET	(4096 * (1 + 3))	/* only L2, skip pgd + 3*pmd */
+#else
+#define TTBR1_OFFSET	0
+#endif
+
+#define TTBR1_SIZE	(((PAGE_OFFSET >> 30) - 1) << 16)
+
 #endif
-- 
cgit v1.1


From 4756dcbfd37819a8359d3c69a22be2ee41666d0f Mon Sep 17 00:00:00 2001
From: Cyril Chemparathy <cyril@ti.com>
Date: Sat, 21 Jul 2012 15:55:04 -0400
Subject: ARM: LPAE: accomodate >32-bit addresses for page table base

This patch redefines the early boot time use of the R4 register to steal a few
low order bits (ARCH_PGD_SHIFT bits) on LPAE systems.  This allows for up to
38-bit physical addresses.

Signed-off-by: Cyril Chemparathy <cyril@ti.com>
Signed-off-by: Vitaly Andrianov <vitalya@ti.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Subash Patel <subash.rp@samsung.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/memory.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 57870ab..e506088 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -18,6 +18,8 @@
 #include <linux/types.h>
 #include <linux/sizes.h>
 
+#include <asm/cache.h>
+
 #ifdef CONFIG_NEED_MACH_MEMORY_H
 #include <mach/memory.h>
 #endif
@@ -141,6 +143,20 @@
 #define page_to_phys(page)	(__pfn_to_phys(page_to_pfn(page)))
 #define phys_to_page(phys)	(pfn_to_page(__phys_to_pfn(phys)))
 
+/*
+ * Minimum guaranted alignment in pgd_alloc().  The page table pointers passed
+ * around in head.S and proc-*.S are shifted by this amount, in order to
+ * leave spare high bits for systems with physical address extension.  This
+ * does not fully accomodate the 40-bit addressing capability of ARM LPAE, but
+ * gives us about 38-bits or so.
+ */
+#ifdef CONFIG_ARM_LPAE
+#define ARCH_PGD_SHIFT		L1_CACHE_SHIFT
+#else
+#define ARCH_PGD_SHIFT		0
+#endif
+#define ARCH_PGD_MASK		((1 << ARCH_PGD_SHIFT) - 1)
+
 #ifndef __ASSEMBLY__
 
 /*
-- 
cgit v1.1


From 5b20c5b2f014ecc0a6310988af69cd7ede9e7c67 Mon Sep 17 00:00:00 2001
From: Cyril Chemparathy <cyril@ti.com>
Date: Wed, 12 Sep 2012 10:19:05 -0400
Subject: ARM: fix type of PHYS_PFN_OFFSET to unsigned long

On LPAE machines, PHYS_OFFSET evaluates to a phys_addr_t and this type is
inherited by the PHYS_PFN_OFFSET definition as well.  Consequently, the kernel
build emits warnings of the form:

init/main.c: In function 'start_kernel':
init/main.c:588:7: warning: format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'phys_addr_t' [-Wformat]

This patch fixes this warning by pinning down the PFN type to unsigned long.

Signed-off-by: Cyril Chemparathy <cyril@ti.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Subash Patel <subash.rp@samsung.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/memory.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index e506088..584786f 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -223,7 +223,7 @@ static inline unsigned long __phys_to_virt(unsigned long x)
  * direct-mapped view.  We assume this is the first page
  * of RAM in the mem_map as well.
  */
-#define PHYS_PFN_OFFSET	(PHYS_OFFSET >> PAGE_SHIFT)
+#define PHYS_PFN_OFFSET	((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT))
 
 /*
  * These are *only* valid on the kernel direct mapped RAM memory.
-- 
cgit v1.1


From e38a517578d6c0f764b0d0f6e26dcdf9f70c69d7 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 2 May 2013 13:52:01 +0100
Subject: ARM: lpae: fix definition of PTE_HWTABLE_PTRS

For 2-level page tables, PTE_HWTABLE_PTRS describes the offset between
Linux PTEs and hardware PTEs. On LPAE, there is no distinction (since
we have 64-bit descriptors with plenty of space) so PTE_HWTABLE_PTRS
should be 0. Unfortunately, it is wrongly defined as PTRS_PER_PTE,
meaning that current pte table flushing is off by a page. Luckily,
all current LPAE implementations are SMP, so the hardware walker can
snoop L1.

This patch fixes the broken definition.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/pgtable-3level.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 5b85b21..d03c589 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -33,7 +33,7 @@
 #define PTRS_PER_PMD		512
 #define PTRS_PER_PGD		4
 
-#define PTE_HWTABLE_PTRS	(PTRS_PER_PTE)
+#define PTE_HWTABLE_PTRS	(0)
 #define PTE_HWTABLE_OFF		(0)
 #define PTE_HWTABLE_SIZE	(PTRS_PER_PTE * sizeof(u64))
 
-- 
cgit v1.1


From a469abd0f868c902b75532579bf87553dcf1b360 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 8 Apr 2013 17:13:12 +0100
Subject: ARM: elf: add new hwcap for identifying atomic ldrd/strd instructions

CPUs implementing LPAE have atomic ldrd/strd instructions, meaning that
userspace software can avoid having to use the exclusive variants of
these instructions if they wish.

This patch advertises the atomicity of these instructions via the
hwcaps, so userspace can detect this CPU feature.

Reported-by: Vladimir Danushevsky <vladimir.danushevsky@oracle.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/uapi/asm/hwcap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h
index 3688fd1..6d34d08 100644
--- a/arch/arm/include/uapi/asm/hwcap.h
+++ b/arch/arm/include/uapi/asm/hwcap.h
@@ -25,6 +25,6 @@
 #define HWCAP_IDIVT	(1 << 18)
 #define HWCAP_VFPD32	(1 << 19)	/* set if VFP has 32 regs (not 16) */
 #define HWCAP_IDIV	(HWCAP_IDIVA | HWCAP_IDIVT)
-
+#define HWCAP_LPAE	(1 << 20)
 
 #endif /* _UAPI__ASMARM_HWCAP_H */
-- 
cgit v1.1


From dde1b65110353517816bcbc58539463396202244 Mon Sep 17 00:00:00 2001
From: Steve Capper <steve.capper@linaro.org>
Date: Fri, 17 May 2013 12:32:55 +0100
Subject: ARM: mm: correct pte_same behaviour for LPAE.

For 3 levels of paging the PTE_EXT_NG bit will be set for user
address ptes that are written to a page table but not for ptes
created with mk_pte.

This can cause some comparison tests made by pte_same to fail
spuriously and lead to other problems.

To correct this behaviour, we mask off PTE_EXT_NG for any pte that
is present before running the comparison.

Signed-off-by: Steve Capper <steve.capper@linaro.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/pgtable-3level.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 86b8fe3..70f041c 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -166,6 +166,23 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 		clean_pmd_entry(pmdp);	\
 	} while (0)
 
+/*
+ * For 3 levels of paging the PTE_EXT_NG bit will be set for user address ptes
+ * that are written to a page table but not for ptes created with mk_pte.
+ *
+ * In hugetlb_no_page, a new huge pte (new_pte) is generated and passed to
+ * hugetlb_cow, where it is compared with an entry in a page table.
+ * This comparison test fails erroneously leading ultimately to a memory leak.
+ *
+ * To correct this behaviour, we mask off PTE_EXT_NG for any pte that is
+ * present before running the comparison.
+ */
+#define __HAVE_ARCH_PTE_SAME
+#define pte_same(pte_a,pte_b)	((pte_present(pte_a) ? pte_val(pte_a) & ~PTE_EXT_NG	\
+					: pte_val(pte_a))				\
+				== (pte_present(pte_b) ? pte_val(pte_b) & ~PTE_EXT_NG	\
+					: pte_val(pte_b)))
+
 #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,__pte(pte_val(pte)|(ext)))
 
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.1


From 1355e2a6eb88f04d76125c057dc5fca64d4b6a9e Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 25 Jul 2012 14:32:38 +0100
Subject: ARM: mm: HugeTLB support for LPAE systems.

This patch adds support for hugetlbfs based on the x86 implementation.
It allows mapping of 2MB sections (see Documentation/vm/hugetlbpage.txt
for usage). The 64K pages configuration is not supported (section size
is 512MB in this case).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
[steve.capper@linaro.org: symbolic constants replace numbers in places.
Split up into multiple files, to simplify future non-LPAE support,
removed huge_pmd_share code, as this is very rarely executed,
Added PROT_NONE support].
Signed-off-by: Steve Capper <steve.capper@linaro.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/hugetlb-3level.h       | 71 ++++++++++++++++++++++++
 arch/arm/include/asm/hugetlb.h              | 84 +++++++++++++++++++++++++++++
 arch/arm/include/asm/pgtable-3level-hwdef.h |  2 +
 arch/arm/include/asm/pgtable-3level.h       | 11 ++++
 4 files changed, 168 insertions(+)
 create mode 100644 arch/arm/include/asm/hugetlb-3level.h
 create mode 100644 arch/arm/include/asm/hugetlb.h

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/hugetlb-3level.h b/arch/arm/include/asm/hugetlb-3level.h
new file mode 100644
index 0000000..d4014fb
--- /dev/null
+++ b/arch/arm/include/asm/hugetlb-3level.h
@@ -0,0 +1,71 @@
+/*
+ * arch/arm/include/asm/hugetlb-3level.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ASM_ARM_HUGETLB_3LEVEL_H
+#define _ASM_ARM_HUGETLB_3LEVEL_H
+
+
+/*
+ * If our huge pte is non-zero then mark the valid bit.
+ * This allows pte_present(huge_ptep_get(ptep)) to return true for non-zero
+ * ptes.
+ * (The valid bit is automatically cleared by set_pte_at for PROT_NONE ptes).
+ */
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	pte_t retval = *ptep;
+	if (pte_val(retval))
+		pte_val(retval) |= L_PTE_VALID;
+	return retval;
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep)
+{
+	ptep_clear_flush(vma, addr, ptep);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+#endif /* _ASM_ARM_HUGETLB_3LEVEL_H */
diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h
new file mode 100644
index 0000000..1f1b1cd
--- /dev/null
+++ b/arch/arm/include/asm/hugetlb.h
@@ -0,0 +1,84 @@
+/*
+ * arch/arm/include/asm/hugetlb.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ASM_ARM_HUGETLB_H
+#define _ASM_ARM_HUGETLB_H
+
+#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
+
+#include <asm/hugetlb-3level.h>
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+					  unsigned long addr, unsigned long end,
+					  unsigned long floor,
+					  unsigned long ceiling)
+{
+	free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr, unsigned long len)
+{
+	return 0;
+}
+
+static inline int prepare_hugepage_range(struct file *file,
+					 unsigned long addr, unsigned long len)
+{
+	struct hstate *h = hstate_file(file);
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (addr & ~huge_page_mask(h))
+		return -EINVAL;
+	return 0;
+}
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+	return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+	clear_bit(PG_dcache_clean, &page->flags);
+}
+
+#endif /* _ASM_ARM_HUGETLB_H */
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 18f5cef..42df407 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -30,6 +30,7 @@
 #define PMD_TYPE_FAULT		(_AT(pmdval_t, 0) << 0)
 #define PMD_TYPE_TABLE		(_AT(pmdval_t, 3) << 0)
 #define PMD_TYPE_SECT		(_AT(pmdval_t, 1) << 0)
+#define PMD_TABLE_BIT		(_AT(pmdval_t, 1) << 1)
 #define PMD_BIT4		(_AT(pmdval_t, 0))
 #define PMD_DOMAIN(x)		(_AT(pmdval_t, 0))
 #define PMD_APTABLE_SHIFT	(61)
@@ -66,6 +67,7 @@
 #define PTE_TYPE_MASK		(_AT(pteval_t, 3) << 0)
 #define PTE_TYPE_FAULT		(_AT(pteval_t, 0) << 0)
 #define PTE_TYPE_PAGE		(_AT(pteval_t, 3) << 0)
+#define PTE_TABLE_BIT		(_AT(pteval_t, 1) << 1)
 #define PTE_BUFFERABLE		(_AT(pteval_t, 1) << 2)		/* AttrIndx[0] */
 #define PTE_CACHEABLE		(_AT(pteval_t, 1) << 3)		/* AttrIndx[1] */
 #define PTE_EXT_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 70f041c..d1bcd82 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -62,6 +62,14 @@
 #define USER_PTRS_PER_PGD	(PAGE_OFFSET / PGDIR_SIZE)
 
 /*
+ * Hugetlb definitions.
+ */
+#define HPAGE_SHIFT		PMD_SHIFT
+#define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
+#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+
+/*
  * "Linux" PTE definitions for LPAE.
  *
  * These bits overlap with the hardware bits but the naming is preserved for
@@ -185,6 +193,9 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 
 #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,__pte(pte_val(pte)|(ext)))
 
+#define pte_huge(pte)		(pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT))
+#define pte_mkhuge(pte)		(__pte(pte_val(pte) & ~PTE_TABLE_BIT))
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_PGTABLE_3LEVEL_H */
-- 
cgit v1.1


From 8d962507007357d6fbbcbdd1647faa389a9aed6d Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 25 Jul 2012 14:39:26 +0100
Subject: ARM: mm: Transparent huge page support for LPAE systems.

The patch adds support for THP (transparent huge pages) to LPAE
systems. When this feature is enabled, the kernel tries to map
anonymous pages as 2MB sections where possible.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
[steve.capper@linaro.org: symbolic constants used, value of
PMD_SECT_SPLITTING adjusted, tlbflush.h included in pgtable.h,
added PROT_NONE support.]
Signed-off-by: Steve Capper <steve.capper@linaro.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/pgtable-3level-hwdef.h |  2 +
 arch/arm/include/asm/pgtable-3level.h       | 60 +++++++++++++++++++++++++++++
 arch/arm/include/asm/pgtable.h              |  3 ++
 arch/arm/include/asm/tlb.h                  |  6 +++
 arch/arm/include/asm/tlbflush.h             |  2 +
 5 files changed, 73 insertions(+)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 42df407..f088c86 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -42,6 +42,8 @@
  */
 #define PMD_SECT_BUFFERABLE	(_AT(pmdval_t, 1) << 2)
 #define PMD_SECT_CACHEABLE	(_AT(pmdval_t, 1) << 3)
+#define PMD_SECT_USER		(_AT(pmdval_t, 1) << 6)		/* AP[1] */
+#define PMD_SECT_RDONLY		(_AT(pmdval_t, 1) << 7)		/* AP[2] */
 #define PMD_SECT_S		(_AT(pmdval_t, 3) << 8)
 #define PMD_SECT_AF		(_AT(pmdval_t, 1) << 10)
 #define PMD_SECT_nG		(_AT(pmdval_t, 1) << 11)
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index d1bcd82..54733e5 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -87,6 +87,11 @@
 #define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 56)	/* unused */
 #define L_PTE_NONE		(_AT(pteval_t, 1) << 57)	/* PROT_NONE */
 
+#define PMD_SECT_VALID		(_AT(pmdval_t, 1) << 0)
+#define PMD_SECT_DIRTY		(_AT(pmdval_t, 1) << 55)
+#define PMD_SECT_SPLITTING	(_AT(pmdval_t, 1) << 56)
+#define PMD_SECT_NONE		(_AT(pmdval_t, 1) << 57)
+
 /*
  * To be used in assembly code with the upper page attributes.
  */
@@ -196,6 +201,61 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 #define pte_huge(pte)		(pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT))
 #define pte_mkhuge(pte)		(__pte(pte_val(pte) & ~PTE_TABLE_BIT))
 
+#define pmd_young(pmd)		(pmd_val(pmd) & PMD_SECT_AF)
+
+#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write(pmd)		(!(pmd_val(pmd) & PMD_SECT_RDONLY))
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
+#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
+#endif
+
+#define PMD_BIT_FUNC(fn,op) \
+static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
+
+PMD_BIT_FUNC(wrprotect,	|= PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkold,	&= ~PMD_SECT_AF);
+PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
+PMD_BIT_FUNC(mkwrite,   &= ~PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkdirty,   |= PMD_SECT_DIRTY);
+PMD_BIT_FUNC(mkyoung,   |= PMD_SECT_AF);
+
+#define pmd_mkhuge(pmd)		(__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
+
+#define pmd_pfn(pmd)		(((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot)	(__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)
+
+/* represent a notpresent pmd by zero, this is used by pmdp_invalidate */
+#define pmd_mknotpresent(pmd)	(__pmd(0))
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | PMD_SECT_RDONLY |
+				PMD_SECT_VALID | PMD_SECT_NONE;
+	pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
+	return pmd;
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+			      pmd_t *pmdp, pmd_t pmd)
+{
+	BUG_ON(addr >= TASK_SIZE);
+
+	/* create a faulting entry if PROT_NONE protected */
+	if (pmd_val(pmd) & PMD_SECT_NONE)
+		pmd_val(pmd) &= ~PMD_SECT_VALID;
+
+	*pmdp = __pmd(pmd_val(pmd) | PMD_SECT_nG);
+	flush_pmd_entry(pmdp);
+}
+
+static inline int has_transparent_hugepage(void)
+{
+	return 1;
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_PGTABLE_3LEVEL_H */
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 9bcd262..eaedce7 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -24,6 +24,9 @@
 #include <asm/memory.h>
 #include <asm/pgtable-hwdef.h>
 
+
+#include <asm/tlbflush.h>
+
 #ifdef CONFIG_ARM_LPAE
 #include <asm/pgtable-3level.h>
 #else
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 99a1951..bdc62da 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -223,6 +223,12 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 #endif
 }
 
+static inline void
+tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
+{
+	tlb_add_flush(tlb, addr);
+}
+
 #define pte_free_tlb(tlb, ptep, addr)	__pte_free_tlb(tlb, ptep, addr)
 #define pmd_free_tlb(tlb, pmdp, addr)	__pmd_free_tlb(tlb, pmdp, addr)
 #define pud_free_tlb(tlb, pudp, addr)	pud_free((tlb)->mm, pudp)
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index a3625d1..c374592 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -535,6 +535,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 }
 #endif
 
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
+
 #endif
 
 #endif /* CONFIG_MMU */
-- 
cgit v1.1


From 0d651e4e65e96989f72236bf83bd4c6e55eb6ce4 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 30 Jan 2013 17:51:26 +0000
Subject: clocksource: arch_timer: use virtual counters

Switching between reading the virtual or physical counters is
problematic, as some core code wants a view of time before we're fully
set up. Using a function pointer and switching the source after the
first read can make time appear to go backwards, and having a check in
the read function is an unfortunate block on what we want to be a fast
path.

Instead, this patch makes us always use the virtual counters. If we're a
guest, or don't have hyp mode, we'll use the virtual timers, and as such
don't care about CNTVOFF as long as it doesn't change in such a way as
to make time appear to travel backwards. As the guest will use the
virtual timers, a (potential) KVM host must use the physical timers
(which can wake up the host even if they fire while a guest is
executing), and hence a host must have CNTVOFF set to zero so as to have
a consistent view of time between the physical timers and virtual
counters.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: Rob Herring <rob.herring@calxeda.com>
---
 arch/arm/include/asm/arch_timer.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index 7c1bfc0..accefe0 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -80,15 +80,6 @@ static inline u32 arch_timer_get_cntfrq(void)
 	return val;
 }
 
-static inline u64 arch_counter_get_cntpct(void)
-{
-	u64 cval;
-
-	isb();
-	asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval));
-	return cval;
-}
-
 static inline u64 arch_counter_get_cntvct(void)
 {
 	u64 cval;
-- 
cgit v1.1


From 5c709e699881afd1cf9244c719eb063c3476a405 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 28 Feb 2012 12:56:06 +0000
Subject: ARM: nommu: define dummy TLB operations for nommu configurations

nommu platforms do not perform address translation and therefore clearly
don't have TLBs. However, some SMP code assumes the presence of the TLB
flushing routines and will therefore fail to compile for a nommu system.

This patch defines dummy local_* TLB operations and #defines
tlb_ops_need_broadcast() as 0, therefore causing the usual ARM SMP TLB
operations to call the local variants instead.

Signed-off-by: Will Deacon <will.deacon@arm.com>
CC: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
CC: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/include/asm/smp_plat.h |  4 ++++
 arch/arm/include/asm/tlbflush.h | 23 ++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h
index aaa61b6..1c7b6f8 100644
--- a/arch/arm/include/asm/smp_plat.h
+++ b/arch/arm/include/asm/smp_plat.h
@@ -26,6 +26,9 @@ static inline bool is_smp(void)
 }
 
 /* all SMP configurations have the extended CPUID registers */
+#ifndef CONFIG_MMU
+#define tlb_ops_need_broadcast()	0
+#else
 static inline int tlb_ops_need_broadcast(void)
 {
 	if (!is_smp())
@@ -33,6 +36,7 @@ static inline int tlb_ops_need_broadcast(void)
 
 	return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 2;
 }
+#endif
 
 #if !defined(CONFIG_SMP) || __LINUX_ARM_ARCH__ >= 7
 #define cache_ops_need_broadcast()	0
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index a3625d1..cc8517e 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -537,6 +537,27 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 
 #endif
 
-#endif /* CONFIG_MMU */
+#elif defined(CONFIG_SMP)	/* !CONFIG_MMU */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/mm_types.h>
+
+static inline void local_flush_tlb_all(void)									{ }
+static inline void local_flush_tlb_mm(struct mm_struct *mm)							{ }
+static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)			{ }
+static inline void local_flush_tlb_kernel_page(unsigned long kaddr)						{ }
+static inline void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)	{ }
+static inline void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)				{ }
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_mm(struct mm_struct *mm);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr);
+extern void flush_tlb_kernel_page(unsigned long kaddr);
+extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+#endif	/* __ASSEMBLY__ */
+
+#endif
 
 #endif
-- 
cgit v1.1


From 02ed1c7bba57b66c9a2f3146c935af12a93f2d76 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 28 Feb 2012 14:26:42 +0000
Subject: ARM: nommu: provide dummy cpu_switch_mm implementation

cpu_switch_mm is a logical nop on nommu systems, so define it as such
when !CONFIG_MMU.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/proc-fns.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index f3628fb..a6c99fe 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -137,6 +137,10 @@ extern void cpu_resume(void);
 	})
 #endif
 
+#else	/*!CONFIG_MMU */
+
+#define cpu_switch_mm(pgd,mm)	{ }
+
 #endif
 
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.1


From 8d655d835bc5c0cb7d485d147ba96249e356a697 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Wed, 24 Apr 2013 11:56:09 +0100
Subject: ARM: nommu: add stub local_flush_bp_all() for !CONFIG_MMUU

Since the merging of Will's tlb-ops branch, specifically 89c7e4b8bbb3
(ARM: 7661/1: mm: perform explicit branch predictor maintenance when required),
building SMP without CONFIG_MMU has been broken.

The local_flush_bp_all function is only called for operations related to
changing the kernel's view of memory and ASID rollover - both of which are
irrelevant to an !MMU kernel.

This patch adds a stub local_flush_bp_all() function to the other tlb
maintenance stubs and restores the ability to build an SMP !MMU kernel.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/tlbflush.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index cc8517e..ded7c16 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -549,6 +549,7 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned lon
 static inline void local_flush_tlb_kernel_page(unsigned long kaddr)						{ }
 static inline void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)	{ }
 static inline void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)				{ }
+static inline void local_flush_bp_all(void)									{ }
 
 extern void flush_tlb_all(void);
 extern void flush_tlb_mm(struct mm_struct *mm);
@@ -556,6 +557,7 @@ extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr);
 extern void flush_tlb_kernel_page(unsigned long kaddr);
 extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_bp_all(void);
 #endif	/* __ASSEMBLY__ */
 
 #endif
-- 
cgit v1.1


From ed18bdc875328921114139e17ade1d2569e82c85 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Thu, 30 Aug 2012 13:46:44 +0100
Subject: ARM: vexpress: Add Cortex-R Series UART, selectable via DEBUG_LL

The Cortex-R series processors on Versatile Express have a different memory
map to the RS1 and CA9X4 tiles. Most of the platform difference can be
expressed in device-trees, but the UART definitions for LL_DEBUG cannot.

This patch defines the UART location for R-Series processors on
versatile-express, allowing low-level debug and output from the decompressor.
These definitions are selectable via Kconfig

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
CC: Pawel Moll <pawel.moll@arm.com>
---
 arch/arm/include/debug/vexpress.S | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/debug/vexpress.S b/arch/arm/include/debug/vexpress.S
index dc8e882..acafb22 100644
--- a/arch/arm/include/debug/vexpress.S
+++ b/arch/arm/include/debug/vexpress.S
@@ -16,6 +16,8 @@
 #define DEBUG_LL_PHYS_BASE_RS1		0x1c000000
 #define DEBUG_LL_UART_OFFSET_RS1	0x00090000
 
+#define DEBUG_LL_UART_PHYS_CRX		0xb0090000
+
 #define DEBUG_LL_VIRT_BASE		0xf8000000
 
 #if defined(CONFIG_DEBUG_VEXPRESS_UART0_DETECT)
@@ -67,6 +69,14 @@
 
 #include <asm/hardware/debug-pl01x.S>
 
+#elif defined(CONFIG_DEBUG_VEXPRESS_UART0_CRX)
+
+		.macro	addruart,rp,tmp,tmp2
+		ldr	\rp, =DEBUG_LL_UART_PHYS_CRX
+		.endm
+
+#include <asm/hardware/debug-pl01x.S>
+
 #else /* CONFIG_DEBUG_LL_UART_NONE */
 
 		.macro	addruart, rp, rv, tmp
-- 
cgit v1.1


From aca7e5920c8e80a2a49c1e37664675d78b23398b Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Thu, 21 Feb 2013 15:21:34 +0000
Subject: ARM: mpu: add PMSA related registers and bitfields to existing
 headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds the following definitions relevant to the PMSA:

Add SCTLR bit 17, (CR_BR - Background Region bit) to the list of CR_*
bitfields. This bit determines whether to use the architecturally defined
memory map

Add the MPUIR to the available registers when using read_cpuid macro. The
MPUIR is the MPU type register.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
CC:"Uwe Kleine-König" <u.kleine-koenig@pengutronix.de>
---
 arch/arm/include/asm/cp15.h    | 5 +++++
 arch/arm/include/asm/cputype.h | 1 +
 2 files changed, 6 insertions(+)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h
index 1f3262e..a524a23 100644
--- a/arch/arm/include/asm/cp15.h
+++ b/arch/arm/include/asm/cp15.h
@@ -23,6 +23,11 @@
 #define CR_RR	(1 << 14)	/* Round Robin cache replacement	*/
 #define CR_L4	(1 << 15)	/* LDR pc can set T bit			*/
 #define CR_DT	(1 << 16)
+#ifdef CONFIG_MMU
+#define CR_HA	(1 << 17)	/* Hardware management of Access Flag   */
+#else
+#define CR_BR	(1 << 17)	/* MPU Background region enable (PMSA)  */
+#endif
 #define CR_IT	(1 << 18)
 #define CR_ST	(1 << 19)
 #define CR_FI	(1 << 21)	/* Fast interrupt (lower latency mode)	*/
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index ec635ff..3b704df 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -8,6 +8,7 @@
 #define CPUID_CACHETYPE	1
 #define CPUID_TCM	2
 #define CPUID_TLBTYPE	3
+#define CPUID_MPUIR	4
 #define CPUID_MPIDR	5
 
 #ifdef CONFIG_CPU_V7M
-- 
cgit v1.1


From a2b45b0da8bbb98cb2f062cf16c6fdebab4243a1 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Thu, 21 Feb 2013 17:49:24 +0000
Subject: ARM: mpu: add header for MPU register layouts and region data

This commit adds definitions relevant to the ARM v7 PMSA compliant MPU.

The register layouts and region configuration data is made accessible to asm
as well as C-code so that it can be used in early bring-up of the MPU.

The mpu region information structs assume that the properties for the I/D side
are the same, though the implementation could be trivially extended for future
platforms where this is no-longer true.

The MPU_*_REGION defines are used for the basic, static MPU region setup.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/mpu.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 arch/arm/include/asm/mpu.h

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h
new file mode 100644
index 0000000..bd48c0c
--- /dev/null
+++ b/arch/arm/include/asm/mpu.h
@@ -0,0 +1,72 @@
+#ifndef __ARM_MPU_H
+#define __ARM_MPU_H
+
+#ifdef CONFIG_ARM_MPU
+
+/* MPUIR layout */
+#define MPUIR_nU		1
+#define MPUIR_DREGION		8
+#define MPUIR_IREGION		16
+#define MPUIR_DREGION_SZMASK	(0xFF << MPUIR_DREGION)
+#define MPUIR_IREGION_SZMASK	(0xFF << MPUIR_IREGION)
+
+/* ID_MMFR0 data relevant to MPU */
+#define MMFR0_PMSA		(0xF << 4)
+#define MMFR0_PMSAv7		(3 << 4)
+
+/* MPU D/I Size Register fields */
+#define MPU_RSR_SZ		1
+#define MPU_RSR_EN		0
+
+/* The D/I RSR value for an enabled region spanning the whole of memory */
+#define MPU_RSR_ALL_MEM		63
+
+/* Individual bits in the DR/IR ACR */
+#define MPU_ACR_XN		(1 << 12)
+#define MPU_ACR_SHARED		(1 << 2)
+
+/* C, B and TEX[2:0] bits only have semantic meanings when grouped */
+#define MPU_RGN_CACHEABLE	0xB
+#define MPU_RGN_SHARED_CACHEABLE (MPU_RGN_CACHEABLE | MPU_ACR_SHARED)
+#define MPU_RGN_STRONGLY_ORDERED 0
+
+/* Main region should only be shared for SMP */
+#ifdef CONFIG_SMP
+#define MPU_RGN_NORMAL		(MPU_RGN_CACHEABLE | MPU_ACR_SHARED)
+#else
+#define MPU_RGN_NORMAL		MPU_RGN_CACHEABLE
+#endif
+
+/* Access permission bits of ACR (only define those that we use)*/
+#define MPU_AP_PL1RW_PL0RW	(0x3 << 8)
+#define MPU_AP_PL1RW_PL0R0	(0x2 << 8)
+#define MPU_AP_PL1RW_PL0NA	(0x1 << 8)
+
+/* For minimal static MPU region configurations */
+#define MPU_PROBE_REGION	0
+#define MPU_BG_REGION		1
+#define MPU_RAM_REGION		2
+
+/* Maximum number of regions Linux is interested in */
+#define MPU_MAX_REGIONS		16
+
+#ifndef __ASSEMBLY__
+
+struct mpu_rgn {
+	/* Assume same attributes for d/i-side  */
+	u32 drbar;
+	u32 drsr;
+	u32 dracr;
+};
+
+struct mpu_rgn_info {
+	u32 mpuir;
+	struct mpu_rgn rgns[MPU_MAX_REGIONS];
+};
+extern struct mpu_rgn_info mpu_rgn_info;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_ARM_MPU */
+
+#endif
-- 
cgit v1.1


From 67c9845beab16a0c97b9c07f72a4b36b7175bb86 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Fri, 22 Feb 2013 17:48:56 +0000
Subject: ARM: mpu: add early bring-up code for the ARMv7 PMSA-compliant MPU

This patch adds initial support for using the MPU, which is necessary for
SMP operation on PMSAv7 processors because it is the only way to ensure
memory is shared. This is an initial patch and full SMP support is added
later in this series.

The setup of the MPU is performed in a way analagous to that for the MMU:
Very early initialisation before the C environment is brought up, followed
by a sanity check and more complete initialisation in C.

This patch provides the simplest possible memory region configuration:
MPU_PROBE_REGION: Reserved for probing MPU details, not enabled
MPU_BG_REGION: A 'background' region that specifies all memory strongly ordered
MPU_RAM_REGION: A single shared, cacheable, normal region for the valid RAM.

In this early initialisation code we simply map the whole of the address
space with the BG_REGION and (at least) the kernel with the RAM_REGION. The
MPU has region alignment constraints that require us to round past the end
of the kernel.

As region 2 has a higher priority than region 1, it overrides the strongly-
ordered behaviour for RAM only.

Subsequent patches will add more complete initialisation from the C-world
and support for bringing up secondary CPUs.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
CC: Hyok S. Choi <hyok.choi@samsung.com>
---
 arch/arm/include/asm/mpu.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h
index bd48c0c..0014834 100644
--- a/arch/arm/include/asm/mpu.h
+++ b/arch/arm/include/asm/mpu.h
@@ -50,6 +50,9 @@
 /* Maximum number of regions Linux is interested in */
 #define MPU_MAX_REGIONS		16
 
+#define MPU_DATA_SIDE		0
+#define MPU_INSTR_SIDE		1
+
 #ifndef __ASSEMBLY__
 
 struct mpu_rgn {
-- 
cgit v1.1


From eb08375ea66e63c5e11dea69b43c5633d531ce81 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Fri, 22 Feb 2013 18:51:30 +0000
Subject: ARM: mpu: add MPU initialisation for secondary cores

The MPU initialisation on the primary core is performed in two stages, one
minimal stage to ensure the CPU can boot and a second one after
sanity_check_meminfo. As the memory configuration is known by the time we
boot secondary cores only a single step is necessary, provided the values
for DRSR are passed to secondaries.

This patch implements this arrangement. The configuration generated for the
MPU regions is made available to the secondary core, which can then use the
asm MPU intialisation code to program a complete region configuration.

This is necessary for SMP configurations without an MMU, as the MPU
initialisation is the only way to ensure that memory is specified as
'shared'.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
CC: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/include/asm/smp.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index d3a22be..a8cae71c 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -65,7 +65,10 @@ asmlinkage void secondary_start_kernel(void);
  * Initial data for bringing up a secondary CPU.
  */
 struct secondary_data {
-	unsigned long pgdir;
+	union {
+		unsigned long mpu_rgn_szr;
+		unsigned long pgdir;
+	};
 	unsigned long swapper_pg_dir;
 	void *stack;
 };
-- 
cgit v1.1


From 15e7e5c1ebf556cd620c9b091e121091ac760f6d Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 5 Jun 2013 11:27:26 +0100
Subject: ARM: 7749/1: spinlock: retry trylock operation if strex fails on free
 lock

An exclusive store instruction may fail for reasons other than lock
contention (e.g. a cache eviction during the critical section) so, in
line with other architectures using similar exclusive instructions
(alpha, mips, powerpc), retry the trylock operation if the lock appears
to be free but the strex reported failure.

Reported-by: Tony Thompson <anthony.thompson@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/spinlock.h | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 6220e9f..f8b8965 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -97,19 +97,22 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
-	unsigned long tmp;
+	unsigned long contended, res;
 	u32 slock;
 
-	__asm__ __volatile__(
-"	ldrex	%0, [%2]\n"
-"	subs	%1, %0, %0, ror #16\n"
-"	addeq	%0, %0, %3\n"
-"	strexeq	%1, %0, [%2]"
-	: "=&r" (slock), "=&r" (tmp)
-	: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
-	: "cc");
-
-	if (tmp == 0) {
+	do {
+		__asm__ __volatile__(
+		"	ldrex	%0, [%3]\n"
+		"	mov	%2, #0\n"
+		"	subs	%1, %0, %0, ror #16\n"
+		"	addeq	%0, %0, %4\n"
+		"	strexeq	%2, %0, [%3]"
+		: "=&r" (slock), "=&r" (contended), "=r" (res)
+		: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
+		: "cc");
+	} while (res);
+
+	if (!contended) {
 		smp_mb();
 		return 1;
 	} else {
-- 
cgit v1.1


From 621a0147d5c921f4cc33636ccd0602ad5d7cbfbc Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 12 Jun 2013 12:25:56 +0100
Subject: ARM: 7757/1: mm: don't flush icache in switch_mm with hardware
 broadcasting

When scheduling an mm on a CPU where it hasn't previously been used, we
flush the icache on that CPU so that any code loaded previously on
a different core can be safely executed.

For cores with hardware broadcasting of cache maintenance operations,
this is clearly unnecessary, since the inner-shareable invalidation in
__sync_icache_dcache will affect all CPUs.

This patch conditionalises the icache flush in switch_mm based on
cache_ops_need_broadcast().

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Albin Tonnerre <albin.tonnerre@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/mmu_context.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index a7b85e0..2a45c33 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -18,6 +18,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cachetype.h>
 #include <asm/proc-fns.h>
+#include <asm/smp_plat.h>
 #include <asm-generic/mm_hooks.h>
 
 void __check_vmalloc_seq(struct mm_struct *mm);
@@ -98,12 +99,16 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 #ifdef CONFIG_MMU
 	unsigned int cpu = smp_processor_id();
 
-#ifdef CONFIG_SMP
-	/* check for possible thread migration */
-	if (!cpumask_empty(mm_cpumask(next)) &&
+	/*
+	 * __sync_icache_dcache doesn't broadcast the I-cache invalidation,
+	 * so check for possible thread migration and invalidate the I-cache
+	 * if we're new to this CPU.
+	 */
+	if (cache_ops_need_broadcast() &&
+	    !cpumask_empty(mm_cpumask(next)) &&
 	    !cpumask_test_cpu(cpu, mm_cpumask(next)))
 		__flush_icache_all();
-#endif
+
 	if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) {
 		check_and_switch_context(next, tsk);
 		if (cache_is_vivt())
-- 
cgit v1.1


From 9dfc28b6308096e48b54c28259825a1200f60742 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Thu, 18 Apr 2013 18:37:24 +0100
Subject: ARM: mpu: protect the vectors page with an MPU region

Without an MMU it is possible for userspace programs to start executing code
in places that they have no business executing. The MPU allows some level of
protection against this.

This patch protects the vectors page from access by userspace processes.
Userspace tasks that dereference a null pointer are already protected by an
svc at 0x0 that kills them. However when tasks use an offset from a null
pointer (eg a function in a null struct) they miss this carefully placed svc
and enter the exception vectors in user mode, ending up in the kernel.

This patch causes programs that do this to receive a SEGV instead of happily
entering the kernel in user-mode, and hence avoid a 'Bad Mode' panic.

As part of this change it is necessary to make sigreturn happen via the
stack when there is not an sa_restorer function. This change is invisible to
userspace, and irrelevant to code compiled using a uClibc toolchain, which
always uses an sa_restorer function.

Because we don't get to remap the vectors in !MMU kuser_helpers are not
in a defined location, and hence aren't usable. This means we don't need to
worry about keeping them accessible from PL0

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
CC: Nicolas Pitre <nico@linaro.org>
CC: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/include/asm/mpu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h
index 0014834..c3247cc 100644
--- a/arch/arm/include/asm/mpu.h
+++ b/arch/arm/include/asm/mpu.h
@@ -46,6 +46,7 @@
 #define MPU_PROBE_REGION	0
 #define MPU_BG_REGION		1
 #define MPU_RAM_REGION		2
+#define MPU_VECTORS_REGION	3
 
 /* Maximum number of regions Linux is interested in */
 #define MPU_MAX_REGIONS		16
-- 
cgit v1.1


From 8cf72172d739639f2699131821a3ebc291287cf2 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Thu, 16 May 2013 10:32:09 +0100
Subject: ARM: kernel: build MPIDR hash function data structure

On ARM SMP systems, cores are identified by their MPIDR register.
The MPIDR guidelines in the ARM ARM do not provide strict enforcement of
MPIDR layout, only recommendations that, if followed, split the MPIDR
on ARM 32 bit platforms in three affinity levels. In multi-cluster
systems like big.LITTLE, if the affinity guidelines are followed, the
MPIDR can not be considered an index anymore. This means that the
association between logical CPU in the kernel and the HW CPU identifier
becomes somewhat more complicated requiring methods like hashing to
associate a given MPIDR to a CPU logical index, in order for the look-up
to be carried out in an efficient and scalable way.

This patch provides a function in the kernel that starting from the
cpu_logical_map, implement collision-free hashing of MPIDR values by checking
all significative bits of MPIDR affinity level bitfields. The hashing
can then be carried out through bits shifting and ORing; the resulting
hash algorithm is a collision-free though not minimal hash that can be
executed with few assembly instructions. The mpidr is filtered through a
mpidr mask that is built by checking all bits that toggle in the set of
MPIDRs corresponding to possible CPUs. Bits that do not toggle do not carry
information so they do not contribute to the resulting hash.

Pseudo code:

/* check all bits that toggle, so they are required */
for (i = 1, mpidr_mask = 0; i < num_possible_cpus(); i++)
	mpidr_mask |= (cpu_logical_map(i) ^ cpu_logical_map(0));

/*
 * Build shifts to be applied to aff0, aff1, aff2 values to hash the mpidr
 * fls() returns the last bit set in a word, 0 if none
 * ffs() returns the first bit set in a word, 0 if none
 */
fs0 = mpidr_mask[7:0] ? ffs(mpidr_mask[7:0]) - 1 : 0;
fs1 = mpidr_mask[15:8] ? ffs(mpidr_mask[15:8]) - 1 : 0;
fs2 = mpidr_mask[23:16] ? ffs(mpidr_mask[23:16]) - 1 : 0;
ls0 = fls(mpidr_mask[7:0]);
ls1 = fls(mpidr_mask[15:8]);
ls2 = fls(mpidr_mask[23:16]);
bits0 = ls0 - fs0;
bits1 = ls1 - fs1;
bits2 = ls2 - fs2;
aff0_shift = fs0;
aff1_shift = 8 + fs1 - bits0;
aff2_shift = 16 + fs2 - (bits0 + bits1);
u32 hash(u32 mpidr) {
	u32 l0, l1, l2;
	u32 mpidr_masked = mpidr & mpidr_mask;
	l0 = mpidr_masked & 0xff;
	l1 = mpidr_masked & 0xff00;
	l2 = mpidr_masked & 0xff0000;
	return (l0 >> aff0_shift | l1 >> aff1_shift | l2 >> aff2_shift);
}

The hashing algorithm relies on the inherent properties set in the ARM ARM
recommendations for the MPIDR. Exotic configurations, where for instance the
MPIDR values at a given affinity level have large holes, can end up requiring
big hash tables since the compression of values that can be achieved through
shifting is somewhat crippled when holes are present. Kernel warns if
the number of buckets of the resulting hash table exceeds the number of
possible CPUs by a factor of 4, which is a symptom of a very sparse HW
MPIDR configuration.

The hash algorithm is quite simple and can easily be implemented in assembly
code, to be used in code paths where the kernel virtual address space is
not set-up (ie cpu_resume) and instruction and data fetches are strongly
ordered so code must be compact and must carry out few data accesses.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Colin Cross <ccross@android.com>
Cc: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Amit Kucheria <amit.kucheria@linaro.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Shawn Guo <shawn.guo@linaro.org>
Tested-by: Kevin Hilman <khilman@linaro.org>
Tested-by: Stephen Warren <swarren@wwwdotorg.org>
---
 arch/arm/include/asm/smp_plat.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h
index 1c7b6f8..f75f8a2 100644
--- a/arch/arm/include/asm/smp_plat.h
+++ b/arch/arm/include/asm/smp_plat.h
@@ -70,4 +70,16 @@ static inline int get_logical_index(u32 mpidr)
 	return -EINVAL;
 }
 
+struct mpidr_hash {
+	u32	mask;
+	u32	shift_aff[3];
+	u32	bits;
+};
+
+extern struct mpidr_hash mpidr_hash;
+
+static inline u32 mpidr_hash_size(void)
+{
+	return 1 << mpidr_hash.bits;
+}
 #endif
-- 
cgit v1.1


From 7604537bbb5720376e8c9e6bc74a8e6305e3094d Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Thu, 16 May 2013 10:34:30 +0100
Subject: ARM: kernel: implement stack pointer save array through MPIDR hashing

Current implementation of cpu_{suspend}/cpu_{resume} relies on the MPIDR
to index the array of pointers where the context is saved and restored.
The current approach works as long as the MPIDR can be considered a
linear index, so that the pointers array can simply be dereferenced by
using the MPIDR[7:0] value.
On ARM multi-cluster systems, where the MPIDR may not be a linear index,
to properly dereference the stack pointer array, a mapping function should
be applied to it so that it can be used for arrays look-ups.

This patch adds code in the cpu_{suspend}/cpu_{resume} implementation
that relies on shifting and ORing hashing method to map a MPIDR value to a
set of buckets precomputed at boot to have a collision free mapping from
MPIDR to context pointers.

The hashing algorithm must be simple, fast, and implementable with few
instructions since in the cpu_resume path the mapping is carried out with
the MMU off and the I-cache off, hence code and data are fetched from DRAM
with no-caching available. Simplicity is counterbalanced with a little
increase of memory (allocated dynamically) for stack pointers buckets, that
should be anyway fairly limited on most systems.

Memory for context pointers is allocated in a early_initcall with
size precomputed and stashed previously in kernel data structures.
Memory for context pointers is allocated through kmalloc; this
guarantees contiguous physical addresses for the allocated memory which
is fundamental to the correct functioning of the resume mechanism that
relies on the context pointer array to be a chunk of contiguous physical
memory. Virtual to physical address conversion for the context pointer
array base is carried out at boot to avoid fiddling with virt_to_phys
conversions in the cpu_resume path which is quite fragile and should be
optimized to execute as few instructions as possible.
Virtual and physical context pointer base array addresses are stashed in a
struct that is accessible from assembly using values generated through the
asm-offsets.c mechanism.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Colin Cross <ccross@android.com>
Cc: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Amit Kucheria <amit.kucheria@linaro.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Shawn Guo <shawn.guo@linaro.org>
Tested-by: Kevin Hilman <khilman@linaro.org>
Tested-by: Stephen Warren <swarren@wwwdotorg.org>
---
 arch/arm/include/asm/smp_plat.h | 10 ++++++++--
 arch/arm/include/asm/suspend.h  |  5 +++++
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h
index f75f8a2..6e63f29 100644
--- a/arch/arm/include/asm/smp_plat.h
+++ b/arch/arm/include/asm/smp_plat.h
@@ -70,9 +70,15 @@ static inline int get_logical_index(u32 mpidr)
 	return -EINVAL;
 }
 
+/*
+ * NOTE ! Assembly code relies on the following
+ * structure memory layout in order to carry out load
+ * multiple from its base address. For more
+ * information check arch/arm/kernel/sleep.S
+ */
 struct mpidr_hash {
-	u32	mask;
-	u32	shift_aff[3];
+	u32	mask; /* used by sleep.S */
+	u32	shift_aff[3]; /* used by sleep.S */
 	u32	bits;
 };
 
diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h
index 1c0a551..cd20029 100644
--- a/arch/arm/include/asm/suspend.h
+++ b/arch/arm/include/asm/suspend.h
@@ -1,6 +1,11 @@
 #ifndef __ASM_ARM_SUSPEND_H
 #define __ASM_ARM_SUSPEND_H
 
+struct sleep_save_sp {
+	u32 *save_ptr_stash;
+	u32 save_ptr_stash_phys;
+};
+
 extern void cpu_resume(void);
 extern int cpu_suspend(unsigned long, int (*)(unsigned long));
 
-- 
cgit v1.1


From a4780adeefd042482f624f5e0d577bf9cdcbb760 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Hentschel?= <nerv@dawncrow.de>
Date: Tue, 18 Jun 2013 23:23:26 +0100
Subject: ARM: 7735/2: Preserve the user r/w register TPIDRURW on context
 switch and fork
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 6a1c53124aa1 the user writeable TLS register was zeroed to
prevent it from being used as a covert channel between two tasks.

There are more and more applications coming to Windows RT,
Wine could support them, but mostly they expect to have
the thread environment block (TEB) in TPIDRURW.

This patch preserves that register per thread instead of clearing it.
Unlike the TPIDRURO, which is already switched, the TPIDRURW
can be updated from userspace so needs careful treatment in the case that we
modify TPIDRURW and call fork(). To avoid this we must always read
TPIDRURW in copy_thread.

Signed-off-by: André Hentschel <nerv@dawncrow.de>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/thread_info.h |  2 +-
 arch/arm/include/asm/tls.h         | 40 +++++++++++++++++++++++++-------------
 2 files changed, 28 insertions(+), 14 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 1995d1a..214d415 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];	/* TLS registers */
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..83259b8 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,27 +2,30 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
-	.macro set_tls_none, tp, tmp1, tmp2
+#include <asm/asm-offsets.h>
+	.macro switch_tls_none, base, tp, tpuser, tmp1, tmp2
 	.endm
 
-	.macro set_tls_v6k, tp, tmp1, tmp2
+	.macro switch_tls_v6k, base, tp, tpuser, tmp1, tmp2
+	mrc	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
 	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	mcr	p15, 0, \tpuser, c13, c0, 2	@ and the user r/w register
+	str	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
 	.endm
 
-	.macro set_tls_v6, tp, tmp1, tmp2
+	.macro switch_tls_v6, base, tp, tpuser, tmp1, tmp2
 	ldr	\tmp1, =elf_hwcap
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
 	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
+	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
+	mcrne	p15, 0, \tpuser, c13, c0, 2	@ set user r/w register
+	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
 	.endm
 
-	.macro set_tls_software, tp, tmp1, tmp2
+	.macro switch_tls_software, base, tp, tpuser, tmp1, tmp2
 	mov	\tmp1, #0xffff0fff
 	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
@@ -31,19 +34,30 @@
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
-#define set_tls		set_tls_none
+#define switch_tls	switch_tls_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
-#define set_tls		set_tls_v6
+#define switch_tls	switch_tls_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
-#define set_tls		set_tls_v6k
+#define switch_tls	switch_tls_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
-#define set_tls		set_tls_software
+#define switch_tls	switch_tls_software
 #endif
 
+#ifndef __ASSEMBLY__
+static inline unsigned long get_tpuser(void)
+{
+	unsigned long reg = 0;
+
+	if (has_tls_reg && !tls_emu)
+		__asm__("mrc p15, 0, %0, c13, c0, 2" : "=r" (reg));
+
+	return reg;
+}
+#endif
 #endif	/* __ASMARM_TLS_H */
-- 
cgit v1.1


From 0d0752bca1f9a91fb646647aa4abbb21156f316c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <Marc.Zyngier@arm.com>
Date: Fri, 21 Jun 2013 12:07:27 +0100
Subject: ARM: 7769/1: Cortex-A15: fix erratum 798181 implementation

Looking into the active_asids array is not enough, as we also need
to look into the reserved_asids array (they both represent processes
that are currently running).

Also, not holding the ASID allocator lock is racy, as another CPU
could schedule that process and trigger a rollover, making the erratum
workaround miss an IPI.

Exposing this outside of context.c is a little ugly on the side, so
let's define a new entry point that the erratum workaround can call
to obtain the cpumask.

Cc: <stable@vger.kernel.org> # 3.9
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/mmu_context.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index 2a45c33..b5792b7 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -28,7 +28,15 @@ void __check_vmalloc_seq(struct mm_struct *mm);
 void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk);
 #define init_new_context(tsk,mm)	({ atomic64_set(&mm->context.id, 0); 0; })
 
-DECLARE_PER_CPU(atomic64_t, active_asids);
+#ifdef CONFIG_ARM_ERRATA_798181
+void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
+			     cpumask_t *mask);
+#else  /* !CONFIG_ARM_ERRATA_798181 */
+static inline void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
+					   cpumask_t *mask)
+{
+}
+#endif /* CONFIG_ARM_ERRATA_798181 */
 
 #else	/* !CONFIG_CPU_HAS_ASID */
 
-- 
cgit v1.1