From 45309493509b5acd667246c8232dd4911a7a168c Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 18 May 2015 12:46:37 +0530 Subject: ARC: fold ___flush_dcache_page into __flush_dcache_page Signed-off-by: Vineet Gupta --- arch/arc/mm/cache_arc700.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index 12b2100..322e11b 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c @@ -493,7 +493,7 @@ void flush_dcache_page(struct page *page) } else if (page_mapped(page)) { /* kernel reading from page with U-mapping */ - void *paddr = page_address(page); + unsigned long paddr = (unsigned long)page_address(page); unsigned long vaddr = page->index << PAGE_CACHE_SHIFT; if (addr_not_cache_congruent(paddr, vaddr)) @@ -605,7 +605,7 @@ void __inv_icache_page(unsigned long paddr, unsigned long vaddr) * wrapper to clearout kernel or userspace mappings of a page * For kernel mappings @vaddr == @paddr */ -void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr) +void __flush_dcache_page(unsigned long paddr, unsigned long vaddr) { __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV); } @@ -637,7 +637,7 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, u_vaddr &= PAGE_MASK; - ___flush_dcache_page(paddr, u_vaddr); + __flush_dcache_page(paddr, u_vaddr); if (vma->vm_flags & VM_EXEC) __inv_icache_page(paddr, u_vaddr); @@ -663,8 +663,8 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, void copy_user_highpage(struct page *to, struct page *from, unsigned long u_vaddr, struct vm_area_struct *vma) { - void *kfrom = page_address(from); - void *kto = page_address(to); + unsigned long kfrom = (unsigned long)page_address(from); + unsigned long kto = (unsigned long)page_address(to); int clean_src_k_mappings = 0; /* @@ -680,7 +680,7 @@ void copy_user_highpage(struct page *to, struct page *from, clean_src_k_mappings = 1; } - copy_page(kto, kfrom); + copy_page((void *)kto, (void *)kfrom); /* * Mark DST page K-mapping as dirty for a later finalization by -- cgit v1.1 From 454bfda9aca0f6a0487eef523ac92dfc6646807d Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 16 Apr 2015 21:04:49 +0530 Subject: ARC: remove the unused platform helpers from dma mapping API Signed-off-by: Vineet Gupta --- arch/arc/mm/dma.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 12cc648..2cfe81d 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -14,8 +14,6 @@ * Cache bit off in the TLB entry. * * The default DMA address == Phy address which is 0x8000_0000 based. 
- * A platform/device can make it zero based, by over-riding - * plat_{dma,kernel}_addr_to_{kernel,dma} */ #include @@ -37,7 +35,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size, return NULL; /* This is bus address, platform dependent */ - *dma_handle = plat_kernel_addr_to_dma(dev, paddr); + *dma_handle = (dma_addr_t)paddr; return paddr; } @@ -46,8 +44,7 @@ EXPORT_SYMBOL(dma_alloc_noncoherent); void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { - free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle), - size); + free_pages_exact((void *)dma_handle, size); } EXPORT_SYMBOL(dma_free_noncoherent); @@ -67,7 +64,7 @@ void *dma_alloc_coherent(struct device *dev, size_t size, memset(kvaddr, 0, size); /* This is bus address, platform dependent */ - *dma_handle = plat_kernel_addr_to_dma(dev, paddr); + *dma_handle = (dma_addr_t)paddr; return kvaddr; } @@ -78,8 +75,7 @@ void dma_free_coherent(struct device *dev, size_t size, void *kvaddr, { iounmap((void __force __iomem *)kvaddr); - free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle), - size); + free_pages_exact((void *)dma_handle, size); } EXPORT_SYMBOL(dma_free_coherent); -- cgit v1.1 From 40b552d95a545e828fb4ebbf68a385cb9eaebf64 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 13 Feb 2015 18:33:47 +0530 Subject: ARC: compress cpuinfo_arc_mmu (mainly save page size in KB) Signed-off-by: Vineet Gupta --- arch/arc/mm/tlb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 7f47d2a..914d8e0 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -587,14 +587,14 @@ void read_decode_mmu_bcr(void) if (mmu->ver <= 2) { mmu2 = (struct bcr_mmu_1_2 *)&tmp; - mmu->pg_sz = PAGE_SIZE; + mmu->pg_sz_k = TO_KB(PAGE_SIZE); mmu->sets = 1 << mmu2->sets; mmu->ways = 1 << mmu2->ways; mmu->u_dtlb = mmu2->u_dtlb; mmu->u_itlb = mmu2->u_itlb; } else { mmu3 = (struct bcr_mmu_3 *)&tmp; - mmu->pg_sz = 512 << mmu3->pg_sz; + mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); mmu->sets = 1 << mmu3->sets; mmu->ways = 1 << mmu3->ways; mmu->u_dtlb = mmu3->u_dtlb; @@ -611,7 +611,7 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) n += scnprintf(buf + n, len - n, "MMU [v%x]\t: %dk PAGE, JTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n", - p_mmu->ver, TO_KB(p_mmu->pg_sz), + p_mmu->ver, p_mmu->pg_sz_k, p_mmu->num_tlb, p_mmu->sets, p_mmu->ways, p_mmu->u_dtlb, p_mmu->u_itlb, IS_ENABLED(CONFIG_ARC_MMU_SASID) ? 
",SASID" : ""); @@ -639,7 +639,7 @@ void arc_mmu_init(void) mmu->ver, CONFIG_ARC_MMU_VER); } - if (mmu->pg_sz != PAGE_SIZE) + if (mmu->pg_sz_k != TO_KB(PAGE_SIZE)) panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); /* Enable the MMU */ -- cgit v1.1 From 8362c389a4551614ad0586c0c2f33bef5526bcfc Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 7 May 2015 14:39:36 +0530 Subject: ARC: mm/cache_arc700.c -> mm/cache.c Signed-off-by: Vineet Gupta --- arch/arc/mm/Makefile | 2 +- arch/arc/mm/cache.c | 723 +++++++++++++++++++++++++++++++++++++++++++++ arch/arc/mm/cache_arc700.c | 723 --------------------------------------------- 3 files changed, 724 insertions(+), 724 deletions(-) create mode 100644 arch/arc/mm/cache.c delete mode 100644 arch/arc/mm/cache_arc700.c (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile index ac95cc2..7beb941 100644 --- a/arch/arc/mm/Makefile +++ b/arch/arc/mm/Makefile @@ -7,4 +7,4 @@ # obj-y := extable.o ioremap.o dma.o fault.o init.o -obj-y += tlb.o tlbex.o cache_arc700.o mmap.o +obj-y += tlb.o tlbex.o cache.o mmap.o diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c new file mode 100644 index 0000000..322e11b --- /dev/null +++ b/arch/arc/mm/cache.c @@ -0,0 +1,723 @@ +/* + * ARC700 VIPT Cache Management + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs + * -flush_cache_dup_mm (fork) + * -likewise for flush_cache_mm (exit/execve) + * -likewise for flush_cache_range,flush_cache_page (munmap, exit, COW-break) + * + * vineetg: Apr 2011 + * -Now that MMU can support larger pg sz (16K), the determiniation of + * aliasing shd not be based on assumption of 8k pg + * + * vineetg: Mar 2011 + * -optimised version of flush_icache_range( ) for making I/D coherent + * when vaddr is available (agnostic of num of aliases) + * + * vineetg: Mar 2011 + * -Added documentation about I-cache aliasing on ARC700 and the way it + * was handled up until MMU V2. + * -Spotted a three year old bug when killing the 4 aliases, which needs + * bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03} + * instead of paddr | {0x00, 0x01, 0x10, 0x11} + * (Rajesh you owe me one now) + * + * vineetg: Dec 2010 + * -Off-by-one error when computing num_of_lines to flush + * This broke signal handling with bionic which uses synthetic sigret stub + * + * vineetg: Mar 2010 + * -GCC can't generate ZOL for core cache flush loops. + * Conv them into iterations based as opposed to while (start < end) types + * + * Vineetg: July 2009 + * -In I-cache flush routine we used to chk for aliasing for every line INV. + * Instead now we setup routines per cache geometry and invoke them + * via function pointers. + * + * Vineetg: Jan 2009 + * -Cache Line flush routines used to flush an extra line beyond end addr + * because check was while (end >= start) instead of (end > start) + * =Some call sites had to work around by doing -1, -4 etc to end param + * =Some callers didnt care. 
This was spec bad in case of INV routines + * which would discard valid data (cause of the horrible ext2 bug + * in ARC IDE driver) + * + * vineetg: June 11th 2008: Fixed flush_icache_range( ) + * -Since ARC700 caches are not coherent (I$ doesnt snoop D$) both need + * to be flushed, which it was not doing. + * -load_module( ) passes vmalloc addr (Kernel Virtual Addr) to the API, + * however ARC cache maintenance OPs require PHY addr. Thus need to do + * vmalloc_to_phy. + * -Also added optimisation there, that for range > PAGE SIZE we flush the + * entire cache in one shot rather than line by line. For e.g. a module + * with Code sz 600k, old code flushed 600k worth of cache (line-by-line), + * while cache is only 16 or 32k. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +char *arc_cache_mumbojumbo(int c, char *buf, int len) +{ + int n = 0; + +#define PR_CACHE(p, cfg, str) \ + if (!(p)->ver) \ + n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \ + else \ + n += scnprintf(buf + n, len - n, \ + str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n", \ + (p)->sz_k, (p)->assoc, (p)->line_len, \ + (p)->vipt ? "VIPT" : "PIPT", \ + (p)->alias ? " aliasing" : "", \ + IS_ENABLED(cfg) ? "" : " (not used)"); + + PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); + PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); + + return buf; +} + +/* + * Read the Cache Build Confuration Registers, Decode them and save into + * the cpuinfo structure for later use. + * No Validation done here, simply read/convert the BCRs + */ +void read_decode_cache_bcr(void) +{ + struct cpuinfo_arc_cache *p_ic, *p_dc; + unsigned int cpu = smp_processor_id(); + struct bcr_cache { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; +#else + unsigned int ver:8, config:4, sz:4, line_len:4, pad:12; +#endif + } ibcr, dbcr; + + p_ic = &cpuinfo_arc700[cpu].icache; + READ_BCR(ARC_REG_IC_BCR, ibcr); + + if (!ibcr.ver) + goto dc_chk; + + BUG_ON(ibcr.config != 3); + p_ic->assoc = 2; /* Fixed to 2w set assoc */ + p_ic->line_len = 8 << ibcr.line_len; + p_ic->sz_k = 1 << (ibcr.sz - 1); + p_ic->ver = ibcr.ver; + p_ic->vipt = 1; + p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1; + +dc_chk: + p_dc = &cpuinfo_arc700[cpu].dcache; + READ_BCR(ARC_REG_DC_BCR, dbcr); + + if (!dbcr.ver) + return; + + BUG_ON(dbcr.config != 2); + p_dc->assoc = 4; /* Fixed to 4w set assoc */ + p_dc->line_len = 16 << dbcr.line_len; + p_dc->sz_k = 1 << (dbcr.sz - 1); + p_dc->ver = dbcr.ver; + p_dc->vipt = 1; + p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1; +} + +/* + * 1. Validate the Cache Geomtery (compile time config matches hardware) + * 2. If I-cache suffers from aliasing, setup work arounds (difft flush rtn) + * (aliasing D-cache configurations are not supported YET) + * 3. Enable the Caches, setup default flush mode for D-Cache + * 3. 
Calculate the SHMLBA used by user space + */ +void arc_cache_init(void) +{ + unsigned int __maybe_unused cpu = smp_processor_id(); + char str[256]; + + printk(arc_cache_mumbojumbo(0, str, sizeof(str))); + + if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) { + struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; + + if (!ic->ver) + panic("cache support enabled but non-existent cache\n"); + + if (ic->line_len != L1_CACHE_BYTES) + panic("ICache line [%d] != kernel Config [%d]", + ic->line_len, L1_CACHE_BYTES); + + if (ic->ver != CONFIG_ARC_MMU_VER) + panic("Cache ver [%d] doesn't match MMU ver [%d]\n", + ic->ver, CONFIG_ARC_MMU_VER); + } + + if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { + struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; + int handled; + + if (!dc->ver) + panic("cache support enabled but non-existent cache\n"); + + if (dc->line_len != L1_CACHE_BYTES) + panic("DCache line [%d] != kernel Config [%d]", + dc->line_len, L1_CACHE_BYTES); + + /* check for D-Cache aliasing */ + handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); + + if (dc->alias && !handled) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + else if (!dc->alias && handled) + panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + } +} + +#define OP_INV 0x1 +#define OP_FLUSH 0x2 +#define OP_FLUSH_N_INV 0x3 +#define OP_INV_IC 0x4 + +/* + * Common Helper for Line Operations on {I,D}-Cache + */ +static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int cacheop) +{ + unsigned int aux_cmd, aux_tag; + int num_lines; + const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE; + + if (cacheop == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; +#if (CONFIG_ARC_MMU_VER > 2) + aux_tag = ARC_REG_IC_PTAG; +#endif + } + else { + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; +#if (CONFIG_ARC_MMU_VER > 2) + aux_tag = ARC_REG_DC_PTAG; +#endif + } + + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). 
+ */ + if (!full_page_op) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + vaddr &= CACHE_LINE_MASK; + } + + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + +#if (CONFIG_ARC_MMU_VER <= 2) + /* MMUv2 and before: paddr contains stuffed vaddrs bits */ + paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; +#else + /* if V-P const for loop, PTAG can be written once outside loop */ + if (full_page_op) + write_aux_reg(aux_tag, paddr); +#endif + + while (num_lines-- > 0) { +#if (CONFIG_ARC_MMU_VER > 2) + /* MMUv3, cache ops require paddr seperately */ + if (!full_page_op) { + write_aux_reg(aux_tag, paddr); + paddr += L1_CACHE_BYTES; + } + + write_aux_reg(aux_cmd, vaddr); + vaddr += L1_CACHE_BYTES; +#else + write_aux_reg(aux_cmd, paddr); + paddr += L1_CACHE_BYTES; +#endif + } +} + +#ifdef CONFIG_ARC_HAS_DCACHE + +/*************************************************************** + * Machine specific helpers for Entire D-Cache or Per Line ops + */ + +static inline unsigned int __before_dc_op(const int op) +{ + unsigned int reg = reg; + + if (op == OP_FLUSH_N_INV) { + /* Dcache provides 2 cmd: FLUSH or INV + * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE + * flush-n-inv is achieved by INV cmd but with IM=1 + * So toggle INV sub-mode depending on op request and default + */ + reg = read_aux_reg(ARC_REG_DC_CTRL); + write_aux_reg(ARC_REG_DC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH) + ; + } + + return reg; +} + +static inline void __after_dc_op(const int op, unsigned int reg) +{ + if (op & OP_FLUSH) /* flush / flush-n-inv both wait */ + while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS); + + /* Switch back to default Invalidate mode */ + if (op == OP_FLUSH_N_INV) + write_aux_reg(ARC_REG_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH); +} + +/* + * Operation on Entire D-Cache + * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV} + * Note that constant propagation ensures all the checks are gone + * in generated code + */ +static inline void __dc_entire_op(const int cacheop) +{ + unsigned int ctrl_reg; + int aux; + + ctrl_reg = __before_dc_op(cacheop); + + if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */ + aux = ARC_REG_DC_IVDC; + else + aux = ARC_REG_DC_FLSH; + + write_aux_reg(aux, 0x1); + + __after_dc_op(cacheop, ctrl_reg); +} + +/* For kernel mappings cache operation: index is same as paddr */ +#define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op) + +/* + * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback) + */ +static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int cacheop) +{ + unsigned long flags; + unsigned int ctrl_reg; + + local_irq_save(flags); + + ctrl_reg = __before_dc_op(cacheop); + + __cache_line_loop(paddr, vaddr, sz, cacheop); + + __after_dc_op(cacheop, ctrl_reg); + + local_irq_restore(flags); +} + +#else + +#define __dc_entire_op(cacheop) +#define __dc_line_op(paddr, vaddr, sz, cacheop) +#define __dc_line_op_k(paddr, sz, cacheop) + +#endif /* CONFIG_ARC_HAS_DCACHE */ + + +#ifdef CONFIG_ARC_HAS_ICACHE + +/* + * I-Cache Aliasing in ARC700 VIPT caches + * + * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. + * The orig Cache Management Module "CDU" only required paddr to invalidate a + * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. + * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching + * the exact same line. + * + * However for larger Caches (way-size > page-size) - i.e. 
in Aliasing config, + * paddr alone could not be used to correctly index the cache. + * + * ------------------ + * MMU v1/v2 (Fixed Page Size 8k) + * ------------------ + * The solution was to provide CDU with these additonal vaddr bits. These + * would be bits [x:13], x would depend on cache-geometry, 13 comes from + * standard page size of 8k. + * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits + * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the + * orig 5 bits of paddr were anyways ignored by CDU line ops, as they + * represent the offset within cache-line. The adv of using this "clumsy" + * interface for additional info was no new reg was needed in CDU programming + * model. + * + * 17:13 represented the max num of bits passable, actual bits needed were + * fewer, based on the num-of-aliases possible. + * -for 2 alias possibility, only bit 13 needed (32K cache) + * -for 4 alias possibility, bits 14:13 needed (64K cache) + * + * ------------------ + * MMU v3 + * ------------------ + * This ver of MMU supports variable page sizes (1k-16k): although Linux will + * only support 8k (default), 16k and 4k. + * However from hardware perspective, smaller page sizes aggrevate aliasing + * meaning more vaddr bits needed to disambiguate the cache-line-op ; + * the existing scheme of piggybacking won't work for certain configurations. + * Two new registers IC_PTAG and DC_PTAG inttoduced. + * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs + */ + +/*********************************************************** + * Machine specific helper for per line I-Cache invalidate. + */ + +static inline void __ic_entire_inv(void) +{ + write_aux_reg(ARC_REG_IC_IVIC, 1); + read_aux_reg(ARC_REG_IC_CTRL); /* blocks */ +} + +static inline void +__ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, + unsigned long sz) +{ + unsigned long flags; + + local_irq_save(flags); + __cache_line_loop(paddr, vaddr, sz, OP_INV_IC); + local_irq_restore(flags); +} + +#ifndef CONFIG_SMP + +#define __ic_line_inv_vaddr(p, v, s) __ic_line_inv_vaddr_local(p, v, s) + +#else + +struct ic_inv_args { + unsigned long paddr, vaddr; + int sz; +}; + +static void __ic_line_inv_vaddr_helper(void *info) +{ + struct ic_inv_args *ic_inv = info; + + __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz); +} + +static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, + unsigned long sz) +{ + struct ic_inv_args ic_inv = { + .paddr = paddr, + .vaddr = vaddr, + .sz = sz + }; + + on_each_cpu(__ic_line_inv_vaddr_helper, &ic_inv, 1); +} + +#endif /* CONFIG_SMP */ + +#else /* !CONFIG_ARC_HAS_ICACHE */ + +#define __ic_entire_inv() +#define __ic_line_inv_vaddr(pstart, vstart, sz) + +#endif /* CONFIG_ARC_HAS_ICACHE */ + + +/*********************************************************** + * Exported APIs + */ + +/* + * Handle cache congruency of kernel and userspace mappings of page when kernel + * writes-to/reads-from + * + * The idea is to defer flushing of kernel mapping after a WRITE, possible if: + * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent + * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache) + * -In SMP, if hardware caches are coherent + * + * There's a corollary case, where kernel READs from a userspace mapped page. + * If the U-mapping is not congruent to to K-mapping, former needs flushing. 
+ */ +void flush_dcache_page(struct page *page) +{ + struct address_space *mapping; + + if (!cache_is_vipt_aliasing()) { + clear_bit(PG_dc_clean, &page->flags); + return; + } + + /* don't handle anon pages here */ + mapping = page_mapping(page); + if (!mapping) + return; + + /* + * pagecache page, file not yet mapped to userspace + * Make a note that K-mapping is dirty + */ + if (!mapping_mapped(mapping)) { + clear_bit(PG_dc_clean, &page->flags); + } else if (page_mapped(page)) { + + /* kernel reading from page with U-mapping */ + unsigned long paddr = (unsigned long)page_address(page); + unsigned long vaddr = page->index << PAGE_CACHE_SHIFT; + + if (addr_not_cache_congruent(paddr, vaddr)) + __flush_dcache_page(paddr, vaddr); + } +} +EXPORT_SYMBOL(flush_dcache_page); + + +void dma_cache_wback_inv(unsigned long start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_FLUSH_N_INV); +} +EXPORT_SYMBOL(dma_cache_wback_inv); + +void dma_cache_inv(unsigned long start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_INV); +} +EXPORT_SYMBOL(dma_cache_inv); + +void dma_cache_wback(unsigned long start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_FLUSH); +} +EXPORT_SYMBOL(dma_cache_wback); + +/* + * This is API for making I/D Caches consistent when modifying + * kernel code (loadable modules, kprobes, kgdb...) + * This is called on insmod, with kernel virtual address for CODE of + * the module. ARC cache maintenance ops require PHY address thus we + * need to convert vmalloc addr to PHY addr + */ +void flush_icache_range(unsigned long kstart, unsigned long kend) +{ + unsigned int tot_sz; + + WARN(kstart < TASK_SIZE, "%s() can't handle user vaddr", __func__); + + /* Shortcut for bigger flush ranges. + * Here we don't care if this was kernel virtual or phy addr + */ + tot_sz = kend - kstart; + if (tot_sz > PAGE_SIZE) { + flush_cache_all(); + return; + } + + /* Case: Kernel Phy addr (0x8000_0000 onwards) */ + if (likely(kstart > PAGE_OFFSET)) { + /* + * The 2nd arg despite being paddr will be used to index icache + * This is OK since no alternate virtual mappings will exist + * given the callers for this case: kprobe/kgdb in built-in + * kernel code only. + */ + __sync_icache_dcache(kstart, kstart, kend - kstart); + return; + } + + /* + * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff) + * (1) ARC Cache Maintenance ops only take Phy addr, hence special + * handling of kernel vaddr. + * + * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already), + * it still needs to handle a 2 page scenario, where the range + * straddles across 2 virtual pages and hence need for loop + */ + while (tot_sz > 0) { + unsigned int off, sz; + unsigned long phy, pfn; + + off = kstart % PAGE_SIZE; + pfn = vmalloc_to_pfn((void *)kstart); + phy = (pfn << PAGE_SHIFT) + off; + sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off); + __sync_icache_dcache(phy, kstart, sz); + kstart += sz; + tot_sz -= sz; + } +} +EXPORT_SYMBOL(flush_icache_range); + +/* + * General purpose helper to make I and D cache lines consistent. + * @paddr is phy addr of region + * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc) + * However in one instance, when called by kprobe (for a breakpt in + * builtin kernel code) @vaddr will be paddr only, meaning CDU operation will + * use a paddr to index the cache (despite VIPT). This is fine since since a + * builtin kernel page will not have any virtual mappings. + * kprobe on loadable module will be kernel vaddr. 
+ */ +void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len) +{ + __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV); + __ic_line_inv_vaddr(paddr, vaddr, len); +} + +/* wrapper to compile time eliminate alignment checks in flush loop */ +void __inv_icache_page(unsigned long paddr, unsigned long vaddr) +{ + __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE); +} + +/* + * wrapper to clearout kernel or userspace mappings of a page + * For kernel mappings @vaddr == @paddr + */ +void __flush_dcache_page(unsigned long paddr, unsigned long vaddr) +{ + __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV); +} + +noinline void flush_cache_all(void) +{ + unsigned long flags; + + local_irq_save(flags); + + __ic_entire_inv(); + __dc_entire_op(OP_FLUSH_N_INV); + + local_irq_restore(flags); + +} + +#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING + +void flush_cache_mm(struct mm_struct *mm) +{ + flush_cache_all(); +} + +void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, + unsigned long pfn) +{ + unsigned int paddr = pfn << PAGE_SHIFT; + + u_vaddr &= PAGE_MASK; + + __flush_dcache_page(paddr, u_vaddr); + + if (vma->vm_flags & VM_EXEC) + __inv_icache_page(paddr, u_vaddr); +} + +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + flush_cache_all(); +} + +void flush_anon_page(struct vm_area_struct *vma, struct page *page, + unsigned long u_vaddr) +{ + /* TBD: do we really need to clear the kernel mapping */ + __flush_dcache_page(page_address(page), u_vaddr); + __flush_dcache_page(page_address(page), page_address(page)); + +} + +#endif + +void copy_user_highpage(struct page *to, struct page *from, + unsigned long u_vaddr, struct vm_area_struct *vma) +{ + unsigned long kfrom = (unsigned long)page_address(from); + unsigned long kto = (unsigned long)page_address(to); + int clean_src_k_mappings = 0; + + /* + * If SRC page was already mapped in userspace AND it's U-mapping is + * not congruent with K-mapping, sync former to physical page so that + * K-mapping in memcpy below, sees the right data + * + * Note that while @u_vaddr refers to DST page's userspace vaddr, it is + * equally valid for SRC page as well + */ + if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { + __flush_dcache_page(kfrom, u_vaddr); + clean_src_k_mappings = 1; + } + + copy_page((void *)kto, (void *)kfrom); + + /* + * Mark DST page K-mapping as dirty for a later finalization by + * update_mmu_cache(). Although the finalization could have been done + * here as well (given that both vaddr/paddr are available). + * But update_mmu_cache() already has code to do that for other + * non copied user pages (e.g. read faults which wire in pagecache page + * directly). 
+ */ + clear_bit(PG_dc_clean, &to->flags); + + /* + * if SRC was already usermapped and non-congruent to kernel mapping + * sync the kernel mapping back to physical page + */ + if (clean_src_k_mappings) { + __flush_dcache_page(kfrom, kfrom); + set_bit(PG_dc_clean, &from->flags); + } else { + clear_bit(PG_dc_clean, &from->flags); + } +} + +void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) +{ + clear_page(to); + clear_bit(PG_dc_clean, &page->flags); +} + + +/********************************************************************** + * Explicit Cache flush request from user space via syscall + * Needed for JITs which generate code on the fly + */ +SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags) +{ + /* TBD: optimize this */ + flush_cache_all(); + return 0; +} diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c deleted file mode 100644 index 322e11b..0000000 --- a/arch/arc/mm/cache_arc700.c +++ /dev/null @@ -1,723 +0,0 @@ -/* - * ARC700 VIPT Cache Management - * - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs - * -flush_cache_dup_mm (fork) - * -likewise for flush_cache_mm (exit/execve) - * -likewise for flush_cache_range,flush_cache_page (munmap, exit, COW-break) - * - * vineetg: Apr 2011 - * -Now that MMU can support larger pg sz (16K), the determiniation of - * aliasing shd not be based on assumption of 8k pg - * - * vineetg: Mar 2011 - * -optimised version of flush_icache_range( ) for making I/D coherent - * when vaddr is available (agnostic of num of aliases) - * - * vineetg: Mar 2011 - * -Added documentation about I-cache aliasing on ARC700 and the way it - * was handled up until MMU V2. - * -Spotted a three year old bug when killing the 4 aliases, which needs - * bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03} - * instead of paddr | {0x00, 0x01, 0x10, 0x11} - * (Rajesh you owe me one now) - * - * vineetg: Dec 2010 - * -Off-by-one error when computing num_of_lines to flush - * This broke signal handling with bionic which uses synthetic sigret stub - * - * vineetg: Mar 2010 - * -GCC can't generate ZOL for core cache flush loops. - * Conv them into iterations based as opposed to while (start < end) types - * - * Vineetg: July 2009 - * -In I-cache flush routine we used to chk for aliasing for every line INV. - * Instead now we setup routines per cache geometry and invoke them - * via function pointers. - * - * Vineetg: Jan 2009 - * -Cache Line flush routines used to flush an extra line beyond end addr - * because check was while (end >= start) instead of (end > start) - * =Some call sites had to work around by doing -1, -4 etc to end param - * =Some callers didnt care. This was spec bad in case of INV routines - * which would discard valid data (cause of the horrible ext2 bug - * in ARC IDE driver) - * - * vineetg: June 11th 2008: Fixed flush_icache_range( ) - * -Since ARC700 caches are not coherent (I$ doesnt snoop D$) both need - * to be flushed, which it was not doing. - * -load_module( ) passes vmalloc addr (Kernel Virtual Addr) to the API, - * however ARC cache maintenance OPs require PHY addr. Thus need to do - * vmalloc_to_phy. 
- * -Also added optimisation there, that for range > PAGE SIZE we flush the - * entire cache in one shot rather than line by line. For e.g. a module - * with Code sz 600k, old code flushed 600k worth of cache (line-by-line), - * while cache is only 16 or 32k. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -char *arc_cache_mumbojumbo(int c, char *buf, int len) -{ - int n = 0; - -#define PR_CACHE(p, cfg, str) \ - if (!(p)->ver) \ - n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \ - else \ - n += scnprintf(buf + n, len - n, \ - str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n", \ - (p)->sz_k, (p)->assoc, (p)->line_len, \ - (p)->vipt ? "VIPT" : "PIPT", \ - (p)->alias ? " aliasing" : "", \ - IS_ENABLED(cfg) ? "" : " (not used)"); - - PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); - PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); - - return buf; -} - -/* - * Read the Cache Build Confuration Registers, Decode them and save into - * the cpuinfo structure for later use. - * No Validation done here, simply read/convert the BCRs - */ -void read_decode_cache_bcr(void) -{ - struct cpuinfo_arc_cache *p_ic, *p_dc; - unsigned int cpu = smp_processor_id(); - struct bcr_cache { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; -#else - unsigned int ver:8, config:4, sz:4, line_len:4, pad:12; -#endif - } ibcr, dbcr; - - p_ic = &cpuinfo_arc700[cpu].icache; - READ_BCR(ARC_REG_IC_BCR, ibcr); - - if (!ibcr.ver) - goto dc_chk; - - BUG_ON(ibcr.config != 3); - p_ic->assoc = 2; /* Fixed to 2w set assoc */ - p_ic->line_len = 8 << ibcr.line_len; - p_ic->sz_k = 1 << (ibcr.sz - 1); - p_ic->ver = ibcr.ver; - p_ic->vipt = 1; - p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1; - -dc_chk: - p_dc = &cpuinfo_arc700[cpu].dcache; - READ_BCR(ARC_REG_DC_BCR, dbcr); - - if (!dbcr.ver) - return; - - BUG_ON(dbcr.config != 2); - p_dc->assoc = 4; /* Fixed to 4w set assoc */ - p_dc->line_len = 16 << dbcr.line_len; - p_dc->sz_k = 1 << (dbcr.sz - 1); - p_dc->ver = dbcr.ver; - p_dc->vipt = 1; - p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1; -} - -/* - * 1. Validate the Cache Geomtery (compile time config matches hardware) - * 2. If I-cache suffers from aliasing, setup work arounds (difft flush rtn) - * (aliasing D-cache configurations are not supported YET) - * 3. Enable the Caches, setup default flush mode for D-Cache - * 3. 
Calculate the SHMLBA used by user space - */ -void arc_cache_init(void) -{ - unsigned int __maybe_unused cpu = smp_processor_id(); - char str[256]; - - printk(arc_cache_mumbojumbo(0, str, sizeof(str))); - - if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) { - struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; - - if (!ic->ver) - panic("cache support enabled but non-existent cache\n"); - - if (ic->line_len != L1_CACHE_BYTES) - panic("ICache line [%d] != kernel Config [%d]", - ic->line_len, L1_CACHE_BYTES); - - if (ic->ver != CONFIG_ARC_MMU_VER) - panic("Cache ver [%d] doesn't match MMU ver [%d]\n", - ic->ver, CONFIG_ARC_MMU_VER); - } - - if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { - struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; - int handled; - - if (!dc->ver) - panic("cache support enabled but non-existent cache\n"); - - if (dc->line_len != L1_CACHE_BYTES) - panic("DCache line [%d] != kernel Config [%d]", - dc->line_len, L1_CACHE_BYTES); - - /* check for D-Cache aliasing */ - handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); - - if (dc->alias && !handled) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - else if (!dc->alias && handled) - panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - } -} - -#define OP_INV 0x1 -#define OP_FLUSH 0x2 -#define OP_FLUSH_N_INV 0x3 -#define OP_INV_IC 0x4 - -/* - * Common Helper for Line Operations on {I,D}-Cache - */ -static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, - unsigned long sz, const int cacheop) -{ - unsigned int aux_cmd, aux_tag; - int num_lines; - const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE; - - if (cacheop == OP_INV_IC) { - aux_cmd = ARC_REG_IC_IVIL; -#if (CONFIG_ARC_MMU_VER > 2) - aux_tag = ARC_REG_IC_PTAG; -#endif - } - else { - /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ - aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; -#if (CONFIG_ARC_MMU_VER > 2) - aux_tag = ARC_REG_DC_PTAG; -#endif - } - - /* Ensure we properly floor/ceil the non-line aligned/sized requests - * and have @paddr - aligned to cache line and integral @num_lines. - * This however can be avoided for page sized since: - * -@paddr will be cache-line aligned already (being page aligned) - * -@sz will be integral multiple of line size (being page sized). 
- */ - if (!full_page_op) { - sz += paddr & ~CACHE_LINE_MASK; - paddr &= CACHE_LINE_MASK; - vaddr &= CACHE_LINE_MASK; - } - - num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); - -#if (CONFIG_ARC_MMU_VER <= 2) - /* MMUv2 and before: paddr contains stuffed vaddrs bits */ - paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; -#else - /* if V-P const for loop, PTAG can be written once outside loop */ - if (full_page_op) - write_aux_reg(aux_tag, paddr); -#endif - - while (num_lines-- > 0) { -#if (CONFIG_ARC_MMU_VER > 2) - /* MMUv3, cache ops require paddr seperately */ - if (!full_page_op) { - write_aux_reg(aux_tag, paddr); - paddr += L1_CACHE_BYTES; - } - - write_aux_reg(aux_cmd, vaddr); - vaddr += L1_CACHE_BYTES; -#else - write_aux_reg(aux_cmd, paddr); - paddr += L1_CACHE_BYTES; -#endif - } -} - -#ifdef CONFIG_ARC_HAS_DCACHE - -/*************************************************************** - * Machine specific helpers for Entire D-Cache or Per Line ops - */ - -static inline unsigned int __before_dc_op(const int op) -{ - unsigned int reg = reg; - - if (op == OP_FLUSH_N_INV) { - /* Dcache provides 2 cmd: FLUSH or INV - * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE - * flush-n-inv is achieved by INV cmd but with IM=1 - * So toggle INV sub-mode depending on op request and default - */ - reg = read_aux_reg(ARC_REG_DC_CTRL); - write_aux_reg(ARC_REG_DC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH) - ; - } - - return reg; -} - -static inline void __after_dc_op(const int op, unsigned int reg) -{ - if (op & OP_FLUSH) /* flush / flush-n-inv both wait */ - while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS); - - /* Switch back to default Invalidate mode */ - if (op == OP_FLUSH_N_INV) - write_aux_reg(ARC_REG_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH); -} - -/* - * Operation on Entire D-Cache - * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV} - * Note that constant propagation ensures all the checks are gone - * in generated code - */ -static inline void __dc_entire_op(const int cacheop) -{ - unsigned int ctrl_reg; - int aux; - - ctrl_reg = __before_dc_op(cacheop); - - if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */ - aux = ARC_REG_DC_IVDC; - else - aux = ARC_REG_DC_FLSH; - - write_aux_reg(aux, 0x1); - - __after_dc_op(cacheop, ctrl_reg); -} - -/* For kernel mappings cache operation: index is same as paddr */ -#define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op) - -/* - * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback) - */ -static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, - unsigned long sz, const int cacheop) -{ - unsigned long flags; - unsigned int ctrl_reg; - - local_irq_save(flags); - - ctrl_reg = __before_dc_op(cacheop); - - __cache_line_loop(paddr, vaddr, sz, cacheop); - - __after_dc_op(cacheop, ctrl_reg); - - local_irq_restore(flags); -} - -#else - -#define __dc_entire_op(cacheop) -#define __dc_line_op(paddr, vaddr, sz, cacheop) -#define __dc_line_op_k(paddr, sz, cacheop) - -#endif /* CONFIG_ARC_HAS_DCACHE */ - - -#ifdef CONFIG_ARC_HAS_ICACHE - -/* - * I-Cache Aliasing in ARC700 VIPT caches - * - * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. - * The orig Cache Management Module "CDU" only required paddr to invalidate a - * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. - * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching - * the exact same line. - * - * However for larger Caches (way-size > page-size) - i.e. 
in Aliasing config, - * paddr alone could not be used to correctly index the cache. - * - * ------------------ - * MMU v1/v2 (Fixed Page Size 8k) - * ------------------ - * The solution was to provide CDU with these additonal vaddr bits. These - * would be bits [x:13], x would depend on cache-geometry, 13 comes from - * standard page size of 8k. - * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits - * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the - * orig 5 bits of paddr were anyways ignored by CDU line ops, as they - * represent the offset within cache-line. The adv of using this "clumsy" - * interface for additional info was no new reg was needed in CDU programming - * model. - * - * 17:13 represented the max num of bits passable, actual bits needed were - * fewer, based on the num-of-aliases possible. - * -for 2 alias possibility, only bit 13 needed (32K cache) - * -for 4 alias possibility, bits 14:13 needed (64K cache) - * - * ------------------ - * MMU v3 - * ------------------ - * This ver of MMU supports variable page sizes (1k-16k): although Linux will - * only support 8k (default), 16k and 4k. - * However from hardware perspective, smaller page sizes aggrevate aliasing - * meaning more vaddr bits needed to disambiguate the cache-line-op ; - * the existing scheme of piggybacking won't work for certain configurations. - * Two new registers IC_PTAG and DC_PTAG inttoduced. - * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs - */ - -/*********************************************************** - * Machine specific helper for per line I-Cache invalidate. - */ - -static inline void __ic_entire_inv(void) -{ - write_aux_reg(ARC_REG_IC_IVIC, 1); - read_aux_reg(ARC_REG_IC_CTRL); /* blocks */ -} - -static inline void -__ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, - unsigned long sz) -{ - unsigned long flags; - - local_irq_save(flags); - __cache_line_loop(paddr, vaddr, sz, OP_INV_IC); - local_irq_restore(flags); -} - -#ifndef CONFIG_SMP - -#define __ic_line_inv_vaddr(p, v, s) __ic_line_inv_vaddr_local(p, v, s) - -#else - -struct ic_inv_args { - unsigned long paddr, vaddr; - int sz; -}; - -static void __ic_line_inv_vaddr_helper(void *info) -{ - struct ic_inv_args *ic_inv = info; - - __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz); -} - -static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, - unsigned long sz) -{ - struct ic_inv_args ic_inv = { - .paddr = paddr, - .vaddr = vaddr, - .sz = sz - }; - - on_each_cpu(__ic_line_inv_vaddr_helper, &ic_inv, 1); -} - -#endif /* CONFIG_SMP */ - -#else /* !CONFIG_ARC_HAS_ICACHE */ - -#define __ic_entire_inv() -#define __ic_line_inv_vaddr(pstart, vstart, sz) - -#endif /* CONFIG_ARC_HAS_ICACHE */ - - -/*********************************************************** - * Exported APIs - */ - -/* - * Handle cache congruency of kernel and userspace mappings of page when kernel - * writes-to/reads-from - * - * The idea is to defer flushing of kernel mapping after a WRITE, possible if: - * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent - * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache) - * -In SMP, if hardware caches are coherent - * - * There's a corollary case, where kernel READs from a userspace mapped page. - * If the U-mapping is not congruent to to K-mapping, former needs flushing. 
- */ -void flush_dcache_page(struct page *page) -{ - struct address_space *mapping; - - if (!cache_is_vipt_aliasing()) { - clear_bit(PG_dc_clean, &page->flags); - return; - } - - /* don't handle anon pages here */ - mapping = page_mapping(page); - if (!mapping) - return; - - /* - * pagecache page, file not yet mapped to userspace - * Make a note that K-mapping is dirty - */ - if (!mapping_mapped(mapping)) { - clear_bit(PG_dc_clean, &page->flags); - } else if (page_mapped(page)) { - - /* kernel reading from page with U-mapping */ - unsigned long paddr = (unsigned long)page_address(page); - unsigned long vaddr = page->index << PAGE_CACHE_SHIFT; - - if (addr_not_cache_congruent(paddr, vaddr)) - __flush_dcache_page(paddr, vaddr); - } -} -EXPORT_SYMBOL(flush_dcache_page); - - -void dma_cache_wback_inv(unsigned long start, unsigned long sz) -{ - __dc_line_op_k(start, sz, OP_FLUSH_N_INV); -} -EXPORT_SYMBOL(dma_cache_wback_inv); - -void dma_cache_inv(unsigned long start, unsigned long sz) -{ - __dc_line_op_k(start, sz, OP_INV); -} -EXPORT_SYMBOL(dma_cache_inv); - -void dma_cache_wback(unsigned long start, unsigned long sz) -{ - __dc_line_op_k(start, sz, OP_FLUSH); -} -EXPORT_SYMBOL(dma_cache_wback); - -/* - * This is API for making I/D Caches consistent when modifying - * kernel code (loadable modules, kprobes, kgdb...) - * This is called on insmod, with kernel virtual address for CODE of - * the module. ARC cache maintenance ops require PHY address thus we - * need to convert vmalloc addr to PHY addr - */ -void flush_icache_range(unsigned long kstart, unsigned long kend) -{ - unsigned int tot_sz; - - WARN(kstart < TASK_SIZE, "%s() can't handle user vaddr", __func__); - - /* Shortcut for bigger flush ranges. - * Here we don't care if this was kernel virtual or phy addr - */ - tot_sz = kend - kstart; - if (tot_sz > PAGE_SIZE) { - flush_cache_all(); - return; - } - - /* Case: Kernel Phy addr (0x8000_0000 onwards) */ - if (likely(kstart > PAGE_OFFSET)) { - /* - * The 2nd arg despite being paddr will be used to index icache - * This is OK since no alternate virtual mappings will exist - * given the callers for this case: kprobe/kgdb in built-in - * kernel code only. - */ - __sync_icache_dcache(kstart, kstart, kend - kstart); - return; - } - - /* - * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff) - * (1) ARC Cache Maintenance ops only take Phy addr, hence special - * handling of kernel vaddr. - * - * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already), - * it still needs to handle a 2 page scenario, where the range - * straddles across 2 virtual pages and hence need for loop - */ - while (tot_sz > 0) { - unsigned int off, sz; - unsigned long phy, pfn; - - off = kstart % PAGE_SIZE; - pfn = vmalloc_to_pfn((void *)kstart); - phy = (pfn << PAGE_SHIFT) + off; - sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off); - __sync_icache_dcache(phy, kstart, sz); - kstart += sz; - tot_sz -= sz; - } -} -EXPORT_SYMBOL(flush_icache_range); - -/* - * General purpose helper to make I and D cache lines consistent. - * @paddr is phy addr of region - * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc) - * However in one instance, when called by kprobe (for a breakpt in - * builtin kernel code) @vaddr will be paddr only, meaning CDU operation will - * use a paddr to index the cache (despite VIPT). This is fine since since a - * builtin kernel page will not have any virtual mappings. - * kprobe on loadable module will be kernel vaddr. 
- */ -void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len) -{ - __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV); - __ic_line_inv_vaddr(paddr, vaddr, len); -} - -/* wrapper to compile time eliminate alignment checks in flush loop */ -void __inv_icache_page(unsigned long paddr, unsigned long vaddr) -{ - __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE); -} - -/* - * wrapper to clearout kernel or userspace mappings of a page - * For kernel mappings @vaddr == @paddr - */ -void __flush_dcache_page(unsigned long paddr, unsigned long vaddr) -{ - __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV); -} - -noinline void flush_cache_all(void) -{ - unsigned long flags; - - local_irq_save(flags); - - __ic_entire_inv(); - __dc_entire_op(OP_FLUSH_N_INV); - - local_irq_restore(flags); - -} - -#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING - -void flush_cache_mm(struct mm_struct *mm) -{ - flush_cache_all(); -} - -void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, - unsigned long pfn) -{ - unsigned int paddr = pfn << PAGE_SHIFT; - - u_vaddr &= PAGE_MASK; - - __flush_dcache_page(paddr, u_vaddr); - - if (vma->vm_flags & VM_EXEC) - __inv_icache_page(paddr, u_vaddr); -} - -void flush_cache_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - flush_cache_all(); -} - -void flush_anon_page(struct vm_area_struct *vma, struct page *page, - unsigned long u_vaddr) -{ - /* TBD: do we really need to clear the kernel mapping */ - __flush_dcache_page(page_address(page), u_vaddr); - __flush_dcache_page(page_address(page), page_address(page)); - -} - -#endif - -void copy_user_highpage(struct page *to, struct page *from, - unsigned long u_vaddr, struct vm_area_struct *vma) -{ - unsigned long kfrom = (unsigned long)page_address(from); - unsigned long kto = (unsigned long)page_address(to); - int clean_src_k_mappings = 0; - - /* - * If SRC page was already mapped in userspace AND it's U-mapping is - * not congruent with K-mapping, sync former to physical page so that - * K-mapping in memcpy below, sees the right data - * - * Note that while @u_vaddr refers to DST page's userspace vaddr, it is - * equally valid for SRC page as well - */ - if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { - __flush_dcache_page(kfrom, u_vaddr); - clean_src_k_mappings = 1; - } - - copy_page((void *)kto, (void *)kfrom); - - /* - * Mark DST page K-mapping as dirty for a later finalization by - * update_mmu_cache(). Although the finalization could have been done - * here as well (given that both vaddr/paddr are available). - * But update_mmu_cache() already has code to do that for other - * non copied user pages (e.g. read faults which wire in pagecache page - * directly). 
- */ - clear_bit(PG_dc_clean, &to->flags); - - /* - * if SRC was already usermapped and non-congruent to kernel mapping - * sync the kernel mapping back to physical page - */ - if (clean_src_k_mappings) { - __flush_dcache_page(kfrom, kfrom); - set_bit(PG_dc_clean, &from->flags); - } else { - clear_bit(PG_dc_clean, &from->flags); - } -} - -void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) -{ - clear_page(to); - clear_bit(PG_dc_clean, &page->flags); -} - - -/********************************************************************** - * Explicit Cache flush request from user space via syscall - * Needed for JITs which generate code on the fly - */ -SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags) -{ - /* TBD: optimize this */ - flush_cache_all(); - return 0; -} -- cgit v1.1 From 8ea2ddff41f795e3e9a0550612096b5115c593f1 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 4 Jun 2015 15:35:53 +0530 Subject: ARC: cacheflush: move some code around, delete old comments Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 267 ++++++++++++++++++++-------------------------------- 1 file changed, 102 insertions(+), 165 deletions(-) (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 322e11b..1ed3430 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1,64 +1,12 @@ /* - * ARC700 VIPT Cache Management + * ARC Cache Management * + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs - * -flush_cache_dup_mm (fork) - * -likewise for flush_cache_mm (exit/execve) - * -likewise for flush_cache_range,flush_cache_page (munmap, exit, COW-break) - * - * vineetg: Apr 2011 - * -Now that MMU can support larger pg sz (16K), the determiniation of - * aliasing shd not be based on assumption of 8k pg - * - * vineetg: Mar 2011 - * -optimised version of flush_icache_range( ) for making I/D coherent - * when vaddr is available (agnostic of num of aliases) - * - * vineetg: Mar 2011 - * -Added documentation about I-cache aliasing on ARC700 and the way it - * was handled up until MMU V2. - * -Spotted a three year old bug when killing the 4 aliases, which needs - * bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03} - * instead of paddr | {0x00, 0x01, 0x10, 0x11} - * (Rajesh you owe me one now) - * - * vineetg: Dec 2010 - * -Off-by-one error when computing num_of_lines to flush - * This broke signal handling with bionic which uses synthetic sigret stub - * - * vineetg: Mar 2010 - * -GCC can't generate ZOL for core cache flush loops. - * Conv them into iterations based as opposed to while (start < end) types - * - * Vineetg: July 2009 - * -In I-cache flush routine we used to chk for aliasing for every line INV. - * Instead now we setup routines per cache geometry and invoke them - * via function pointers. - * - * Vineetg: Jan 2009 - * -Cache Line flush routines used to flush an extra line beyond end addr - * because check was while (end >= start) instead of (end > start) - * =Some call sites had to work around by doing -1, -4 etc to end param - * =Some callers didnt care. 
This was spec bad in case of INV routines - * which would discard valid data (cause of the horrible ext2 bug - * in ARC IDE driver) - * - * vineetg: June 11th 2008: Fixed flush_icache_range( ) - * -Since ARC700 caches are not coherent (I$ doesnt snoop D$) both need - * to be flushed, which it was not doing. - * -load_module( ) passes vmalloc addr (Kernel Virtual Addr) to the API, - * however ARC cache maintenance OPs require PHY addr. Thus need to do - * vmalloc_to_phy. - * -Also added optimisation there, that for range > PAGE SIZE we flush the - * entire cache in one shot rather than line by line. For e.g. a module - * with Code sz 600k, old code flushed 600k worth of cache (line-by-line), - * while cache is only 16 or 32k. */ #include @@ -142,54 +90,8 @@ dc_chk: } /* - * 1. Validate the Cache Geomtery (compile time config matches hardware) - * 2. If I-cache suffers from aliasing, setup work arounds (difft flush rtn) - * (aliasing D-cache configurations are not supported YET) - * 3. Enable the Caches, setup default flush mode for D-Cache - * 3. Calculate the SHMLBA used by user space + * Line Operation on {I,D}-Cache */ -void arc_cache_init(void) -{ - unsigned int __maybe_unused cpu = smp_processor_id(); - char str[256]; - - printk(arc_cache_mumbojumbo(0, str, sizeof(str))); - - if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) { - struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; - - if (!ic->ver) - panic("cache support enabled but non-existent cache\n"); - - if (ic->line_len != L1_CACHE_BYTES) - panic("ICache line [%d] != kernel Config [%d]", - ic->line_len, L1_CACHE_BYTES); - - if (ic->ver != CONFIG_ARC_MMU_VER) - panic("Cache ver [%d] doesn't match MMU ver [%d]\n", - ic->ver, CONFIG_ARC_MMU_VER); - } - - if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { - struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; - int handled; - - if (!dc->ver) - panic("cache support enabled but non-existent cache\n"); - - if (dc->line_len != L1_CACHE_BYTES) - panic("DCache line [%d] != kernel Config [%d]", - dc->line_len, L1_CACHE_BYTES); - - /* check for D-Cache aliasing */ - handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); - - if (dc->alias && !handled) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - else if (!dc->alias && handled) - panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - } -} #define OP_INV 0x1 #define OP_FLUSH 0x2 @@ -197,16 +99,55 @@ void arc_cache_init(void) #define OP_INV_IC 0x4 /* - * Common Helper for Line Operations on {I,D}-Cache + * I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3) + * + * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. + * The orig Cache Management Module "CDU" only required paddr to invalidate a + * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. + * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching + * the exact same line. + * + * However for larger Caches (way-size > page-size) - i.e. in Aliasing config, + * paddr alone could not be used to correctly index the cache. + * + * ------------------ + * MMU v1/v2 (Fixed Page Size 8k) + * ------------------ + * The solution was to provide CDU with these additonal vaddr bits. These + * would be bits [x:13], x would depend on cache-geometry, 13 comes from + * standard page size of 8k. 
+ * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits + * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the + * orig 5 bits of paddr were anyways ignored by CDU line ops, as they + * represent the offset within cache-line. The adv of using this "clumsy" + * interface for additional info was no new reg was needed in CDU programming + * model. + * + * 17:13 represented the max num of bits passable, actual bits needed were + * fewer, based on the num-of-aliases possible. + * -for 2 alias possibility, only bit 13 needed (32K cache) + * -for 4 alias possibility, bits 14:13 needed (64K cache) + * + * ------------------ + * MMU v3 + * ------------------ + * This ver of MMU supports variable page sizes (1k-16k): although Linux will + * only support 8k (default), 16k and 4k. + * However from hardware perspective, smaller page sizes aggrevate aliasing + * meaning more vaddr bits needed to disambiguate the cache-line-op ; + * the existing scheme of piggybacking won't work for certain configurations. + * Two new registers IC_PTAG and DC_PTAG inttoduced. + * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs */ + static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, - unsigned long sz, const int cacheop) + unsigned long sz, const int op) { unsigned int aux_cmd, aux_tag; int num_lines; const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE; - if (cacheop == OP_INV_IC) { + if (op == OP_INV_IC) { aux_cmd = ARC_REG_IC_IVIL; #if (CONFIG_ARC_MMU_VER > 2) aux_tag = ARC_REG_IC_PTAG; @@ -214,7 +155,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, } else { /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ - aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; + aux_cmd = op & OP_INV ? 
ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; #if (CONFIG_ARC_MMU_VER > 2) aux_tag = ARC_REG_DC_PTAG; #endif @@ -296,107 +237,60 @@ static inline void __after_dc_op(const int op, unsigned int reg) /* * Operation on Entire D-Cache - * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV} + * @op = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV} * Note that constant propagation ensures all the checks are gone * in generated code */ -static inline void __dc_entire_op(const int cacheop) +static inline void __dc_entire_op(const int op) { unsigned int ctrl_reg; int aux; - ctrl_reg = __before_dc_op(cacheop); + ctrl_reg = __before_dc_op(op); - if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */ + if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */ aux = ARC_REG_DC_IVDC; else aux = ARC_REG_DC_FLSH; write_aux_reg(aux, 0x1); - __after_dc_op(cacheop, ctrl_reg); + __after_dc_op(op, ctrl_reg); } /* For kernel mappings cache operation: index is same as paddr */ #define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op) /* - * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback) + * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback) */ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, - unsigned long sz, const int cacheop) + unsigned long sz, const int op) { unsigned long flags; unsigned int ctrl_reg; local_irq_save(flags); - ctrl_reg = __before_dc_op(cacheop); + ctrl_reg = __before_dc_op(op); - __cache_line_loop(paddr, vaddr, sz, cacheop); + __cache_line_loop(paddr, vaddr, sz, op); - __after_dc_op(cacheop, ctrl_reg); + __after_dc_op(op, ctrl_reg); local_irq_restore(flags); } #else -#define __dc_entire_op(cacheop) -#define __dc_line_op(paddr, vaddr, sz, cacheop) -#define __dc_line_op_k(paddr, sz, cacheop) +#define __dc_entire_op(op) +#define __dc_line_op(paddr, vaddr, sz, op) +#define __dc_line_op_k(paddr, sz, op) #endif /* CONFIG_ARC_HAS_DCACHE */ - #ifdef CONFIG_ARC_HAS_ICACHE -/* - * I-Cache Aliasing in ARC700 VIPT caches - * - * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. - * The orig Cache Management Module "CDU" only required paddr to invalidate a - * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. - * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching - * the exact same line. - * - * However for larger Caches (way-size > page-size) - i.e. in Aliasing config, - * paddr alone could not be used to correctly index the cache. - * - * ------------------ - * MMU v1/v2 (Fixed Page Size 8k) - * ------------------ - * The solution was to provide CDU with these additonal vaddr bits. These - * would be bits [x:13], x would depend on cache-geometry, 13 comes from - * standard page size of 8k. - * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits - * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the - * orig 5 bits of paddr were anyways ignored by CDU line ops, as they - * represent the offset within cache-line. The adv of using this "clumsy" - * interface for additional info was no new reg was needed in CDU programming - * model. - * - * 17:13 represented the max num of bits passable, actual bits needed were - * fewer, based on the num-of-aliases possible. 
- * -for 2 alias possibility, only bit 13 needed (32K cache) - * -for 4 alias possibility, bits 14:13 needed (64K cache) - * - * ------------------ - * MMU v3 - * ------------------ - * This ver of MMU supports variable page sizes (1k-16k): although Linux will - * only support 8k (default), 16k and 4k. - * However from hardware perspective, smaller page sizes aggrevate aliasing - * meaning more vaddr bits needed to disambiguate the cache-line-op ; - * the existing scheme of piggybacking won't work for certain configurations. - * Two new registers IC_PTAG and DC_PTAG inttoduced. - * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs - */ - -/*********************************************************** - * Machine specific helper for per line I-Cache invalidate. - */ - static inline void __ic_entire_inv(void) { write_aux_reg(ARC_REG_IC_IVIC, 1); @@ -721,3 +615,46 @@ SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags) flush_cache_all(); return 0; } + +void arc_cache_init(void) +{ + unsigned int __maybe_unused cpu = smp_processor_id(); + char str[256]; + + printk(arc_cache_mumbojumbo(0, str, sizeof(str))); + + if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) { + struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; + + if (!ic->ver) + panic("cache support enabled but non-existent cache\n"); + + if (ic->line_len != L1_CACHE_BYTES) + panic("ICache line [%d] != kernel Config [%d]", + ic->line_len, L1_CACHE_BYTES); + + if (ic->ver != CONFIG_ARC_MMU_VER) + panic("Cache ver [%d] doesn't match MMU ver [%d]\n", + ic->ver, CONFIG_ARC_MMU_VER); + } + + if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { + struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; + int handled; + + if (!dc->ver) + panic("cache support enabled but non-existent cache\n"); + + if (dc->line_len != L1_CACHE_BYTES) + panic("DCache line [%d] != kernel Config [%d]", + dc->line_len, L1_CACHE_BYTES); + + /* check for D-Cache aliasing */ + handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); + + if (dc->alias && !handled) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + else if (!dc->alias && handled) + panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + } +} -- cgit v1.1 From 6c310681379548ab1aa748e20da16a943fb0f234 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 4 Jun 2015 08:53:47 +0530 Subject: ARC: cacheflush: No need to retain DC_CTRL from __before_dc_op() That is because __after_dc_op() already reads it for status check, so it is better anyways to use that "newer" value. Also reduces the clutter in callers for passing from/to these routines. 
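To illustrate: the flush-status poll has to read DC_CTRL on every
iteration anyway, so its final read-back can drive the mode restore
directly (fragment, matching the hunk below):

	/* The poll itself yields a fresh DC_CTRL value, so OP_FLUSH_N_INV
	 * can restore the default Invalidate mode from it, instead of a
	 * value saved earlier by __before_dc_op()
	 */
	while ((reg = read_aux_reg(ARC_REG_DC_CTRL)) & DC_CTRL_FLUSH_STATUS)
		;

	if (op == OP_FLUSH_N_INV)
		write_aux_reg(ARC_REG_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH);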
Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 1ed3430..82d24c3 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -207,32 +207,33 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, * Machine specific helpers for Entire D-Cache or Per Line ops */ -static inline unsigned int __before_dc_op(const int op) +static inline void __before_dc_op(const int op) { - unsigned int reg = reg; - if (op == OP_FLUSH_N_INV) { /* Dcache provides 2 cmd: FLUSH or INV * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE * flush-n-inv is achieved by INV cmd but with IM=1 * So toggle INV sub-mode depending on op request and default */ - reg = read_aux_reg(ARC_REG_DC_CTRL); - write_aux_reg(ARC_REG_DC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH) - ; + const unsigned int ctl = ARC_REG_DC_CTRL; + write_aux_reg(ctl, read_aux_reg(ctl) | DC_CTRL_INV_MODE_FLUSH); } - - return reg; } -static inline void __after_dc_op(const int op, unsigned int reg) +static inline void __after_dc_op(const int op) { - if (op & OP_FLUSH) /* flush / flush-n-inv both wait */ - while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS); + if (op & OP_FLUSH) { + const unsigned int ctl = ARC_REG_DC_CTRL; + unsigned int reg; - /* Switch back to default Invalidate mode */ - if (op == OP_FLUSH_N_INV) - write_aux_reg(ARC_REG_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH); + /* flush / flush-n-inv both wait */ + while ((reg = read_aux_reg(ctl)) & DC_CTRL_FLUSH_STATUS) + ; + + /* Switch back to default Invalidate mode */ + if (op == OP_FLUSH_N_INV) + write_aux_reg(ctl, reg & ~DC_CTRL_INV_MODE_FLUSH); + } } /* @@ -243,10 +244,9 @@ static inline void __after_dc_op(const int op, unsigned int reg) */ static inline void __dc_entire_op(const int op) { - unsigned int ctrl_reg; int aux; - ctrl_reg = __before_dc_op(op); + __before_dc_op(op); if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */ aux = ARC_REG_DC_IVDC; @@ -255,7 +255,7 @@ static inline void __dc_entire_op(const int op) write_aux_reg(aux, 0x1); - __after_dc_op(op, ctrl_reg); + __after_dc_op(op); } /* For kernel mappings cache operation: index is same as paddr */ @@ -268,15 +268,14 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, unsigned long sz, const int op) { unsigned long flags; - unsigned int ctrl_reg; local_irq_save(flags); - ctrl_reg = __before_dc_op(op); + __before_dc_op(op); __cache_line_loop(paddr, vaddr, sz, op); - __after_dc_op(op, ctrl_reg); + __after_dc_op(op); local_irq_restore(flags); } -- cgit v1.1 From 11e14896ea3b0acbdcadb171bc40fc8fef37370e Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 4 Aug 2014 08:32:31 -0700 Subject: ARC: untangle cache flush loop - Remove the ifdef'ery and write distinct versions for each mmu ver even if there is some code duplication Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 80 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 25 deletions(-) (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 82d24c3..6fa5f0f 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -140,25 +140,19 @@ dc_chk: * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs */ -static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, - unsigned long sz, const int op) +static inline +void 
__cache_line_loop_v2(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int op) { - unsigned int aux_cmd, aux_tag; + unsigned int aux_cmd; int num_lines; - const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE; + const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; if (op == OP_INV_IC) { aux_cmd = ARC_REG_IC_IVIL; -#if (CONFIG_ARC_MMU_VER > 2) - aux_tag = ARC_REG_IC_PTAG; -#endif - } - else { + } else { /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; -#if (CONFIG_ARC_MMU_VER > 2) - aux_tag = ARC_REG_DC_PTAG; -#endif } /* Ensure we properly floor/ceil the non-line aligned/sized requests @@ -167,7 +161,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, * -@paddr will be cache-line aligned already (being page aligned) * -@sz will be integral multiple of line size (being page sized). */ - if (!full_page_op) { + if (!full_page) { sz += paddr & ~CACHE_LINE_MASK; paddr &= CACHE_LINE_MASK; vaddr &= CACHE_LINE_MASK; @@ -175,32 +169,68 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); -#if (CONFIG_ARC_MMU_VER <= 2) /* MMUv2 and before: paddr contains stuffed vaddrs bits */ paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; -#else - /* if V-P const for loop, PTAG can be written once outside loop */ - if (full_page_op) + + while (num_lines-- > 0) { + write_aux_reg(aux_cmd, paddr); + paddr += L1_CACHE_BYTES; + } +} + +static inline +void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int op) +{ + unsigned int aux_cmd, aux_tag; + int num_lines; + const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; + + if (op == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; + aux_tag = ARC_REG_IC_PTAG; + } else { + aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; + aux_tag = ARC_REG_DC_PTAG; + } + + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). 
+ */ + if (!full_page) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + vaddr &= CACHE_LINE_MASK; + } + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + + /* + * MMUv3, cache ops require paddr in PTAG reg + * if V-P const for loop, PTAG can be written once outside loop + */ + if (full_page) write_aux_reg(aux_tag, paddr); -#endif while (num_lines-- > 0) { -#if (CONFIG_ARC_MMU_VER > 2) - /* MMUv3, cache ops require paddr seperately */ - if (!full_page_op) { + if (!full_page) { write_aux_reg(aux_tag, paddr); paddr += L1_CACHE_BYTES; } write_aux_reg(aux_cmd, vaddr); vaddr += L1_CACHE_BYTES; -#else - write_aux_reg(aux_cmd, paddr); - paddr += L1_CACHE_BYTES; -#endif } } +#if (CONFIG_ARC_MMU_VER < 3) +#define __cache_line_loop __cache_line_loop_v2 +#elif (CONFIG_ARC_MMU_VER == 3) +#define __cache_line_loop __cache_line_loop_v3 +#endif + #ifdef CONFIG_ARC_HAS_DCACHE /*************************************************************** -- cgit v1.1 From a615b47dbf0d2cd8ba9ff922addef4e189c627bb Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 13 Oct 2014 14:20:39 +0530 Subject: ARC: entry.S: confine EXCEPTION_* macros to one file Signed-off-by: Vineet Gupta --- arch/arc/mm/tlbex.S | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index d572f1c..d224bf0 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -366,19 +366,5 @@ do_slow_path_pf: ; Slow path TLB Miss handled as a regular ARC Exception ; (stack switching / save the complete reg-file). - EXCEPTION_PROLOGUE - - ; ------- setup args for Linux Page fault Hanlder --------- - mov_s r1, sp - lr r0, [efa] - - ; We don't want exceptions to be disabled while the fault is handled. - ; Now that we have saved the context we return from exception hence - ; exceptions get re-enable - - FAKE_RET_FROM_EXCPN r9 - - bl do_page_fault - b ret_from_exception - + b call_do_page_fault END(EV_TLBMissD) -- cgit v1.1 From 1f6ccfff6314672743ad7252160654709e997a2a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 13 May 2013 18:30:41 +0530 Subject: ARCv2: Support for ARCv2 ISA and HS38x cores The notable features are: - SMP configurations of upto 4 cores with coherency - Optional L2 Cache and IO-Coherency - Revised Interrupt Architecture (multiple priorites, reg banks, auto stack switch, auto regfile save/restore) - MMUv4 (PIPT dcache, Huge Pages) - Instructions for * 64bit load/store: LDD, STD * Hardware assisted divide/remainder: DIV, REM * Function prologue/epilogue: ENTER_S, LEAVE_S * IRQ enable/disable: CLRI, SETI * pop count: FFS, FLS * SETcc, BMSKN, XBFU... 
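As an example of the leaner primitives, CLRI/SETI make IRQ state
save/restore a single instruction each; a minimal sketch of a possible
wrapper (illustrative only, not part of this patch):

	/* CLRI disables interrupts and hands back the prior enable state
	 * in a core reg; SETI restores it - no AUX reg read-modify-write
	 */
	static inline unsigned long arch_local_irq_save(void)
	{
		unsigned long flags;

		__asm__ __volatile__("clri %0" : "=r" (flags) : : "memory");
		return flags;
	}

	static inline void arch_local_irq_restore(unsigned long flags)
	{
		__asm__ __volatile__("seti %0" : : "ir" (flags) : "memory");
	}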
Signed-off-by: Vineet Gupta --- arch/arc/mm/tlbex.S | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index d224bf0..00c8d7f 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -35,8 +35,6 @@ * Rahul Trivedi, Amit Bhor: Codito Technologies 2004 */ - .cpu A7 - #include #include #include -- cgit v1.1 From d7a512bfe0be3790bae8465b4cb6c1bbca03c616 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 6 Apr 2015 17:22:39 +0530 Subject: ARCv2: MMUv4: TLB programming Model changes Signed-off-by: Vineet Gupta --- arch/arc/mm/tlb.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++--- arch/arc/mm/tlbex.S | 24 ++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 3 deletions(-) (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 914d8e0..2c7ce8b 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -113,6 +113,8 @@ static inline void __tlb_entry_erase(void) write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } +#if (CONFIG_ARC_MMU_VER < 4) + static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid) { unsigned int idx; @@ -210,6 +212,28 @@ static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } +#else /* CONFIG_ARC_MMU_VER >= 4) */ + +static void utlb_invalidate(void) +{ + /* No need since uTLB is always in sync with JTLB */ +} + +static void tlb_entry_erase(unsigned int vaddr_n_asid) +{ + write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid | _PAGE_PRESENT); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry); +} + +static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) +{ + write_aux_reg(ARC_REG_TLBPD0, pd0); + write_aux_reg(ARC_REG_TLBPD1, pd1); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry); +} + +#endif + /* * Un-conditionally (without lookup) erase the entire MMU contents */ @@ -582,6 +606,17 @@ void read_decode_mmu_bcr(void) #endif } *mmu3; + struct bcr_mmu_4 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1, + n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3; +#else + /* DTLB ITLB JES JE JA */ + unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2, + pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8; +#endif + } *mmu4; + tmp = read_aux_reg(ARC_REG_MMU_BCR); mmu->ver = (tmp >> 24); @@ -592,13 +627,21 @@ void read_decode_mmu_bcr(void) mmu->ways = 1 << mmu2->ways; mmu->u_dtlb = mmu2->u_dtlb; mmu->u_itlb = mmu2->u_itlb; - } else { + } else if (mmu->ver == 3) { mmu3 = (struct bcr_mmu_3 *)&tmp; mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); mmu->sets = 1 << mmu3->sets; mmu->ways = 1 << mmu3->ways; mmu->u_dtlb = mmu3->u_dtlb; mmu->u_itlb = mmu3->u_itlb; + } else { + mmu4 = (struct bcr_mmu_4 *)&tmp; + mmu->pg_sz_k = 1 << (mmu4->sz0 - 1); + mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11); + mmu->sets = 64 << mmu4->n_entry; + mmu->ways = mmu4->n_ways * 2; + mmu->u_dtlb = mmu4->u_dtlb * 4; + mmu->u_itlb = mmu4->u_itlb * 4; } mmu->num_tlb = mmu->sets * mmu->ways; @@ -608,10 +651,15 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) { int n = 0; struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu; + char super_pg[64] = ""; + + if (p_mmu->s_pg_sz_m) + scnprintf(super_pg, 64, "%dM Super Page%s, ", + p_mmu->s_pg_sz_m, " (not used)"); n += scnprintf(buf + n, len - n, - "MMU [v%x]\t: %dk PAGE, JTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n", - p_mmu->ver, p_mmu->pg_sz_k, + "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n", + p_mmu->ver, p_mmu->pg_sz_k, super_pg, 
p_mmu->num_tlb, p_mmu->sets, p_mmu->ways, p_mmu->u_dtlb, p_mmu->u_itlb, IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : ""); diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 00c8d7f..8624ebd 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -44,6 +44,7 @@ #include #include +#ifdef CONFIG_ISA_ARCOMPACT ;----------------------------------------------------------------- ; ARC700 Exception Handling doesn't auto-switch stack and it only provides ; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0" @@ -121,6 +122,24 @@ ex_saved_reg1: #endif .endm +#else /* ARCv2 */ + +.macro TLBMISS_FREEUP_REGS + PUSH r0 + PUSH r1 + PUSH r2 + PUSH r3 +.endm + +.macro TLBMISS_RESTORE_REGS + POP r3 + POP r2 + POP r1 + POP r0 +.endm + +#endif + ;============================================================================ ; Troubleshooting Stuff ;============================================================================ @@ -239,6 +258,7 @@ ex_saved_reg1: ; Commit the TLB entry into MMU .macro COMMIT_ENTRY_TO_MMU +#if (CONFIG_ARC_MMU_VER < 4) /* Get free TLB slot: Set = computed from vaddr, way = random */ sr TLBGetIndex, [ARC_REG_TLBCOMMAND] @@ -249,6 +269,10 @@ ex_saved_reg1: #else sr TLBWrite, [ARC_REG_TLBCOMMAND] #endif + +#else + sr TLBInsertEntry, [ARC_REG_TLBCOMMAND] +#endif .endm -- cgit v1.1 From d1f317d8254413447bcd6b6adbde24a985d256c2 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 6 Apr 2015 17:23:57 +0530 Subject: ARCv2: MMUv4: cache programming model changes Caveats about cache flush on ARCv2 based cores - dcache is PIPT so paddr is sufficient for cache maintenance ops (no need to setup PTAG reg - icache is still VIPT but only aliasing configs need PTAG setup So basically this is departure from MMU-v3 which always need vaddr in line ops registers (DC_IVDL, DC_FLDL, IC_IVIL) but paddr in DC_PTAG, IC_PTAG respectively. 
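Summarising who gets what, per configuration (per the comments in this
file; line-op regs being IVIL/IVDL/FLDL):

	MMU v1/v2:        paddr in line-op reg (vaddr bits stuffed in [4:0])
	MMU v3:           vaddr in line-op reg, paddr in IC_PTAG/DC_PTAG
	MMU v4 d$ (PIPT): paddr in line-op reg, no PTAG needed
	MMU v4 i$ (VIPT): paddr only; aliasing config needs vaddr in IVIL
	                  and paddr in IC_PTAG (as in MMU v3)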
Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 112 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 97 insertions(+), 15 deletions(-) (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 6fa5f0f..7a898f5 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -24,6 +24,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) { int n = 0; + struct cpuinfo_arc_cache *p; #define PR_CACHE(p, cfg, str) \ if (!(p)->ver) \ @@ -39,6 +40,11 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); + p = &cpuinfo_arc700[c].slc; + if (p->ver) + n += scnprintf(buf + n, len - n, + "SLC\t\t: %uK, %uB Line\n", p->sz_k, p->line_len); + return buf; } @@ -49,7 +55,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) */ void read_decode_cache_bcr(void) { - struct cpuinfo_arc_cache *p_ic, *p_dc; + struct cpuinfo_arc_cache *p_ic, *p_dc, *p_slc; unsigned int cpu = smp_processor_id(); struct bcr_cache { #ifdef CONFIG_CPU_BIG_ENDIAN @@ -59,14 +65,29 @@ void read_decode_cache_bcr(void) #endif } ibcr, dbcr; + struct bcr_generic sbcr; + + struct bcr_slc_cfg { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:24, way:2, lsz:2, sz:4; +#else + unsigned int sz:4, lsz:2, way:2, pad:24; +#endif + } slc_cfg; + p_ic = &cpuinfo_arc700[cpu].icache; READ_BCR(ARC_REG_IC_BCR, ibcr); if (!ibcr.ver) goto dc_chk; - BUG_ON(ibcr.config != 3); - p_ic->assoc = 2; /* Fixed to 2w set assoc */ + if (ibcr.ver <= 3) { + BUG_ON(ibcr.config != 3); + p_ic->assoc = 2; /* Fixed to 2w set assoc */ + } else if (ibcr.ver >= 4) { + p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */ + } + p_ic->line_len = 8 << ibcr.line_len; p_ic->sz_k = 1 << (ibcr.sz - 1); p_ic->ver = ibcr.ver; @@ -78,15 +99,32 @@ dc_chk: READ_BCR(ARC_REG_DC_BCR, dbcr); if (!dbcr.ver) - return; + goto slc_chk; + + if (dbcr.ver <= 3) { + BUG_ON(dbcr.config != 2); + p_dc->assoc = 4; /* Fixed to 4w set assoc */ + p_dc->vipt = 1; + p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1; + } else if (dbcr.ver >= 4) { + p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */ + p_dc->vipt = 0; + p_dc->alias = 0; /* PIPT so can't VIPT alias */ + } - BUG_ON(dbcr.config != 2); - p_dc->assoc = 4; /* Fixed to 4w set assoc */ p_dc->line_len = 16 << dbcr.line_len; p_dc->sz_k = 1 << (dbcr.sz - 1); p_dc->ver = dbcr.ver; - p_dc->vipt = 1; - p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1; + +slc_chk: + p_slc = &cpuinfo_arc700[cpu].slc; + READ_BCR(ARC_REG_SLC_BCR, sbcr); + if (sbcr.ver) { + READ_BCR(ARC_REG_SLC_CFG, slc_cfg); + p_slc->ver = sbcr.ver; + p_slc->sz_k = 128 << slc_cfg.sz; + p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64; + } } /* @@ -225,10 +263,53 @@ void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr, } } +/* + * In HS38x (MMU v4), although icache is VIPT, only paddr is needed for cache + * maintenance ops (in IVIL reg), as long as icache doesn't alias. 
+ * + * For Aliasing icache, vaddr is also needed (in IVIL), while paddr is + * specified in PTAG (similar to MMU v3) + */ +static inline +void __cache_line_loop_v4(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int cacheop) +{ + unsigned int aux_cmd; + int num_lines; + const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE; + + if (cacheop == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; + } else { + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; + } + + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). + */ + if (!full_page_op) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + } + + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + + while (num_lines-- > 0) { + write_aux_reg(aux_cmd, paddr); + paddr += L1_CACHE_BYTES; + } +} + #if (CONFIG_ARC_MMU_VER < 3) #define __cache_line_loop __cache_line_loop_v2 #elif (CONFIG_ARC_MMU_VER == 3) #define __cache_line_loop __cache_line_loop_v3 +#elif (CONFIG_ARC_MMU_VER > 3) +#define __cache_line_loop __cache_line_loop_v4 #endif #ifdef CONFIG_ARC_HAS_DCACHE @@ -669,7 +750,6 @@ void arc_cache_init(void) if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; - int handled; if (!dc->ver) panic("cache support enabled but non-existent cache\n"); @@ -678,12 +758,14 @@ void arc_cache_init(void) panic("DCache line [%d] != kernel Config [%d]", dc->line_len, L1_CACHE_BYTES); - /* check for D-Cache aliasing */ - handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); + /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */ + if (is_isa_arcompact()) { + int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); - if (dc->alias && !handled) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - else if (!dc->alias && handled) - panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + if (dc->alias && !handled) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + else if (!dc->alias && handled) + panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + } } } -- cgit v1.1 From bcc4d65abec2adb74157b34519e80331eb4427eb Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 4 Jun 2015 14:39:15 +0530 Subject: ARCv2: MMUv4: support aliasing icache config This is also default for AXS103 release Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 7a898f5..0eaaee6 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -21,6 +21,9 @@ #include #include +void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int cacheop); + char *arc_cache_mumbojumbo(int c, char *buf, int len) { int n = 0; @@ -414,7 +417,7 @@ __ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, unsigned long flags; local_irq_save(flags); - __cache_line_loop(paddr, vaddr, sz, OP_INV_IC); + (*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC); local_irq_restore(flags); } @@ -746,6 +749,15 @@ void arc_cache_init(void) if (ic->ver != CONFIG_ARC_MMU_VER) panic("Cache ver [%d] doesn't match MMU ver [%d]\n", ic->ver, CONFIG_ARC_MMU_VER); + + /* + * In MMU v4 
(HS38x) the alising icache config uses IVIL/PTAG + * pair to provide vaddr/paddr respectively, just as in MMU v3 + */ + if (is_isa_arcv2() && ic->alias) + _cache_line_loop_ic_fn = __cache_line_loop_v3; + else + _cache_line_loop_ic_fn = __cache_line_loop; } if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { -- cgit v1.1 From 795f4558562fd5318260d5d8144a2f8612aeda7b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 3 Apr 2015 12:37:07 +0300 Subject: ARCv2: SLC: Handle explcit flush for DMA ops (w/o IO-coherency) L2 cache on ARCHS processors is called SLC (System Level Cache) For working DMA (in absence of hardware assisted IO Coherency) we need to manage SLC explicitly when buffers transition between cpu and controllers. Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++-- arch/arc/mm/dma.c | 12 ++++++++++ 2 files changed, 74 insertions(+), 2 deletions(-) (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 0eaaee6..b29d62e 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -21,6 +21,8 @@ #include #include +static int l2_line_sz; + void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr, unsigned long sz, const int cacheop); @@ -120,13 +122,16 @@ dc_chk: p_dc->ver = dbcr.ver; slc_chk: + if (!is_isa_arcv2()) + return; + p_slc = &cpuinfo_arc700[cpu].slc; READ_BCR(ARC_REG_SLC_BCR, sbcr); if (sbcr.ver) { READ_BCR(ARC_REG_SLC_CFG, slc_cfg); p_slc->ver = sbcr.ver; p_slc->sz_k = 128 << slc_cfg.sz; - p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64; + l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64; } } @@ -460,6 +465,53 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, #endif /* CONFIG_ARC_HAS_ICACHE */ +noinline void slc_op(unsigned long paddr, unsigned long sz, const int op) +{ +#ifdef CONFIG_ISA_ARCV2 + unsigned long flags; + unsigned int ctrl; + + local_irq_save(flags); + + /* + * The Region Flush operation is specified by CTRL.RGN_OP[11..9] + * - b'000 (default) is Flush, + * - b'001 is Invalidate if CTRL.IM == 0 + * - b'001 is Flush-n-Invalidate if CTRL.IM == 1 + */ + ctrl = read_aux_reg(ARC_REG_SLC_CTRL); + + /* Don't rely on default value of IM bit */ + if (!(op & OP_FLUSH)) /* i.e. 
OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ + else + ctrl |= SLC_CTRL_IM; + + if (op & OP_INV) + ctrl |= SLC_CTRL_RGN_OP_INV; /* Inv or flush-n-inv */ + else + ctrl &= ~SLC_CTRL_RGN_OP_INV; + + write_aux_reg(ARC_REG_SLC_CTRL, ctrl); + + /* + * Lower bits are ignored, no need to clip + * END needs to be setup before START (latter triggers the operation) + * END can't be same as START, so add (l2_line_sz - 1) to sz + */ + write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1)); + write_aux_reg(ARC_REG_SLC_RGN_START, paddr); + + while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); + + local_irq_restore(flags); +#endif +} + +static inline int need_slc_flush(void) +{ + return is_isa_arcv2() && l2_line_sz; +} /*********************************************************** * Exported APIs @@ -509,22 +561,30 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); - void dma_cache_wback_inv(unsigned long start, unsigned long sz) { __dc_line_op_k(start, sz, OP_FLUSH_N_INV); + + if (need_slc_flush()) + slc_op(start, sz, OP_FLUSH_N_INV); } EXPORT_SYMBOL(dma_cache_wback_inv); void dma_cache_inv(unsigned long start, unsigned long sz) { __dc_line_op_k(start, sz, OP_INV); + + if (need_slc_flush()) + slc_op(start, sz, OP_INV); } EXPORT_SYMBOL(dma_cache_inv); void dma_cache_wback(unsigned long start, unsigned long sz) { __dc_line_op_k(start, sz, OP_FLUSH); + + if (need_slc_flush()) + slc_op(start, sz, OP_FLUSH); } EXPORT_SYMBOL(dma_cache_wback); diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 2cfe81d..74a637a 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -66,6 +66,18 @@ void *dma_alloc_coherent(struct device *dev, size_t size, /* This is bus address, platform dependent */ *dma_handle = (dma_addr_t)paddr; + /* + * Evict any existing L1 and/or L2 lines for the backing page + * in case it was used earlier as a normal "cached" page. + * Yeah this bit us - STAR 9000898266 + * + * Although core does call flush_cache_vmap(), it gets kvaddr hence + * can't be used to efficiently flush L1 and/or L2 which need paddr + * Currently flush_cache_vmap nukes the L1 cache completely which + * will be optimized as a separate commit + */ + dma_cache_wback_inv((unsigned long)paddr, size); + return kvaddr; } EXPORT_SYMBOL(dma_alloc_coherent); -- cgit v1.1 From 2924cd18c434c79ee777dc63616a8505045d7509 Mon Sep 17 00:00:00 2001 From: Ruud Derwig Date: Wed, 3 Dec 2014 15:52:41 +0100 Subject: ARCv2: [vdk] dts files and defconfig for HS38 VDK - CONFIG_ARC_UBOOT_SUPPORT to handle arguments passed in r0, r1, r2 - CONFIG_DEVTMPFS_MOUNT for mouting rootfs since it uses external cpio for rootfs Cc: Grant Likely Cc: Rob Herring Cc: devicetree@vger.kernel.org Signed-off-by: Ruud Derwig [vgupta: folded the Main baord DT files for smp/up into one] Signed-off-by: Vineet Gupta --- arch/arc/mm/tlbex.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 8624ebd..f6f4c3c 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -313,6 +313,7 @@ ENTRY(EV_TLBMissI) CONV_PTE_TO_TLB COMMIT_ENTRY_TO_MMU TLBMISS_RESTORE_REGS +EV_TLBMissI_fast_ret: ; additional label for VDK OS-kit instrumentation rtie END(EV_TLBMissI) @@ -378,6 +379,7 @@ ENTRY(EV_TLBMissD) COMMIT_ENTRY_TO_MMU TLBMISS_RESTORE_REGS +EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation rtie ;-------- Common routine to call Linux Page Fault Handler ----------- -- cgit v1.1