From 905cdf9dda5d89d843667b2f11da2308d1fd1c34 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 17 Mar 2015 23:37:58 -0400
Subject: ARM: hisi/hip04: remove the MCPM overhead

This platform is currently relying on the MCPM infrastructure for no
apparent reason.  The MCPM concurrency handling brings no benefits here
as there is no asynchronous CPU wake-ups to be concerned about (this is
used for CPU hotplug and secondary boot only, not for CPU idle).

This platform is also different from the other MCPM users because a given
CPU can't shut itself down completely without the assistance of another
CPU. This is at odds with the on-going MCPM backend refactoring.

To simplify things, this is converted to hook directly into the
smp_operations callbacks, bypassing the MCPM infrastructure.

Tested-by: Wei Xu <xuwei5@hisilicon.com>
Cc: Haojian Zhuang <haojian.zhuang@linaro.org>
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/mach-hisi/platmcpm.c | 127 ++++++++++++++----------------------------
 1 file changed, 42 insertions(+), 85 deletions(-)

diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c
index 280f3f1..880cbfa 100644
--- a/arch/arm/mach-hisi/platmcpm.c
+++ b/arch/arm/mach-hisi/platmcpm.c
@@ -6,6 +6,8 @@
  * under the terms and conditions of the GNU General Public License,
  * version 2, as published by the Free Software Foundation.
  */
+#include <linux/init.h>
+#include <linux/smp.h>
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/memblock.h>
@@ -13,7 +15,9 @@
 
 #include <asm/cputype.h>
 #include <asm/cp15.h>
-#include <asm/mcpm.h>
+#include <asm/cacheflush.h>
+#include <asm/smp.h>
+#include <asm/smp_plat.h>
 
 #include "core.h"
 
@@ -94,11 +98,16 @@ static void hip04_set_snoop_filter(unsigned int cluster, unsigned int on)
 	} while (data != readl_relaxed(fabric + FAB_SF_MODE));
 }
 
-static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
+static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle)
 {
+	unsigned int mpidr, cpu, cluster;
 	unsigned long data;
 	void __iomem *sys_dreq, *sys_status;
 
+	mpidr = cpu_logical_map(l_cpu);
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
 	if (!sysctrl)
 		return -ENODEV;
 	if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
@@ -118,6 +127,7 @@ static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
 			cpu_relax();
 			data = readl_relaxed(sys_status);
 		} while (data & CLUSTER_DEBUG_RESET_STATUS);
+		hip04_set_snoop_filter(cluster, 1);
 	}
 
 	data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
@@ -126,11 +136,15 @@ static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
 	do {
 		cpu_relax();
 	} while (data == readl_relaxed(sys_status));
+
 	/*
 	 * We may fail to power up core again without this delay.
 	 * It's not mentioned in document. It's found by test.
 	 */
 	udelay(20);
+
+	arch_send_wakeup_ipi_mask(cpumask_of(l_cpu));
+
 out:
 	hip04_cpu_table[cluster][cpu]++;
 	spin_unlock_irq(&boot_lock);
@@ -138,31 +152,29 @@ out:
 	return 0;
 }
 
-static void hip04_mcpm_power_down(void)
+static void hip04_cpu_die(unsigned int l_cpu)
 {
 	unsigned int mpidr, cpu, cluster;
-	bool skip_wfi = false, last_man = false;
+	bool last_man;
 
-	mpidr = read_cpuid_mpidr();
+	mpidr = cpu_logical_map(l_cpu);
 	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
 	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
 
-	__mcpm_cpu_going_down(cpu, cluster);
-
 	spin_lock(&boot_lock);
-	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
 	hip04_cpu_table[cluster][cpu]--;
 	if (hip04_cpu_table[cluster][cpu] == 1) {
 		/* A power_up request went ahead of us. */
-		skip_wfi = true;
+		spin_unlock(&boot_lock);
+		return;
 	} else if (hip04_cpu_table[cluster][cpu] > 1) {
 		pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu);
 		BUG();
 	}
 
 	last_man = hip04_cluster_is_down(cluster);
-	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
-		spin_unlock(&boot_lock);
+	spin_unlock(&boot_lock);
+	if (last_man) {
 		/* Since it's Cortex A15, disable L2 prefetching. */
 		asm volatile(
 		"mcr	p15, 1, %0, c15, c0, 3 \n\t"
@@ -170,34 +182,30 @@ static void hip04_mcpm_power_down(void)
 		"dsb	"
 		: : "r" (0x400) );
 		v7_exit_coherency_flush(all);
-		hip04_set_snoop_filter(cluster, 0);
-		__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
 	} else {
-		spin_unlock(&boot_lock);
 		v7_exit_coherency_flush(louis);
 	}
 
-	__mcpm_cpu_down(cpu, cluster);
-
-	if (!skip_wfi)
+	for (;;)
 		wfi();
 }
 
-static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
+static int hip04_cpu_kill(unsigned int l_cpu)
 {
+	unsigned int mpidr, cpu, cluster;
 	unsigned int data, tries, count;
-	int ret = -ETIMEDOUT;
 
+	mpidr = cpu_logical_map(l_cpu);
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
 	BUG_ON(cluster >= HIP04_MAX_CLUSTERS ||
 	       cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
 
 	count = TIMEOUT_MSEC / POLL_MSEC;
 	spin_lock_irq(&boot_lock);
 	for (tries = 0; tries < count; tries++) {
-		if (hip04_cpu_table[cluster][cpu]) {
-			ret = -EBUSY;
+		if (hip04_cpu_table[cluster][cpu])
 			goto err;
-		}
 		cpu_relax();
 		data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
 		if (data & CORE_WFI_STATUS(cpu))
@@ -220,64 +228,19 @@ static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
 	}
 	if (tries >= count)
 		goto err;
+	if (hip04_cluster_is_down(cluster))
+		hip04_set_snoop_filter(cluster, 0);
 	spin_unlock_irq(&boot_lock);
-	return 0;
+	return 1;
 err:
 	spin_unlock_irq(&boot_lock);
-	return ret;
-}
-
-static void hip04_mcpm_powered_up(void)
-{
-	unsigned int mpidr, cpu, cluster;
-
-	mpidr = read_cpuid_mpidr();
-	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
-	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
-
-	spin_lock(&boot_lock);
-	if (!hip04_cpu_table[cluster][cpu])
-		hip04_cpu_table[cluster][cpu] = 1;
-	spin_unlock(&boot_lock);
-}
-
-static void __naked hip04_mcpm_power_up_setup(unsigned int affinity_level)
-{
-	asm volatile ("			\n"
-"	cmp	r0, #0			\n"
-"	bxeq	lr			\n"
-	/* calculate fabric phys address */
-"	adr	r2, 2f			\n"
-"	ldmia	r2, {r1, r3}		\n"
-"	sub	r0, r2, r1		\n"
-"	ldr	r2, [r0, r3]		\n"
-	/* get cluster id from MPIDR */
-"	mrc	p15, 0, r0, c0, c0, 5	\n"
-"	ubfx	r1, r0, #8, #8		\n"
-	/* 1 << cluster id */
-"	mov	r0, #1			\n"
-"	mov	r3, r0, lsl r1		\n"
-"	ldr	r0, [r2, #"__stringify(FAB_SF_MODE)"]	\n"
-"	tst	r0, r3			\n"
-"	bxne	lr			\n"
-"	orr	r1, r0, r3		\n"
-"	str	r1, [r2, #"__stringify(FAB_SF_MODE)"]	\n"
-"1:	ldr	r0, [r2, #"__stringify(FAB_SF_MODE)"]	\n"
-"	tst	r0, r3			\n"
-"	beq	1b			\n"
-"	bx	lr			\n"
-
-"	.align	2			\n"
-"2:	.word	.			\n"
-"	.word	fabric_phys_addr	\n"
-	);
+	return 0;
 }
 
-static const struct mcpm_platform_ops hip04_mcpm_ops = {
-	.power_up		= hip04_mcpm_power_up,
-	.power_down		= hip04_mcpm_power_down,
-	.wait_for_powerdown	= hip04_mcpm_wait_for_powerdown,
-	.powered_up		= hip04_mcpm_powered_up,
+static struct smp_operations __initdata hip04_smp_ops = {
+	.smp_boot_secondary	= hip04_boot_secondary,
+	.cpu_die		= hip04_cpu_die,
+	.cpu_kill		= hip04_cpu_kill,
 };
 
 static bool __init hip04_cpu_table_init(void)
@@ -298,7 +261,7 @@ static bool __init hip04_cpu_table_init(void)
 	return true;
 }
 
-static int __init hip04_mcpm_init(void)
+static int __init hip04_smp_init(void)
 {
 	struct device_node *np, *np_sctl, *np_fab;
 	struct resource fab_res;
@@ -353,10 +316,6 @@ static int __init hip04_mcpm_init(void)
 		ret = -EINVAL;
 		goto err_table;
 	}
-	ret = mcpm_platform_register(&hip04_mcpm_ops);
-	if (ret) {
-		goto err_table;
-	}
 
 	/*
 	 * Fill the instruction address that is used after secondary core
@@ -364,13 +323,11 @@ static int __init hip04_mcpm_init(void)
 	 */
 	writel_relaxed(hip04_boot_method[0], relocation);
 	writel_relaxed(0xa5a5a5a5, relocation + 4);	/* magic number */
-	writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
+	writel_relaxed(virt_to_phys(secondary_startup), relocation + 8);
 	writel_relaxed(0, relocation + 12);
 	iounmap(relocation);
 
-	mcpm_sync_init(hip04_mcpm_power_up_setup);
-	mcpm_smp_set_ops();
-	pr_info("HiP04 MCPM initialized\n");
+	smp_set_ops(&hip04_smp_ops);
 	return ret;
 err_table:
 	iounmap(fabric);
@@ -383,4 +340,4 @@ err_reloc:
 err:
 	return ret;
 }
-early_initcall(hip04_mcpm_init);
+early_initcall(hip04_smp_init);
-- 
cgit v1.1


From 77404d81cadf192cc1261d6269f622a06b83cdd5 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 28 Apr 2015 13:44:00 -0400
Subject: ARM: MCPM: remove backward compatibility code

Now that no one uses the old callbacks anymore, let's remove them
and associated support code.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Dave Martin <Dave.Martin@arm.com>
---
 arch/arm/common/mcpm_entry.c | 45 +++++---------------------------------------
 arch/arm/include/asm/mcpm.h  |  6 ------
 2 files changed, 5 insertions(+), 46 deletions(-)

diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 5f8a52a..0908f96 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -78,16 +78,11 @@ int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster)
 	bool cpu_is_down, cluster_is_down;
 	int ret = 0;
 
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
 	if (!platform_ops)
 		return -EUNATCH; /* try not to shadow power_up errors */
 	might_sleep();
 
-	/* backward compatibility callback */
-	if (platform_ops->power_up)
-		return platform_ops->power_up(cpu, cluster);
-
-	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
-
 	/*
 	 * Since this is called with IRQs enabled, and no arch_spin_lock_irq
 	 * variant exists, we need to disable IRQs manually here.
@@ -128,29 +123,17 @@ void mcpm_cpu_power_down(void)
 	bool cpu_going_down, last_man;
 	phys_reset_t phys_reset;
 
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
 	if (WARN_ON_ONCE(!platform_ops))
 	       return;
 	BUG_ON(!irqs_disabled());
 
-	/*
-	 * Do this before calling into the power_down method,
-	 * as it might not always be safe to do afterwards.
-	 */
 	setup_mm_for_reboot();
 
-	/* backward compatibility callback */
-	if (platform_ops->power_down) {
-		platform_ops->power_down();
-		goto not_dead;
-	}
-
-	mpidr = read_cpuid_mpidr();
-	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
-	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
-	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
-
 	__mcpm_cpu_going_down(cpu, cluster);
-
 	arch_spin_lock(&mcpm_lock);
 	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
 
@@ -187,7 +170,6 @@ void mcpm_cpu_power_down(void)
 	if (cpu_going_down)
 		wfi();
 
-not_dead:
 	/*
 	 * It is possible for a power_up request to happen concurrently
 	 * with a power_down request for the same CPU. In this case the
@@ -224,17 +206,6 @@ void mcpm_cpu_suspend(u64 expected_residency)
 	if (WARN_ON_ONCE(!platform_ops))
 		return;
 
-	/* backward compatibility callback */
-	if (platform_ops->suspend) {
-		phys_reset_t phys_reset;
-		BUG_ON(!irqs_disabled());
-		setup_mm_for_reboot();
-		platform_ops->suspend(expected_residency);
-		phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
-		phys_reset(virt_to_phys(mcpm_entry_point));
-		BUG();
-	}
-
 	/* Some platforms might have to enable special resume modes, etc. */
 	if (platform_ops->cpu_suspend_prepare) {
 		unsigned int mpidr = read_cpuid_mpidr();
@@ -256,12 +227,6 @@ int mcpm_cpu_powered_up(void)
 	if (!platform_ops)
 		return -EUNATCH;
 
-	/* backward compatibility callback */
-	if (platform_ops->powered_up) {
-		platform_ops->powered_up();
-		return 0;
-	}
-
 	mpidr = read_cpuid_mpidr();
 	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
 	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index 50b378f..e2118c9 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -234,12 +234,6 @@ struct mcpm_platform_ops {
 	void (*cpu_is_up)(unsigned int cpu, unsigned int cluster);
 	void (*cluster_is_up)(unsigned int cluster);
 	int (*wait_for_powerdown)(unsigned int cpu, unsigned int cluster);
-
-	/* deprecated callbacks */
-	int (*power_up)(unsigned int cpu, unsigned int cluster);
-	void (*power_down)(void);
-	void (*suspend)(u64);
-	void (*powered_up)(void);
 };
 
 /**
-- 
cgit v1.1


From 7cc8b991cdc985aaa73bf9c429c810cd442fb74d Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 28 Apr 2015 14:11:07 -0400
Subject: ARM: MCPM: make internal helpers private to the core code

This concerns the following helpers:

	__mcpm_cpu_going_down()
	__mcpm_cpu_down()
	__mcpm_outbound_enter_critical()
	__mcpm_outbound_leave_critical()
	__mcpm_cluster_state()

They are and should only be used by the core code now.  Therefore their
declarations are removed from mcpm.h and their definitions are made
static, hence the need to move them before their users which accounts
for the bulk of this patch.

This left the mcpm_sync_struct definition at an odd location, therefore
it is moved as well with some comment clarifications.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Dave Martin <Dave.Martin@arm.com>
---
 arch/arm/common/mcpm_entry.c | 229 ++++++++++++++++++++++---------------------
 arch/arm/include/asm/mcpm.h  |  52 +++++-----
 2 files changed, 138 insertions(+), 143 deletions(-)

diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 0908f96..c5fe2e3 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -20,6 +20,121 @@
 #include <asm/cputype.h>
 #include <asm/suspend.h>
 
+
+struct sync_struct mcpm_sync;
+
+/*
+ * __mcpm_cpu_going_down: Indicates that the cpu is being torn down.
+ *    This must be called at the point of committing to teardown of a CPU.
+ *    The CPU cache (SCTRL.C bit) is expected to still be active.
+ */
+static void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster)
+{
+	mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_GOING_DOWN;
+	sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu);
+}
+
+/*
+ * __mcpm_cpu_down: Indicates that cpu teardown is complete and that the
+ *    cluster can be torn down without disrupting this CPU.
+ *    To avoid deadlocks, this must be called before a CPU is powered down.
+ *    The CPU cache (SCTRL.C bit) is expected to be off.
+ *    However L2 cache might or might not be active.
+ */
+static void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster)
+{
+	dmb();
+	mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN;
+	sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu);
+	sev();
+}
+
+/*
+ * __mcpm_outbound_leave_critical: Leave the cluster teardown critical section.
+ * @state: the final state of the cluster:
+ *     CLUSTER_UP: no destructive teardown was done and the cluster has been
+ *         restored to the previous state (CPU cache still active); or
+ *     CLUSTER_DOWN: the cluster has been torn-down, ready for power-off
+ *         (CPU cache disabled, L2 cache either enabled or disabled).
+ */
+static void __mcpm_outbound_leave_critical(unsigned int cluster, int state)
+{
+	dmb();
+	mcpm_sync.clusters[cluster].cluster = state;
+	sync_cache_w(&mcpm_sync.clusters[cluster].cluster);
+	sev();
+}
+
+/*
+ * __mcpm_outbound_enter_critical: Enter the cluster teardown critical section.
+ * This function should be called by the last man, after local CPU teardown
+ * is complete.  CPU cache expected to be active.
+ *
+ * Returns:
+ *     false: the critical section was not entered because an inbound CPU was
+ *         observed, or the cluster is already being set up;
+ *     true: the critical section was entered: it is now safe to tear down the
+ *         cluster.
+ */
+static bool __mcpm_outbound_enter_critical(unsigned int cpu, unsigned int cluster)
+{
+	unsigned int i;
+	struct mcpm_sync_struct *c = &mcpm_sync.clusters[cluster];
+
+	/* Warn inbound CPUs that the cluster is being torn down: */
+	c->cluster = CLUSTER_GOING_DOWN;
+	sync_cache_w(&c->cluster);
+
+	/* Back out if the inbound cluster is already in the critical region: */
+	sync_cache_r(&c->inbound);
+	if (c->inbound == INBOUND_COMING_UP)
+		goto abort;
+
+	/*
+	 * Wait for all CPUs to get out of the GOING_DOWN state, so that local
+	 * teardown is complete on each CPU before tearing down the cluster.
+	 *
+	 * If any CPU has been woken up again from the DOWN state, then we
+	 * shouldn't be taking the cluster down at all: abort in that case.
+	 */
+	sync_cache_r(&c->cpus);
+	for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++) {
+		int cpustate;
+
+		if (i == cpu)
+			continue;
+
+		while (1) {
+			cpustate = c->cpus[i].cpu;
+			if (cpustate != CPU_GOING_DOWN)
+				break;
+
+			wfe();
+			sync_cache_r(&c->cpus[i].cpu);
+		}
+
+		switch (cpustate) {
+		case CPU_DOWN:
+			continue;
+
+		default:
+			goto abort;
+		}
+	}
+
+	return true;
+
+abort:
+	__mcpm_outbound_leave_critical(cluster, CLUSTER_UP);
+	return false;
+}
+
+static int __mcpm_cluster_state(unsigned int cluster)
+{
+	sync_cache_r(&mcpm_sync.clusters[cluster].cluster);
+	return mcpm_sync.clusters[cluster].cluster;
+}
+
 extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];
 
 void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr)
@@ -299,120 +414,6 @@ int __init mcpm_loopback(void (*cache_disable)(void))
 
 #endif
 
-struct sync_struct mcpm_sync;
-
-/*
- * __mcpm_cpu_going_down: Indicates that the cpu is being torn down.
- *    This must be called at the point of committing to teardown of a CPU.
- *    The CPU cache (SCTRL.C bit) is expected to still be active.
- */
-void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster)
-{
-	mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_GOING_DOWN;
-	sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu);
-}
-
-/*
- * __mcpm_cpu_down: Indicates that cpu teardown is complete and that the
- *    cluster can be torn down without disrupting this CPU.
- *    To avoid deadlocks, this must be called before a CPU is powered down.
- *    The CPU cache (SCTRL.C bit) is expected to be off.
- *    However L2 cache might or might not be active.
- */
-void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster)
-{
-	dmb();
-	mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN;
-	sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu);
-	sev();
-}
-
-/*
- * __mcpm_outbound_leave_critical: Leave the cluster teardown critical section.
- * @state: the final state of the cluster:
- *     CLUSTER_UP: no destructive teardown was done and the cluster has been
- *         restored to the previous state (CPU cache still active); or
- *     CLUSTER_DOWN: the cluster has been torn-down, ready for power-off
- *         (CPU cache disabled, L2 cache either enabled or disabled).
- */
-void __mcpm_outbound_leave_critical(unsigned int cluster, int state)
-{
-	dmb();
-	mcpm_sync.clusters[cluster].cluster = state;
-	sync_cache_w(&mcpm_sync.clusters[cluster].cluster);
-	sev();
-}
-
-/*
- * __mcpm_outbound_enter_critical: Enter the cluster teardown critical section.
- * This function should be called by the last man, after local CPU teardown
- * is complete.  CPU cache expected to be active.
- *
- * Returns:
- *     false: the critical section was not entered because an inbound CPU was
- *         observed, or the cluster is already being set up;
- *     true: the critical section was entered: it is now safe to tear down the
- *         cluster.
- */
-bool __mcpm_outbound_enter_critical(unsigned int cpu, unsigned int cluster)
-{
-	unsigned int i;
-	struct mcpm_sync_struct *c = &mcpm_sync.clusters[cluster];
-
-	/* Warn inbound CPUs that the cluster is being torn down: */
-	c->cluster = CLUSTER_GOING_DOWN;
-	sync_cache_w(&c->cluster);
-
-	/* Back out if the inbound cluster is already in the critical region: */
-	sync_cache_r(&c->inbound);
-	if (c->inbound == INBOUND_COMING_UP)
-		goto abort;
-
-	/*
-	 * Wait for all CPUs to get out of the GOING_DOWN state, so that local
-	 * teardown is complete on each CPU before tearing down the cluster.
-	 *
-	 * If any CPU has been woken up again from the DOWN state, then we
-	 * shouldn't be taking the cluster down at all: abort in that case.
-	 */
-	sync_cache_r(&c->cpus);
-	for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++) {
-		int cpustate;
-
-		if (i == cpu)
-			continue;
-
-		while (1) {
-			cpustate = c->cpus[i].cpu;
-			if (cpustate != CPU_GOING_DOWN)
-				break;
-
-			wfe();
-			sync_cache_r(&c->cpus[i].cpu);
-		}
-
-		switch (cpustate) {
-		case CPU_DOWN:
-			continue;
-
-		default:
-			goto abort;
-		}
-	}
-
-	return true;
-
-abort:
-	__mcpm_outbound_leave_critical(cluster, CLUSTER_UP);
-	return false;
-}
-
-int __mcpm_cluster_state(unsigned int cluster)
-{
-	sync_cache_r(&mcpm_sync.clusters[cluster].cluster);
-	return mcpm_sync.clusters[cluster].cluster;
-}
-
 extern unsigned long mcpm_power_up_setup_phys;
 
 int __init mcpm_sync_init(
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index e2118c9..6a40d5f 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -245,35 +245,6 @@ struct mcpm_platform_ops {
  */
 int __init mcpm_platform_register(const struct mcpm_platform_ops *ops);
 
-/* Synchronisation structures for coordinating safe cluster setup/teardown: */
-
-/*
- * When modifying this structure, make sure you update the MCPM_SYNC_ defines
- * to match.
- */
-struct mcpm_sync_struct {
-	/* individual CPU states */
-	struct {
-		s8 cpu __aligned(__CACHE_WRITEBACK_GRANULE);
-	} cpus[MAX_CPUS_PER_CLUSTER];
-
-	/* cluster state */
-	s8 cluster __aligned(__CACHE_WRITEBACK_GRANULE);
-
-	/* inbound-side state */
-	s8 inbound __aligned(__CACHE_WRITEBACK_GRANULE);
-};
-
-struct sync_struct {
-	struct mcpm_sync_struct clusters[MAX_NR_CLUSTERS];
-};
-
-void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster);
-void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster);
-void __mcpm_outbound_leave_critical(unsigned int cluster, int state);
-bool __mcpm_outbound_enter_critical(unsigned int this_cpu, unsigned int cluster);
-int __mcpm_cluster_state(unsigned int cluster);
-
 /**
  * mcpm_sync_init - Initialize the cluster synchronization support
  *
@@ -312,6 +283,29 @@ int __init mcpm_loopback(void (*cache_disable)(void));
 
 void __init mcpm_smp_set_ops(void);
 
+/*
+ * Synchronisation structures for coordinating safe cluster setup/teardown.
+ * This is private to the MCPM core code and shared between C and assembly.
+ * When modifying this structure, make sure you update the MCPM_SYNC_ defines
+ * to match.
+ */
+struct mcpm_sync_struct {
+	/* individual CPU states */
+	struct {
+		s8 cpu __aligned(__CACHE_WRITEBACK_GRANULE);
+	} cpus[MAX_CPUS_PER_CLUSTER];
+
+	/* cluster state */
+	s8 cluster __aligned(__CACHE_WRITEBACK_GRANULE);
+
+	/* inbound-side state */
+	s8 inbound __aligned(__CACHE_WRITEBACK_GRANULE);
+};
+
+struct sync_struct {
+	struct mcpm_sync_struct clusters[MAX_NR_CLUSTERS];
+};
+
 #else
 
 /* 
-- 
cgit v1.1


From 1c2c7d51c8101ab3c5d8585713d2dcd52d77d33e Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 28 Apr 2015 14:51:36 -0400
Subject: ARM: MCPM: add references to the available documentation in the code

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Dave Martin <Dave.Martin@arm.com>
---
 arch/arm/common/mcpm_entry.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index c5fe2e3..4924675 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -20,6 +20,11 @@
 #include <asm/cputype.h>
 #include <asm/suspend.h>
 
+/*
+ * The public API for this code is documented in arch/arm/include/asm/mcpm.h.
+ * For a comprehensive description of the main algorithm used here, please
+ * see Documentation/arm/cluster-pm-race-avoidance.txt.
+ */
 
 struct sync_struct mcpm_sync;
 
-- 
cgit v1.1


From 7895f73169ade9a74940ae6b0b4ee82faf286861 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 28 Apr 2015 15:51:19 -0400
Subject: ARM: MCPM: remove residency argument from mcpm_cpu_suspend()

This is currently unused.

If a suspend must be limited to CPU level only by preventing the last man
from triggering a cluster level suspend then this should be determined
according to many other criteria the MCPM layer is currently not aware of.
It is unlikely that mcpm_cpu_suspend() would be the proper conduit for
that information anyway.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Dave Martin <Dave.Martin@arm.com>
---
 arch/arm/common/mcpm_entry.c         |  2 +-
 arch/arm/include/asm/mcpm.h          | 15 +++++----------
 arch/arm/mach-exynos/suspend.c       |  8 +-------
 drivers/cpuidle/cpuidle-big_little.c |  8 +-------
 4 files changed, 8 insertions(+), 25 deletions(-)

diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 4924675..a923524 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -321,7 +321,7 @@ int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster)
 	return ret;
 }
 
-void mcpm_cpu_suspend(u64 expected_residency)
+void mcpm_cpu_suspend(void)
 {
 	if (WARN_ON_ONCE(!platform_ops))
 		return;
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index 6a40d5f..acd4983 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -137,17 +137,12 @@ int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster);
 /**
  * mcpm_cpu_suspend - bring the calling CPU in a suspended state
  *
- * @expected_residency: duration in microseconds the CPU is expected
- *			to remain suspended, or 0 if unknown/infinity.
- *
- * The calling CPU is suspended.  The expected residency argument is used
- * as a hint by the platform specific backend to implement the appropriate
- * sleep state level according to the knowledge it has on wake-up latency
- * for the given hardware.
+ * The calling CPU is suspended.  This is similar to mcpm_cpu_power_down()
+ * except for possible extra platform specific configuration steps to allow
+ * an asynchronous wake-up e.g. with a pending interrupt.
  *
  * If this CPU is found to be the "last man standing" in the cluster
- * then the cluster may be prepared for power-down too, if the expected
- * residency makes it worthwhile.
+ * then the cluster may be prepared for power-down too.
  *
  * This must be called with interrupts disabled.
  *
@@ -157,7 +152,7 @@ int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster);
  * This will return if mcpm_platform_register() has not been called
  * previously in which case the caller should take appropriate action.
  */
-void mcpm_cpu_suspend(u64 expected_residency);
+void mcpm_cpu_suspend(void);
 
 /**
  * mcpm_cpu_powered_up - housekeeping workafter a CPU has been powered up
diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index 3e6aea7..372bd0b 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -311,13 +311,7 @@ static int exynos5420_cpu_suspend(unsigned long arg)
 
 	if (IS_ENABLED(CONFIG_EXYNOS5420_MCPM)) {
 		mcpm_set_entry_vector(cpu, cluster, exynos_cpu_resume);
-
-		/*
-		 * Residency value passed to mcpm_cpu_suspend back-end
-		 * has to be given clear semantics. Set to 0 as a
-		 * temporary value.
-		 */
-		mcpm_cpu_suspend(0);
+		mcpm_cpu_suspend();
 	}
 
 	pr_info("Failed to suspend the system\n");
diff --git a/drivers/cpuidle/cpuidle-big_little.c b/drivers/cpuidle/cpuidle-big_little.c
index 40c34fa..db2ede5 100644
--- a/drivers/cpuidle/cpuidle-big_little.c
+++ b/drivers/cpuidle/cpuidle-big_little.c
@@ -108,13 +108,7 @@ static int notrace bl_powerdown_finisher(unsigned long arg)
 	unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
 
 	mcpm_set_entry_vector(cpu, cluster, cpu_resume);
-
-	/*
-	 * Residency value passed to mcpm_cpu_suspend back-end
-	 * has to be given clear semantics. Set to 0 as a
-	 * temporary value.
-	 */
-	mcpm_cpu_suspend(0);
+	mcpm_cpu_suspend();
 
 	/* return value != 0 means failure */
 	return 1;
-- 
cgit v1.1


From 6b1814cde5c79c6aa4d02c9aedc14a709c2c0737 Mon Sep 17 00:00:00 2001
From: Maxime Coquelin stm32 <mcoquelin.stm32@gmail.com>
Date: Fri, 10 Apr 2015 09:46:46 +0100
Subject: ARM: 8340/1: ARMv7-M: Enlarge vector table up to 256 entries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From Cortex-M reference manuals, the nvic supports up to 240 interrupts.
So the number of entries in vectors table is up to 256.

This patch adds a new config flag to specify the number of external interrupts.
Some ifdeferies are added in order to respect the natural alignment without
wasting too much space on smaller systems.

Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Stefan Agner <stefan@agner.ch>
Tested-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/entry-v7m.S | 13 +++++++++----
 arch/arm/mm/Kconfig         | 15 +++++++++++++++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S
index 8944f49..b6c8bb9 100644
--- a/arch/arm/kernel/entry-v7m.S
+++ b/arch/arm/kernel/entry-v7m.S
@@ -117,9 +117,14 @@ ENTRY(__switch_to)
 ENDPROC(__switch_to)
 
 	.data
-	.align	8
+#if CONFIG_CPU_V7M_NUM_IRQ <= 112
+	.align	9
+#else
+	.align	10
+#endif
+
 /*
- * Vector table (64 words => 256 bytes natural alignment)
+ * Vector table (Natural alignment need to be ensured)
  */
 ENTRY(vector_table)
 	.long	0			@ 0 - Reset stack pointer
@@ -138,6 +143,6 @@ ENTRY(vector_table)
 	.long	__invalid_entry		@ 13 - Reserved
 	.long	__pendsv_entry		@ 14 - PendSV
 	.long	__invalid_entry		@ 15 - SysTick
-	.rept	64 - 16
-	.long	__irq_entry		@ 16..64 - External Interrupts
+	.rept	CONFIG_CPU_V7M_NUM_IRQ
+	.long	__irq_entry		@ External Interrupts
 	.endr
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index b4f92b9..6173aa3 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -604,6 +604,21 @@ config CPU_USE_DOMAINS
 	  This option enables or disables the use of domain switching
 	  via the set_fs() function.
 
+config CPU_V7M_NUM_IRQ
+	int "Number of external interrupts connected to the NVIC"
+	depends on CPU_V7M
+	default 90 if ARCH_STM32
+	default 38 if ARCH_EFM32
+	default 240
+	help
+	  This option indicates the number of interrupts connected to the NVIC.
+	  The value can be larger than the real number of interrupts supported
+	  by the system, but must not be lower.
+	  The default value is 240, corresponding to the maximum number of
+	  interrupts supported by the NVIC on Cortex-M family.
+
+	  If unsure, keep default value.
+
 #
 # CPU supports 36-bit I/O
 #
-- 
cgit v1.1


From 5bb5d66d89041b7891cb42617343b1e8067cc3fa Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Mon, 13 Apr 2015 14:18:50 +0100
Subject: ARM: 8341/1: io: Unpessimize relaxed io accessors

commit 195bbcac2e5c12f7fb ("ARM: 7500/1: io: avoid writeback addressing
modes for __raw_ accessors") disables writeback addressing modes for
raw i/o. However, the "+Q" output constraint forces the compiler to
disable load hoist optimizations (because the output constraint informs the
compiler of memory stores which the compiler assumes may alias other memory).

Since the relaxed accessors only guarantee ordering wrt i/o accesses to the
same device and not to main memory, there's never a possibility of an accessor
invalidating a hoisted load (because only non-i/o loads would have been hoisted).

The effect is especially noticable with complex address inputs in loops.
For example, the following code:

    #include <linux/kernel.h>
    #include <linux/io.h>

    static const int *remap;

    void wr_loop(void __iomem *base, int c, int val)
    {
            int i;

            for (i = 0; i < c; i++)
                    writew_relaxed(val, base + remap[c >> 2]);
    }

generates

           current master             |             this patch
 0: e3510000    cmp     r1, #0        |  0: e3510000    cmp     r1, #0
 4: d12fff1e    bxle    lr            |  4: d12fff1e    bxle    lr
 8: e3003000    movw    r3, #0        |  8: e3c1c003    bic     ip, r1, #3
 c: e3403000    movt    r3, #0        |  c: e6ff2072    uxth    r2, r2
10: e92d4010    push    {r4, lr}      | 10: e3a03000    mov     r3, #0
14: e6ff2072    uxth    r2, r2        | 14: e59cc000    ldr     ip, [ip]
18: e3c14003    bic     r4, r1, #3    | 18: e080000c    add     r0, r0, ip
1c: e593e000    ldr     lr, [r3]      |
20: e3a03000    mov     r3, #0        | 1c: e1c020b0    strh    r2, [r0]
                                      | 20: e2833001    add     r3, r3, #1
24: e79ec004    ldr     ip, [lr, r4]  | 24: e1530001    cmp     r3, r1
28: e080c00c    add     ip, r0, ip    | 28: 1afffffb    bne     1c
2c: e1cc20b0    strh    r2, [ip]      | 2c: e12fff1e    bx      lr
30: e2833001    add     r3, r3, #1    |
34: e1530001    cmp     r3, r1        |
38: 1afffff9    bne     24            |
                                      |
3c: e8bd8010    pop     {r4, pc}      |

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/io.h | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index db58deb..7744e58 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -73,17 +73,16 @@ void __raw_readsl(const volatile void __iomem *addr, void *data, int longlen);
 static inline void __raw_writew(u16 val, volatile void __iomem *addr)
 {
 	asm volatile("strh %1, %0"
-		     : "+Q" (*(volatile u16 __force *)addr)
-		     : "r" (val));
+		     : : "Q" (*(volatile u16 __force *)addr), "r" (val));
 }
 
 #define __raw_readw __raw_readw
 static inline u16 __raw_readw(const volatile void __iomem *addr)
 {
 	u16 val;
-	asm volatile("ldrh %1, %0"
-		     : "+Q" (*(volatile u16 __force *)addr),
-		       "=r" (val));
+	asm volatile("ldrh %0, %1"
+		     : "=r" (val)
+		     : "Q" (*(volatile u16 __force *)addr));
 	return val;
 }
 #endif
@@ -92,25 +91,23 @@ static inline u16 __raw_readw(const volatile void __iomem *addr)
 static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
 {
 	asm volatile("strb %1, %0"
-		     : "+Qo" (*(volatile u8 __force *)addr)
-		     : "r" (val));
+		     : : "Qo" (*(volatile u8 __force *)addr), "r" (val));
 }
 
 #define __raw_writel __raw_writel
 static inline void __raw_writel(u32 val, volatile void __iomem *addr)
 {
 	asm volatile("str %1, %0"
-		     : "+Qo" (*(volatile u32 __force *)addr)
-		     : "r" (val));
+		     : : "Qo" (*(volatile u32 __force *)addr), "r" (val));
 }
 
 #define __raw_readb __raw_readb
 static inline u8 __raw_readb(const volatile void __iomem *addr)
 {
 	u8 val;
-	asm volatile("ldrb %1, %0"
-		     : "+Qo" (*(volatile u8 __force *)addr),
-		       "=r" (val));
+	asm volatile("ldrb %0, %1"
+		     : "=r" (val)
+		     : "Qo" (*(volatile u8 __force *)addr));
 	return val;
 }
 
@@ -118,9 +115,9 @@ static inline u8 __raw_readb(const volatile void __iomem *addr)
 static inline u32 __raw_readl(const volatile void __iomem *addr)
 {
 	u32 val;
-	asm volatile("ldr %1, %0"
-		     : "+Qo" (*(volatile u32 __force *)addr),
-		       "=r" (val));
+	asm volatile("ldr %0, %1"
+		     : "=r" (val)
+		     : "Qo" (*(volatile u32 __force *)addr));
 	return val;
 }
 
-- 
cgit v1.1


From 9827e8e55532f5a7967d31da847fbd3185a5b5ed Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 27 Apr 2015 14:55:12 +0100
Subject: ARM: 8346/1: sa1100: Constify irq_domain_ops

The irq_domain_ops are not modified by the driver and the irqdomain core
code accepts pointer to a const data.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski.k@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-sa1100/irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c
index 65aebfa..ea3eb41 100644
--- a/arch/arm/mach-sa1100/irq.c
+++ b/arch/arm/mach-sa1100/irq.c
@@ -73,7 +73,7 @@ static int sa1100_normal_irqdomain_map(struct irq_domain *d,
 	return 0;
 }
 
-static struct irq_domain_ops sa1100_normal_irqdomain_ops = {
+static const struct irq_domain_ops sa1100_normal_irqdomain_ops = {
 	.map = sa1100_normal_irqdomain_map,
 	.xlate = irq_domain_xlate_onetwocell,
 };
-- 
cgit v1.1


From e748994f5cc59e82ef28e31bae680f15fdadb26f Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 4 May 2015 15:22:41 +0100
Subject: ARM: 8353/1: mm: Fix Cortex-A8 erratum 430973 segfaults for
 bootloaders and multiarch

Looks like apps can be made to segfault easily on armhf distros
just by running cpuburn-a8 in the background, then starting apt
get update unless erratum 430973 workaround is enabled. This happens
on r3p2 also, which has 430973 fixed in hardware.

Turns out the reason for this is some bootloaders incorrectly
setting the auxilary register IBE bit, which probably causes us
to hit erratum 687067 on Cortex-A8 later than r1p2.

If the bootloader incorrectly sets the IBE bit in the auxilary control
register for Cortex-A8 revisions with 430973 fixed in hardware, we
need to call flush BTAC/BTB to avoid segfaults probably caused by
erratum 687067. So let's flush BTAC/BTB unconditionally for Cortex-A8.
It won't do anything unless the IBE bit is set.

Note that we keep the erratum 430973 Kconfig option still around and
disabled for multiarch as it may be unsafe to enable for some secure
SoC. It is known safe to be enabled for n900, but won't do anything
on n900 as the IBE bit needs to be set with SMC.

Also note that SoCs probably should also add checks and print warnings
for the misconfigured IBE bit depending on the Cortex-A8 revision
so the bootloaders can be fixed Cortex-A8 revisions later than
r1p2 to not set the IBE bit.

Tested-by: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/proc-v7-2level.S | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index 10405b8..6f2f8f3 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -36,14 +36,16 @@
  *
  *	It is assumed that:
  *	- we are not using split page tables
+ *
+ *	Note that we always need to flush BTAC/BTB if IBE is set
+ *	even on Cortex-A8 revisions not affected by 430973.
+ *	If IBE is not set, the flush BTAC/BTB won't do anything.
  */
 ENTRY(cpu_ca8_switch_mm)
 #ifdef CONFIG_MMU
 	mov	r2, #0
-#ifdef CONFIG_ARM_ERRATA_430973
 	mcr	p15, 0, r2, c7, c5, 6		@ flush BTAC/BTB
 #endif
-#endif
 ENTRY(cpu_v7_switch_mm)
 #ifdef CONFIG_MMU
 	mmid	r1, r1				@ get mm->context.id
-- 
cgit v1.1


From 7d485f647c1f4a6976264c90447fb0dbf07b111d Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Nov 2014 16:54:35 +0100
Subject: ARM: 8220/1: allow modules outside of bl range

Loading modules far away from the kernel in memory is problematic
because the 'bl' instruction only has limited reach, and modules are not
built with PLTs. Instead of using the -mlong-calls option (which affects
all compiler emitted bl instructions, but not the ones in assembler),
this patch allocates some additional space at module load time, and
populates it with PLT like veneers when encountering relocations that
are out of range.

This should work with all relocations against symbols exported by the
kernel, including those resulting from GCC generated implicit function
calls for ftrace etc.

The module memory size increases by about 5% on average, regardless of
whether any PLT entries were actually needed. However, due to the page
based rounding that occurs when allocating module memory, the average
memory footprint increase is negligible.

Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig              |  17 +++-
 arch/arm/Makefile             |   4 +
 arch/arm/include/asm/module.h |  12 ++-
 arch/arm/kernel/Makefile      |   1 +
 arch/arm/kernel/module-plts.c | 181 ++++++++++++++++++++++++++++++++++++++++++
 arch/arm/kernel/module.c      |  32 +++++++-
 arch/arm/kernel/module.lds    |   4 +
 7 files changed, 248 insertions(+), 3 deletions(-)
 create mode 100644 arch/arm/kernel/module-plts.c
 create mode 100644 arch/arm/kernel/module.lds

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 45df48b..d0950ce 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -60,7 +60,7 @@ config ARM
 	select HAVE_KPROBES if !XIP_KERNEL
 	select HAVE_KRETPROBES if (HAVE_KPROBES)
 	select HAVE_MEMBLOCK
-	select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND
+	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
 	select HAVE_OPTPROBES if !THUMB2_KERNEL
 	select HAVE_PERF_EVENTS
@@ -1681,6 +1681,21 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE
 config ARCH_WANT_GENERAL_HUGETLB
 	def_bool y
 
+config ARM_MODULE_PLTS
+	bool "Use PLTs to allow module memory to spill over into vmalloc area"
+	depends on MODULES
+	help
+	  Allocate PLTs when loading modules so that jumps and calls whose
+	  targets are too far away for their relative offsets to be encoded
+	  in the instructions themselves can be bounced via veneers in the
+	  module's PLT. This allows modules to be allocated in the generic
+	  vmalloc area after the dedicated module memory area has been
+	  exhausted. The modules will use slightly more memory, but after
+	  rounding up to page size, the actual memory footprint is usually
+	  the same.
+
+	  Say y if you are getting out of memory errors while loading modules
+
 source "mm/Kconfig"
 
 config FORCE_MAX_ZONEORDER
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 985227c..ffb53e8 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -19,6 +19,10 @@ LDFLAGS_vmlinux	+= --be8
 LDFLAGS_MODULE	+= --be8
 endif
 
+ifeq ($(CONFIG_ARM_MODULE_PLTS),y)
+LDFLAGS_MODULE	+= -T $(srctree)/arch/arm/kernel/module.lds
+endif
+
 OBJCOPYFLAGS	:=-O binary -R .comment -S
 GZFLAGS		:=-9
 #KBUILD_CFLAGS	+=-pipe
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index ed690c4..e358b79 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -16,11 +16,21 @@ enum {
 	ARM_SEC_UNLIKELY,
 	ARM_SEC_MAX,
 };
+#endif
 
 struct mod_arch_specific {
+#ifdef CONFIG_ARM_UNWIND
 	struct unwind_table *unwind[ARM_SEC_MAX];
-};
 #endif
+#ifdef CONFIG_ARM_MODULE_PLTS
+	struct elf32_shdr   *core_plt;
+	struct elf32_shdr   *init_plt;
+	int		    core_plt_count;
+	int		    init_plt_count;
+#endif
+};
+
+u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val);
 
 /*
  * Add the ARM architecture version to the version magic string
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 752725d..32c0990 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
 obj-$(CONFIG_ISA_DMA_API)	+= dma.o
 obj-$(CONFIG_FIQ)		+= fiq.o fiqasm.o
 obj-$(CONFIG_MODULES)		+= armksyms.o module.o
+obj-$(CONFIG_ARM_MODULE_PLTS)	+= module-plts.o
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
 obj-$(CONFIG_ARM_CPU_SUSPEND)	+= sleep.o suspend.o
diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
new file mode 100644
index 0000000..71a65c4
--- /dev/null
+++ b/arch/arm/kernel/module-plts.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cache.h>
+#include <asm/opcodes.h>
+
+#define PLT_ENT_STRIDE		L1_CACHE_BYTES
+#define PLT_ENT_COUNT		(PLT_ENT_STRIDE / sizeof(u32))
+#define PLT_ENT_SIZE		(sizeof(struct plt_entries) / PLT_ENT_COUNT)
+
+#ifdef CONFIG_THUMB2_KERNEL
+#define PLT_ENT_LDR		__opcode_to_mem_thumb32(0xf8dff000 | \
+							(PLT_ENT_STRIDE - 4))
+#else
+#define PLT_ENT_LDR		__opcode_to_mem_arm(0xe59ff000 | \
+						    (PLT_ENT_STRIDE - 8))
+#endif
+
+struct plt_entries {
+	u32	ldr[PLT_ENT_COUNT];
+	u32	lit[PLT_ENT_COUNT];
+};
+
+static bool in_init(const struct module *mod, u32 addr)
+{
+	return addr - (u32)mod->module_init < mod->init_size;
+}
+
+u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
+{
+	struct plt_entries *plt, *plt_end;
+	int c, *count;
+
+	if (in_init(mod, loc)) {
+		plt = (void *)mod->arch.init_plt->sh_addr;
+		plt_end = (void *)plt + mod->arch.init_plt->sh_size;
+		count = &mod->arch.init_plt_count;
+	} else {
+		plt = (void *)mod->arch.core_plt->sh_addr;
+		plt_end = (void *)plt + mod->arch.core_plt->sh_size;
+		count = &mod->arch.core_plt_count;
+	}
+
+	/* Look for an existing entry pointing to 'val' */
+	for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) {
+		int i;
+
+		if (!c) {
+			/* Populate a new set of entries */
+			*plt = (struct plt_entries){
+				{ [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, },
+				{ val, }
+			};
+			++*count;
+			return (u32)plt->ldr;
+		}
+		for (i = 0; i < PLT_ENT_COUNT; i++) {
+			if (!plt->lit[i]) {
+				plt->lit[i] = val;
+				++*count;
+			}
+			if (plt->lit[i] == val)
+				return (u32)&plt->ldr[i];
+		}
+	}
+	BUG();
+}
+
+static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num,
+			   u32 mask)
+{
+	u32 *loc1, *loc2;
+	int i;
+
+	for (i = 0; i < num; i++) {
+		if (rel[i].r_info != rel[num].r_info)
+			continue;
+
+		/*
+		 * Identical relocation types against identical symbols can
+		 * still result in different PLT entries if the addend in the
+		 * place is different. So resolve the target of the relocation
+		 * to compare the values.
+		 */
+		loc1 = (u32 *)(base + rel[i].r_offset);
+		loc2 = (u32 *)(base + rel[num].r_offset);
+		if (((*loc1 ^ *loc2) & mask) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/* Count how many PLT entries we may need */
+static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num)
+{
+	unsigned int ret = 0;
+	int i;
+
+	/*
+	 * Sure, this is order(n^2), but it's usually short, and not
+	 * time critical
+	 */
+	for (i = 0; i < num; i++)
+		switch (ELF32_R_TYPE(rel[i].r_info)) {
+		case R_ARM_CALL:
+		case R_ARM_PC24:
+		case R_ARM_JUMP24:
+			if (!duplicate_rel(base, rel, i,
+					   __opcode_to_mem_arm(0x00ffffff)))
+				ret++;
+			break;
+		case R_ARM_THM_CALL:
+		case R_ARM_THM_JUMP24:
+			if (!duplicate_rel(base, rel, i,
+					   __opcode_to_mem_thumb32(0x07ff2fff)))
+				ret++;
+		}
+	return ret;
+}
+
+int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+			      char *secstrings, struct module *mod)
+{
+	unsigned long core_plts = 0, init_plts = 0;
+	Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
+
+	/*
+	 * To store the PLTs, we expand the .text section for core module code
+	 * and the .init.text section for initialization code.
+	 */
+	for (s = sechdrs; s < sechdrs_end; ++s)
+		if (strcmp(".core.plt", secstrings + s->sh_name) == 0)
+			mod->arch.core_plt = s;
+		else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
+			mod->arch.init_plt = s;
+
+	if (!mod->arch.core_plt || !mod->arch.init_plt) {
+		pr_err("%s: sections missing\n", mod->name);
+		return -ENOEXEC;
+	}
+
+	for (s = sechdrs + 1; s < sechdrs_end; ++s) {
+		const Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
+		int numrels = s->sh_size / sizeof(Elf32_Rel);
+		Elf32_Shdr *dstsec = sechdrs + s->sh_info;
+
+		if (s->sh_type != SHT_REL)
+			continue;
+
+		if (strstr(secstrings + s->sh_name, ".init"))
+			init_plts += count_plts(dstsec->sh_addr, rels, numrels);
+		else
+			core_plts += count_plts(dstsec->sh_addr, rels, numrels);
+	}
+
+	mod->arch.core_plt->sh_type = SHT_NOBITS;
+	mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE,
+					       sizeof(struct plt_entries));
+	mod->arch.core_plt_count = 0;
+
+	mod->arch.init_plt->sh_type = SHT_NOBITS;
+	mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE,
+					       sizeof(struct plt_entries));
+	mod->arch.init_plt_count = 0;
+	pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__,
+		 mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size);
+	return 0;
+}
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index af791f4..efdddcb 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -40,7 +40,12 @@
 #ifdef CONFIG_MMU
 void *module_alloc(unsigned long size)
 {
-	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+	void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+				GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+				__builtin_return_address(0));
+	if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
+		return p;
+	return __vmalloc_node_range(size, 1,  VMALLOC_START, VMALLOC_END,
 				GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
 				__builtin_return_address(0));
 }
@@ -110,6 +115,20 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 				offset -= 0x04000000;
 
 			offset += sym->st_value - loc;
+
+			/*
+			 * Route through a PLT entry if 'offset' exceeds the
+			 * supported range. Note that 'offset + loc + 8'
+			 * contains the absolute jump target, i.e.,
+			 * @sym + addend, corrected for the +8 PC bias.
+			 */
+			if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) &&
+			    (offset <= (s32)0xfe000000 ||
+			     offset >= (s32)0x02000000))
+				offset = get_module_plt(module, loc,
+							offset + loc + 8)
+					 - loc - 8;
+
 			if (offset <= (s32)0xfe000000 ||
 			    offset >= (s32)0x02000000) {
 				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
@@ -203,6 +222,17 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 				offset -= 0x02000000;
 			offset += sym->st_value - loc;
 
+			/*
+			 * Route through a PLT entry if 'offset' exceeds the
+			 * supported range.
+			 */
+			if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) &&
+			    (offset <= (s32)0xff000000 ||
+			     offset >= (s32)0x01000000))
+				offset = get_module_plt(module, loc,
+							offset + loc + 4)
+					 - loc - 4;
+
 			if (offset <= (s32)0xff000000 ||
 			    offset >= (s32)0x01000000) {
 				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds
new file mode 100644
index 0000000..3682fa1
--- /dev/null
+++ b/arch/arm/kernel/module.lds
@@ -0,0 +1,4 @@
+SECTIONS {
+        .core.plt : { BYTE(0) }
+        .init.plt : { BYTE(0) }
+}
-- 
cgit v1.1


From 7ddfe625cbc14c83153c78aacd52a20c5805920e Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Thu, 7 May 2015 14:22:40 +0100
Subject: ARM: optimize memset_io()/memcpy_fromio()/memcpy_toio()

If we are building for a LE platform, and we haven't overriden the
MMIO ops, then we can optimize the mem*io operations using the
standard string functions.

Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/io.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 7744e58..addfb3d 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -23,6 +23,7 @@
 
 #ifdef __KERNEL__
 
+#include <linux/string.h>
 #include <linux/types.h>
 #include <linux/blk_types.h>
 #include <asm/byteorder.h>
@@ -316,9 +317,33 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
 #define writesw(p,d,l)		__raw_writesw(p,d,l)
 #define writesl(p,d,l)		__raw_writesl(p,d,l)
 
+#ifndef __ARMBE__
+static inline void memset_io(volatile void __iomem *dst, unsigned c,
+	size_t count)
+{
+	memset((void __force *)dst, c, count);
+}
+#define memset_io(dst,c,count) memset_io(dst,c,count)
+
+static inline void memcpy_fromio(void *to, const volatile void __iomem *from,
+	size_t count)
+{
+	memcpy(to, (const void __force *)from, count);
+}
+#define memcpy_fromio(to,from,count) memcpy_fromio(to,from,count)
+
+static inline void memcpy_toio(volatile void __iomem *to, const void *from,
+	size_t count)
+{
+	memcpy((void __force *)to, from, count);
+}
+#define memcpy_toio(to,from,count) memcpy_toio(to,from,count)
+
+#else
 #define memset_io(c,v,l)	_memset_io(c,(v),(l))
 #define memcpy_fromio(a,c,l)	_memcpy_fromio((a),c,(l))
 #define memcpy_toio(c,a,l)	_memcpy_toio(c,(a),(l))
+#endif
 
 #endif	/* readl */
 
-- 
cgit v1.1


From 13dd92bb4599b5655cafe1f2c0365396a096b94a Mon Sep 17 00:00:00 2001
From: Paul Kocialkowski <contact@paulk.fr>
Date: Wed, 6 May 2015 15:22:36 +0100
Subject: ARM: 8354/1: Documentation: devicetree: root node serial-number
 property documentation

Open firmware is already using the serial-number property for passing the
device's serial number from the bootloader to the kernel. In addition, lshw
already has support for scanning this property.

The serial number is a string that somewhat represents the device's serial
number. It might come from some form of storage (e.g. an eeprom) and be
programmed at factory-time by the manufacturer or come from identification
bits available in e.g. the SoC (note that the soc_id property in the SoC bus
should hold a full account of those bits).

The serial number is taken as-is from the bootloader, so it is up to the
bootloader to define where the serial number comes from and what length it
should be. Some use cases for the serial number require it to have a maximum
length (e.g. for USB serial number) and some other cases imply more restrictions
on what the serial number should look like (e.g. in Android, the ro.serialno
property is usually a 16-bytes (plus one null byte) representation of a 64 bit
number).

Signed-off-by: Paul Kocialkowski <contact@paulk.fr>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 Documentation/devicetree/booting-without-of.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/booting-without-of.txt b/Documentation/devicetree/booting-without-of.txt
index e49e423..04d34f6 100644
--- a/Documentation/devicetree/booting-without-of.txt
+++ b/Documentation/devicetree/booting-without-of.txt
@@ -856,6 +856,10 @@ address which can extend beyond that limit.
   name may clash with standard defined ones, you prefix them with your
   vendor name and a comma.
 
+  Additional properties for the root node:
+
+    - serial-number : a string representing the device's serial number
+
   b) The /cpus node
 
   This node is the parent of all individual CPU nodes. It doesn't
-- 
cgit v1.1


From 3f599875e5202986b350618a617527ab441bf206 Mon Sep 17 00:00:00 2001
From: Paul Kocialkowski <contact@paulk.fr>
Date: Wed, 6 May 2015 15:23:56 +0100
Subject: ARM: 8355/1: arch: Show the serial number from devicetree in cpuinfo

This grabs the serial number shown in cpuinfo from the serial-number device-tree
property in priority. When booting with ATAGs (and without device-tree), the
provided number is still shown instead.

Signed-off-by: Paul Kocialkowski <contact@paulk.fr>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/system_info.h |  1 +
 arch/arm/kernel/setup.c            | 23 +++++++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/system_info.h b/arch/arm/include/asm/system_info.h
index 720ea03..3860cbd40 100644
--- a/arch/arm/include/asm/system_info.h
+++ b/arch/arm/include/asm/system_info.h
@@ -17,6 +17,7 @@
 
 /* information about the system we're running on */
 extern unsigned int system_rev;
+extern const char *system_serial;
 extern unsigned int system_serial_low;
 extern unsigned int system_serial_high;
 extern unsigned int mem_fclk_21285;
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 6c777e9..ee3e329 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -93,6 +93,9 @@ unsigned int __atags_pointer __initdata;
 unsigned int system_rev;
 EXPORT_SYMBOL(system_rev);
 
+const char *system_serial;
+EXPORT_SYMBOL(system_serial);
+
 unsigned int system_serial_low;
 EXPORT_SYMBOL(system_serial_low);
 
@@ -839,8 +842,25 @@ arch_initcall(customize_machine);
 
 static int __init init_machine_late(void)
 {
+	struct device_node *root;
+	int ret;
+
 	if (machine_desc->init_late)
 		machine_desc->init_late();
+
+	root = of_find_node_by_path("/");
+	if (root) {
+		ret = of_property_read_string(root, "serial-number",
+					      &system_serial);
+		if (ret)
+			system_serial = NULL;
+	}
+
+	if (!system_serial)
+		system_serial = kasprintf(GFP_KERNEL, "%08x%08x",
+					  system_serial_high,
+					  system_serial_low);
+
 	return 0;
 }
 late_initcall(init_machine_late);
@@ -1109,8 +1129,7 @@ static int c_show(struct seq_file *m, void *v)
 
 	seq_printf(m, "Hardware\t: %s\n", machine_name);
 	seq_printf(m, "Revision\t: %04x\n", system_rev);
-	seq_printf(m, "Serial\t\t: %08x%08x\n",
-		   system_serial_high, system_serial_low);
+	seq_printf(m, "Serial\t\t: %s\n", system_serial);
 
 	return 0;
 }
-- 
cgit v1.1


From 5890298a834c04aaa9b5fb576e5f2b77e79ab38d Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 21 Apr 2015 14:16:05 +0100
Subject: ARM: kvm: fix a bad BSYM() usage

BSYM() should only be used when refering to local symbols in the same
assembly file which are resolved by the assembler, and not for
linker-fixed up symbols.  The use of BSYM() with panic is incorrect as
the linker is involved in fixing up this relocation, and it knows
whether panic() is ARM or Thumb.

Acked-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kvm/interrupts.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 79caf79..87847d2 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -309,7 +309,7 @@ ENTRY(kvm_call_hyp)
 THUMB(	orr	r2, r2, #PSR_T_BIT	)
 	msr	spsr_cxsf, r2
 	mrs	r1, ELR_hyp
-	ldr	r2, =BSYM(panic)
+	ldr	r2, =panic
 	msr	ELR_hyp, r2
 	ldr	r0, =\panic_str
 	clrex				@ Clear exclusive monitor
-- 
cgit v1.1


From 14327c662822e5e874cb971a7162067519300ca8 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 21 Apr 2015 14:17:25 +0100
Subject: ARM: replace BSYM() with badr assembly macro

BSYM() was invented to allow us to work around a problem with the
assembler, where local symbols resolved by the assembler for the 'adr'
instruction did not take account of their ISA.

Since we don't want BSYM() used elsewhere, replace BSYM() with a new
macro 'badr', which is like the 'adr' pseudo-op, but with the BSYM()
mechanics integrated into it.  This ensures that the BSYM()-ification
is only used in conjunction with 'adr'.

Acked-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/boot/compressed/head.S          |  4 ++--
 arch/arm/common/mcpm_head.S              |  2 +-
 arch/arm/include/asm/assembler.h         | 17 ++++++++++++++++-
 arch/arm/include/asm/entry-macro-multi.S |  4 ++--
 arch/arm/include/asm/unified.h           |  2 --
 arch/arm/kernel/entry-armv.S             | 12 ++++++------
 arch/arm/kernel/entry-common.S           |  6 +++---
 arch/arm/kernel/entry-ftrace.S           |  2 +-
 arch/arm/kernel/head-nommu.S             |  6 +++---
 arch/arm/kernel/head.S                   |  8 ++++----
 arch/arm/kernel/sleep.S                  |  2 +-
 arch/arm/lib/call_with_stack.S           |  2 +-
 arch/arm/mm/proc-v7m.S                   |  2 +-
 13 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 2c45b57..06e983f 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -130,7 +130,7 @@ start:
 		.endr
    ARM(		mov	r0, r0		)
    ARM(		b	1f		)
- THUMB(		adr	r12, BSYM(1f)	)
+ THUMB(		badr	r12, 1f		)
  THUMB(		bx	r12		)
 
 		.word	_magic_sig	@ Magic numbers to help the loader
@@ -447,7 +447,7 @@ dtb_check_done:
 
 		bl	cache_clean_flush
 
-		adr	r0, BSYM(restart)
+		badr	r0, restart
 		add	r0, r0, r6
 		mov	pc, r0
 
diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
index e02db4b..08b3bb9 100644
--- a/arch/arm/common/mcpm_head.S
+++ b/arch/arm/common/mcpm_head.S
@@ -49,7 +49,7 @@
 ENTRY(mcpm_entry_point)
 
  ARM_BE8(setend        be)
- THUMB(	adr	r12, BSYM(1f)	)
+ THUMB(	badr	r12, 1f		)
  THUMB(	bx	r12		)
  THUMB(	.thumb			)
 1:
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 186270b..4abe572 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -178,6 +178,21 @@
 	.endm
 
 /*
+ * Assembly version of "adr rd, BSYM(sym)".  This should only be used to
+ * reference local symbols in the same assembly file which are to be
+ * resolved by the assembler.  Other usage is undefined.
+ */
+	.irp	c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
+	.macro	badr\c, rd, sym
+#ifdef CONFIG_THUMB2_KERNEL
+	adr\c	\rd, \sym + 1
+#else
+	adr\c	\rd, \sym
+#endif
+	.endm
+	.endr
+
+/*
  * Get current thread_info.
  */
 	.macro	get_thread_info, rd
@@ -326,7 +341,7 @@
 THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	bne	1f
 	orr	\reg, \reg, #PSR_A_BIT
-	adr	lr, BSYM(2f)
+	badr	lr, 2f
 	msr	spsr_cxsf, \reg
 	__MSR_ELR_HYP(14)
 	__ERET
diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S
index 469a2b3..609184f 100644
--- a/arch/arm/include/asm/entry-macro-multi.S
+++ b/arch/arm/include/asm/entry-macro-multi.S
@@ -10,7 +10,7 @@
 	@
 	@ routine called with r0 = irq number, r1 = struct pt_regs *
 	@
-	adrne	lr, BSYM(1b)
+	badrne	lr, 1b
 	bne	asm_do_IRQ
 
 #ifdef CONFIG_SMP
@@ -23,7 +23,7 @@
 	ALT_SMP(test_for_ipi r0, r2, r6, lr)
 	ALT_UP_B(9997f)
 	movne	r1, sp
-	adrne	lr, BSYM(1b)
+	badrne	lr, 1b
 	bne	do_IPI
 #endif
 9997:
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
index 200f9a7..a91ae49 100644
--- a/arch/arm/include/asm/unified.h
+++ b/arch/arm/include/asm/unified.h
@@ -45,7 +45,6 @@
 #define THUMB(x...)	x
 #ifdef __ASSEMBLY__
 #define W(instr)	instr.w
-#define BSYM(sym)	sym + 1
 #else
 #define WASM(instr)	#instr ".w"
 #endif
@@ -59,7 +58,6 @@
 #define THUMB(x...)
 #ifdef __ASSEMBLY__
 #define W(instr)	instr
-#define BSYM(sym)	sym
 #else
 #define WASM(instr)	#instr
 #endif
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 570306c..f8f7398 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -40,7 +40,7 @@
 #ifdef CONFIG_MULTI_IRQ_HANDLER
 	ldr	r1, =handle_arch_irq
 	mov	r0, sp
-	adr	lr, BSYM(9997f)
+	badr	lr, 9997f
 	ldr	pc, [r1]
 #else
 	arch_irq_handler_default
@@ -273,7 +273,7 @@ __und_svc:
 	str	r4, [sp, #S_PC]
 	orr	r0, r9, r0, lsl #16
 #endif
-	adr	r9, BSYM(__und_svc_finish)
+	badr	r9, __und_svc_finish
 	mov	r2, r4
 	bl	call_fpe
 
@@ -469,7 +469,7 @@ __und_usr:
 	@ instruction, or the more conventional lr if we are to treat
 	@ this as a real undefined instruction
 	@
-	adr	r9, BSYM(ret_from_exception)
+	badr	r9, ret_from_exception
 
 	@ IRQs must be enabled before attempting to read the instruction from
 	@ user space since that could cause a page/translation fault if the
@@ -486,7 +486,7 @@ __und_usr:
 	@ r2 = PC value for the following instruction (:= regs->ARM_pc)
 	@ r4 = PC value for the faulting instruction
 	@ lr = 32-bit undefined instruction function
-	adr	lr, BSYM(__und_usr_fault_32)
+	badr	lr, __und_usr_fault_32
 	b	call_fpe
 
 __und_usr_thumb:
@@ -522,7 +522,7 @@ ARM_BE8(rev16	r0, r0)				@ little endian instruction
 	add	r2, r2, #2			@ r2 is PC + 2, make it PC + 4
 	str	r2, [sp, #S_PC]			@ it's a 2x16bit instr, update
 	orr	r0, r0, r5, lsl #16
-	adr	lr, BSYM(__und_usr_fault_32)
+	badr	lr, __und_usr_fault_32
 	@ r0 = the two 16-bit Thumb instructions which caused the exception
 	@ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc)
 	@ r4 = PC value for the first 16-bit Thumb instruction
@@ -716,7 +716,7 @@ __und_usr_fault_32:
 __und_usr_fault_16:
 	mov	r1, #2
 1:	mov	r0, sp
-	adr	lr, BSYM(ret_from_exception)
+	badr	lr, ret_from_exception
 	b	__und_fault
 ENDPROC(__und_usr_fault_32)
 ENDPROC(__und_usr_fault_16)
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index f8ccc21..6ab1593 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -88,7 +88,7 @@ ENTRY(ret_from_fork)
 	bl	schedule_tail
 	cmp	r5, #0
 	movne	r0, r4
-	adrne	lr, BSYM(1f)
+	badrne	lr, 1f
 	retne	r5
 1:	get_thread_info tsk
 	b	ret_slow_syscall
@@ -196,7 +196,7 @@ local_restart:
 	bne	__sys_trace
 
 	cmp	scno, #NR_syscalls		@ check upper syscall limit
-	adr	lr, BSYM(ret_fast_syscall)	@ return address
+	badr	lr, ret_fast_syscall		@ return address
 	ldrcc	pc, [tbl, scno, lsl #2]		@ call sys_* routine
 
 	add	r1, sp, #S_OFF
@@ -231,7 +231,7 @@ __sys_trace:
 	add	r0, sp, #S_OFF
 	bl	syscall_trace_enter
 
-	adr	lr, BSYM(__sys_trace_return)	@ return address
+	badr	lr, __sys_trace_return		@ return address
 	mov	scno, r0			@ syscall number (possibly new)
 	add	r1, sp, #S_R0 + S_OFF		@ pointer to regs
 	cmp	scno, #NR_syscalls		@ check upper syscall limit
diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S
index fe57c73..c73c403 100644
--- a/arch/arm/kernel/entry-ftrace.S
+++ b/arch/arm/kernel/entry-ftrace.S
@@ -87,7 +87,7 @@
 
 1: 	mcount_get_lr	r1			@ lr of instrumented func
 	mcount_adjust_addr	r0, lr		@ instrumented function
-	adr	lr, BSYM(2f)
+	badr	lr, 2f
 	mov	pc, r2
 2:	mcount_exit
 .endm
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index aebfbf7..b6f3cb6 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -46,7 +46,7 @@ ENTRY(stext)
 	.arm
 ENTRY(stext)
 
- THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is always entered in ARM.
+ THUMB(	badr	r9, 1f		)	@ Kernel is always entered in ARM.
  THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
  THUMB(	.thumb			)	@ switch to Thumb now.
  THUMB(1:			)
@@ -79,7 +79,7 @@ ENTRY(stext)
 #endif
 	ldr	r13, =__mmap_switched		@ address to jump to after
 						@ initialising sctlr
-	adr	lr, BSYM(1f)			@ return (PIC) address
+	badr	lr, 1f				@ return (PIC) address
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
 	add	r12, r12, r10
 	ret	r12
@@ -115,7 +115,7 @@ ENTRY(secondary_startup)
 	bl      __setup_mpu			@ Initialize the MPU
 #endif
 
-	adr	lr, BSYM(__after_proc_init)	@ return address
+	badr	lr, __after_proc_init		@ return address
 	mov	r13, r12			@ __secondary_switched address
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
 	add	r12, r12, r10
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 3637973..ab3c478 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -80,7 +80,7 @@
 ENTRY(stext)
  ARM_BE8(setend	be )			@ ensure we are in BE8 mode
 
- THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is always entered in ARM.
+ THUMB(	badr	r9, 1f		)	@ Kernel is always entered in ARM.
  THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
  THUMB(	.thumb			)	@ switch to Thumb now.
  THUMB(1:			)
@@ -136,7 +136,7 @@ ENTRY(stext)
 	 */
 	ldr	r13, =__mmap_switched		@ address to jump to after
 						@ mmu has been enabled
-	adr	lr, BSYM(1f)			@ return (PIC) address
+	badr	lr, 1f				@ return (PIC) address
 	mov	r8, r4				@ set TTBR1 to swapper_pg_dir
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
 	add	r12, r12, r10
@@ -348,7 +348,7 @@ __turn_mmu_on_loc:
 	.text
 ENTRY(secondary_startup_arm)
 	.arm
- THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is entered in ARM.
+ THUMB(	badr	r9, 1f		)	@ Kernel is entered in ARM.
  THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
  THUMB(	.thumb			)	@ switch to Thumb now.
  THUMB(1:			)
@@ -384,7 +384,7 @@ ENTRY(secondary_startup)
 	ldr	r4, [r7, lr]			@ get secondary_data.pgdir
 	add	r7, r7, #4
 	ldr	r8, [r7, lr]			@ get secondary_data.swapper_pg_dir
-	adr	lr, BSYM(__enable_mmu)		@ return address
+	badr	lr, __enable_mmu		@ return address
 	mov	r13, r12			@ __secondary_switched address
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
 	add	r12, r12, r10			@ initialise processor
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 7d37bfc..76bb312 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -81,7 +81,7 @@ ENTRY(__cpu_suspend)
 	mov	r1, r4			@ size of save block
 	add	r0, sp, #8		@ pointer to save block
 	bl	__cpu_suspend_save
-	adr	lr, BSYM(cpu_suspend_abort)
+	badr	lr, cpu_suspend_abort
 	ldmfd	sp!, {r0, pc}		@ call suspend fn
 ENDPROC(__cpu_suspend)
 	.ltorg
diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S
index ed1a421..bf3a408 100644
--- a/arch/arm/lib/call_with_stack.S
+++ b/arch/arm/lib/call_with_stack.S
@@ -35,7 +35,7 @@ ENTRY(call_with_stack)
 	mov	r2, r0
 	mov	r0, r1
 
-	adr	lr, BSYM(1f)
+	badr	lr, 1f
 	ret	r2
 
 1:	ldr	lr, [sp]
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index e08e1f2..67d9209 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -98,7 +98,7 @@ __v7m_setup:
 	str	r5, [r0, V7M_SCB_SHPR3]	@ set PendSV priority
 
 	@ SVC to run the kernel in this mode
-	adr	r1, BSYM(1f)
+	badr	r1, 1f
 	ldr	r5, [r12, #11 * 4]	@ read the SVC vector entry
 	str	r1, [r12, #11 * 4]	@ write the temporary SVC vector entry
 	mov	r6, lr			@ save LR
-- 
cgit v1.1


From d965b0fca7dcde3f82c982e0bf1631069fdeb8c9 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 15 May 2015 11:56:45 +0100
Subject: ARM: l2c: restore the behaviour documented above l2c_enable()

l2c_enable() is documented that it must not be called if the cache has
already been enabled.  Unfortunately, commit 6b49241ac252 ("ARM: 8259/1:
l2c: Refactor the driver to use commit-like interface") changed this
without updating the comment, for very little reason.  Revert this
change and restore the expected behaviour.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/cache-l2x0.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index e309c8f..1471c0f 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -129,10 +129,6 @@ static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
 {
 	unsigned long flags;
 
-	/* Do not touch the controller if already enabled. */
-	if (readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)
-		return;
-
 	l2x0_saved_regs.aux_ctrl = aux;
 	l2c_configure(base);
 
@@ -163,7 +159,11 @@ static void l2c_save(void __iomem *base)
 
 static void l2c_resume(void)
 {
-	l2c_enable(l2x0_base, l2x0_saved_regs.aux_ctrl, l2x0_data->num_lock);
+	void __iomem *base = l2x0_base;
+
+	/* Do not touch the controller if already enabled. */
+	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
+		l2c_enable(base, l2x0_saved_regs.aux_ctrl, l2x0_data->num_lock);
 }
 
 /*
-- 
cgit v1.1


From 7705dd256ce363f8b01429efb2f0dc4d1ee23c89 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 15 May 2015 11:07:14 +0100
Subject: ARM: l2c: write auxiliary control register first

Before calling the controller specific configuration function, write
the auxiliary control register first, so that bits shared with other
registers (such as the prefetch control register) are not overwritten
by the later write to the auxctrl register.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/cache-l2x0.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 1471c0f..977eb9f 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -115,10 +115,10 @@ static void l2c_configure(void __iomem *base)
 		return;
 	}
 
+	l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL);
+
 	if (l2x0_data->configure)
 		l2x0_data->configure(base);
-
-	l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL);
 }
 
 /*
-- 
cgit v1.1


From 50beefde30224888d6d63224405ace4bdd4b32a0 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 15 May 2015 11:05:54 +0100
Subject: ARM: l2c: clean up l2c_configure()

l2c_configure() does not follow the pattern of other l2c_* functions.
Fix this so that it does to avoid future confusion.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/cache-l2x0.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 977eb9f..2864a7b 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -110,15 +110,7 @@ static inline void l2c_unlock(void __iomem *base, unsigned num)
 
 static void l2c_configure(void __iomem *base)
 {
-	if (outer_cache.configure) {
-		outer_cache.configure(&l2x0_saved_regs);
-		return;
-	}
-
 	l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL);
-
-	if (l2x0_data->configure)
-		l2x0_data->configure(base);
 }
 
 /*
@@ -130,7 +122,11 @@ static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
 	unsigned long flags;
 
 	l2x0_saved_regs.aux_ctrl = aux;
-	l2c_configure(base);
+
+	if (outer_cache.configure)
+		outer_cache.configure(&l2x0_saved_regs);
+	else
+		l2x0_data->configure(base);
 
 	l2c_unlock(base, num_lock);
 
@@ -252,6 +248,7 @@ static const struct l2c_init_data l2c210_data __initconst = {
 	.num_lock = 1,
 	.enable = l2c_enable,
 	.save = l2c_save,
+	.configure = l2c_configure,
 	.outer_cache = {
 		.inv_range = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -409,6 +406,7 @@ static const struct l2c_init_data l2c220_data = {
 	.num_lock = 1,
 	.enable = l2c220_enable,
 	.save = l2c_save,
+	.configure = l2c_configure,
 	.outer_cache = {
 		.inv_range = l2c220_inv_range,
 		.clean_range = l2c220_clean_range,
@@ -569,6 +567,8 @@ static void l2c310_configure(void __iomem *base)
 {
 	unsigned revision;
 
+	l2c_configure(base);
+
 	/* restore pl310 setup */
 	l2c_write_sec(l2x0_saved_regs.tag_latency, base,
 		      L310_TAG_LATENCY_CTRL);
@@ -1066,6 +1066,7 @@ static const struct l2c_init_data of_l2c210_data __initconst = {
 	.of_parse = l2x0_of_parse,
 	.enable = l2c_enable,
 	.save = l2c_save,
+	.configure = l2c_configure,
 	.outer_cache = {
 		.inv_range   = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -1084,6 +1085,7 @@ static const struct l2c_init_data of_l2c220_data __initconst = {
 	.of_parse = l2x0_of_parse,
 	.enable = l2c220_enable,
 	.save = l2c_save,
+	.configure = l2c_configure,
 	.outer_cache = {
 		.inv_range   = l2c220_inv_range,
 		.clean_range = l2c220_clean_range,
@@ -1416,6 +1418,7 @@ static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
 	.enable = l2c_enable,
 	.fixup = aurora_fixup,
 	.save  = aurora_save,
+	.configure = l2c_configure,
 	.outer_cache = {
 		.inv_range   = aurora_inv_range,
 		.clean_range = aurora_clean_range,
@@ -1435,6 +1438,7 @@ static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
 	.enable = aurora_enable_no_outer,
 	.fixup = aurora_fixup,
 	.save  = aurora_save,
+	.configure = l2c_configure,
 	.outer_cache = {
 		.resume      = l2c_resume,
 	},
@@ -1608,6 +1612,7 @@ static void __init tauros3_save(void __iomem *base)
 
 static void tauros3_configure(void __iomem *base)
 {
+	l2c_configure(base);
 	writel_relaxed(l2x0_saved_regs.aux2_ctrl,
 		       base + TAUROS3_AUX2_CTRL);
 	writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
-- 
cgit v1.1


From e946a8cbe4a47a7c2615ffb0d45712e72c7d0f3a Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 15 May 2015 11:51:51 +0100
Subject: ARM: l2c: only unlock caches if NS_LOCKDOWN bit is set

Some L2C caches have a bit which allows non-secure software to control
the cache lockdown.  Some platforms are unable to set this bit.  To
avoid receiving an abort while trying to unlock the cache lines, check
the state of this bit before unlocking.  We do this by providing a new
method in the l2c_init_data to perform the unlocking.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/cache-l2x0.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 2864a7b..95f3362 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -42,6 +42,7 @@ struct l2c_init_data {
 	void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
 	void (*save)(void __iomem *);
 	void (*configure)(void __iomem *);
+	void (*unlock)(void __iomem *, unsigned);
 	struct outer_cache_fns outer_cache;
 };
 
@@ -128,7 +129,7 @@ static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
 	else
 		l2x0_data->configure(base);
 
-	l2c_unlock(base, num_lock);
+	l2x0_data->unlock(base, num_lock);
 
 	local_irq_save(flags);
 	__l2c_op_way(base + L2X0_INV_WAY);
@@ -249,6 +250,7 @@ static const struct l2c_init_data l2c210_data __initconst = {
 	.enable = l2c_enable,
 	.save = l2c_save,
 	.configure = l2c_configure,
+	.unlock = l2c_unlock,
 	.outer_cache = {
 		.inv_range = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -400,6 +402,12 @@ static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock)
 	l2c_enable(base, aux, num_lock);
 }
 
+static void l2c220_unlock(void __iomem *base, unsigned num_lock)
+{
+	if (readl_relaxed(base + L2X0_AUX_CTRL) & L220_AUX_CTRL_NS_LOCKDOWN)
+		l2c_unlock(base, num_lock);
+}
+
 static const struct l2c_init_data l2c220_data = {
 	.type = "L2C-220",
 	.way_size_0 = SZ_8K,
@@ -407,6 +415,7 @@ static const struct l2c_init_data l2c220_data = {
 	.enable = l2c220_enable,
 	.save = l2c_save,
 	.configure = l2c_configure,
+	.unlock = l2c220_unlock,
 	.outer_cache = {
 		.inv_range = l2c220_inv_range,
 		.clean_range = l2c220_clean_range,
@@ -755,6 +764,12 @@ static void l2c310_resume(void)
 		set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
 }
 
+static void l2c310_unlock(void __iomem *base, unsigned num_lock)
+{
+	if (readl_relaxed(base + L2X0_AUX_CTRL) & L310_AUX_CTRL_NS_LOCKDOWN)
+		l2c_unlock(base, num_lock);
+}
+
 static const struct l2c_init_data l2c310_init_fns __initconst = {
 	.type = "L2C-310",
 	.way_size_0 = SZ_8K,
@@ -763,6 +778,7 @@ static const struct l2c_init_data l2c310_init_fns __initconst = {
 	.fixup = l2c310_fixup,
 	.save = l2c310_save,
 	.configure = l2c310_configure,
+	.unlock = l2c310_unlock,
 	.outer_cache = {
 		.inv_range = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -1067,6 +1083,7 @@ static const struct l2c_init_data of_l2c210_data __initconst = {
 	.enable = l2c_enable,
 	.save = l2c_save,
 	.configure = l2c_configure,
+	.unlock = l2c_unlock,
 	.outer_cache = {
 		.inv_range   = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -1086,6 +1103,7 @@ static const struct l2c_init_data of_l2c220_data __initconst = {
 	.enable = l2c220_enable,
 	.save = l2c_save,
 	.configure = l2c_configure,
+	.unlock = l2c220_unlock,
 	.outer_cache = {
 		.inv_range   = l2c220_inv_range,
 		.clean_range = l2c220_clean_range,
@@ -1213,6 +1231,7 @@ static const struct l2c_init_data of_l2c310_data __initconst = {
 	.fixup = l2c310_fixup,
 	.save  = l2c310_save,
 	.configure = l2c310_configure,
+	.unlock = l2c310_unlock,
 	.outer_cache = {
 		.inv_range   = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -1242,6 +1261,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
 	.fixup = l2c310_fixup,
 	.save  = l2c310_save,
 	.configure = l2c310_configure,
+	.unlock = l2c310_unlock,
 	.outer_cache = {
 		.inv_range   = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -1419,6 +1439,7 @@ static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
 	.fixup = aurora_fixup,
 	.save  = aurora_save,
 	.configure = l2c_configure,
+	.unlock = l2c_unlock,
 	.outer_cache = {
 		.inv_range   = aurora_inv_range,
 		.clean_range = aurora_clean_range,
@@ -1439,6 +1460,7 @@ static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
 	.fixup = aurora_fixup,
 	.save  = aurora_save,
 	.configure = l2c_configure,
+	.unlock = l2c_unlock,
 	.outer_cache = {
 		.resume      = l2c_resume,
 	},
@@ -1589,6 +1611,7 @@ static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
 	.enable = l2c310_enable,
 	.save  = l2c310_save,
 	.configure = l2c310_configure,
+	.unlock = l2c310_unlock,
 	.outer_cache = {
 		.inv_range   = bcm_inv_range,
 		.clean_range = bcm_clean_range,
@@ -1626,6 +1649,7 @@ static const struct l2c_init_data of_tauros3_data __initconst = {
 	.enable = l2c_enable,
 	.save  = tauros3_save,
 	.configure = tauros3_configure,
+	.unlock = l2c_unlock,
 	/* Tauros3 broadcasts L1 cache operations to L2 */
 	.outer_cache = {
 		.resume      = l2c_resume,
-- 
cgit v1.1


From 5b290ec2074c68b9f4f8f8789fa9b3e1782869e7 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 15 May 2015 12:03:29 +0100
Subject: ARM: l2c: avoid passing auxiliary control register through enable
 method

Avoid passing the auxiliary control register value through the enable
method.  In the resume path, we have to read the value stored in
l2x0_saved_regs.aux_ctrl, only to have it immediately written back by
l2c_enable().  We can avoid this if we have __l2c_init() save the value
directly to l2x0_saved_regs.aux_ctrl before calling the specific enable
method.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/cache-l2x0.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 95f3362..90599f6 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -38,7 +38,7 @@ struct l2c_init_data {
 	unsigned way_size_0;
 	unsigned num_lock;
 	void (*of_parse)(const struct device_node *, u32 *, u32 *);
-	void (*enable)(void __iomem *, u32, unsigned);
+	void (*enable)(void __iomem *, unsigned);
 	void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
 	void (*save)(void __iomem *);
 	void (*configure)(void __iomem *);
@@ -118,12 +118,10 @@ static void l2c_configure(void __iomem *base)
  * Enable the L2 cache controller.  This function must only be
  * called when the cache controller is known to be disabled.
  */
-static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
+static void l2c_enable(void __iomem *base, unsigned num_lock)
 {
 	unsigned long flags;
 
-	l2x0_saved_regs.aux_ctrl = aux;
-
 	if (outer_cache.configure)
 		outer_cache.configure(&l2x0_saved_regs);
 	else
@@ -160,7 +158,7 @@ static void l2c_resume(void)
 
 	/* Do not touch the controller if already enabled. */
 	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
-		l2c_enable(base, l2x0_saved_regs.aux_ctrl, l2x0_data->num_lock);
+		l2c_enable(base, l2x0_data->num_lock);
 }
 
 /*
@@ -390,16 +388,16 @@ static void l2c220_sync(void)
 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
-static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock)
+static void l2c220_enable(void __iomem *base, unsigned num_lock)
 {
 	/*
 	 * Always enable non-secure access to the lockdown registers -
 	 * we write to them as part of the L2C enable sequence so they
 	 * need to be accessible.
 	 */
-	aux |= L220_AUX_CTRL_NS_LOCKDOWN;
+	l2x0_saved_regs.aux_ctrl |= L220_AUX_CTRL_NS_LOCKDOWN;
 
-	l2c_enable(base, aux, num_lock);
+	l2c_enable(base, num_lock);
 }
 
 static void l2c220_unlock(void __iomem *base, unsigned num_lock)
@@ -612,10 +610,11 @@ static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, v
 	return NOTIFY_OK;
 }
 
-static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
+static void __init l2c310_enable(void __iomem *base, unsigned num_lock)
 {
 	unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK;
 	bool cortex_a9 = read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
+	u32 aux = l2x0_saved_regs.aux_ctrl;
 
 	if (rev >= L310_CACHE_ID_RTL_R2P0) {
 		if (cortex_a9) {
@@ -658,9 +657,9 @@ static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
 	 * we write to them as part of the L2C enable sequence so they
 	 * need to be accessible.
 	 */
-	aux |= L310_AUX_CTRL_NS_LOCKDOWN;
+	l2x0_saved_regs.aux_ctrl = aux | L310_AUX_CTRL_NS_LOCKDOWN;
 
-	l2c_enable(base, aux, num_lock);
+	l2c_enable(base, num_lock);
 
 	/* Read back resulting AUX_CTRL value as it could have been altered. */
 	aux = readl_relaxed(base + L2X0_AUX_CTRL);
@@ -872,8 +871,11 @@ static int __init __l2c_init(const struct l2c_init_data *data,
 	 * Check if l2x0 controller is already enabled.  If we are booting
 	 * in non-secure mode accessing the below registers will fault.
 	 */
-	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
-		data->enable(l2x0_base, aux, data->num_lock);
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+		l2x0_saved_regs.aux_ctrl = aux;
+
+		data->enable(l2x0_base, data->num_lock);
+	}
 
 	outer_cache = fns;
 
@@ -1388,7 +1390,7 @@ static void aurora_save(void __iomem *base)
  * For Aurora cache in no outer mode, enable via the CP15 coprocessor
  * broadcasting of cache commands to L2.
  */
-static void __init aurora_enable_no_outer(void __iomem *base, u32 aux,
+static void __init aurora_enable_no_outer(void __iomem *base,
 	unsigned num_lock)
 {
 	u32 u;
@@ -1399,7 +1401,7 @@ static void __init aurora_enable_no_outer(void __iomem *base, u32 aux,
 
 	isb();
 
-	l2c_enable(base, aux, num_lock);
+	l2c_enable(base, num_lock);
 }
 
 static void __init aurora_fixup(void __iomem *base, u32 cache_id,
-- 
cgit v1.1


From 982b465a2f7c90a11761a483d78594566619e405 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Mon, 18 May 2015 15:58:57 +0100
Subject: ARM: 8361/1: sa1100: add platform functions to handle PWER settings

PWER settings logically belongs neither to GPIO nor to system IRQ code.
Add special functions to handle PWER (for GPIO and for system IRQs)
from platform code.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-sa1100/generic.c | 24 ++++++++++++++++++++++++
 include/soc/sa1100/pwer.h      | 15 +++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100644 include/soc/sa1100/pwer.h

diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 40e0d86..c651f6e 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -23,6 +23,8 @@
 
 #include <video/sa1100fb.h>
 
+#include <soc/sa1100/pwer.h>
+
 #include <asm/div64.h>
 #include <asm/mach/map.h>
 #include <asm/mach/flash.h>
@@ -416,3 +418,25 @@ void sa1110_mb_enable(void)
 	local_irq_restore(flags);
 }
 
+int sa11x0_gpio_set_wake(unsigned int gpio, unsigned int on)
+{
+	if (on)
+		PWER |= BIT(gpio);
+	else
+		PWER &= ~BIT(gpio);
+
+	return 0;
+}
+
+int sa11x0_sc_set_wake(unsigned int irq, unsigned int on)
+{
+	if (BIT(irq) != IC_RTCAlrm)
+		return -EINVAL;
+
+	if (on)
+		PWER |= PWER_RTC;
+	else
+		PWER &= ~PWER_RTC;
+
+	return 0;
+}
diff --git a/include/soc/sa1100/pwer.h b/include/soc/sa1100/pwer.h
new file mode 100644
index 0000000..15a545b
--- /dev/null
+++ b/include/soc/sa1100/pwer.h
@@ -0,0 +1,15 @@
+#ifndef SOC_SA1100_PWER_H
+#define SOC_SA1100_PWER_H
+
+/*
+ * Copyright (C) 2015, Dmitry Eremin-Solenikov
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+int sa11x0_gpio_set_wake(unsigned int gpio, unsigned int on);
+int sa11x0_sc_set_wake(unsigned int irq, unsigned int on);
+
+#endif
-- 
cgit v1.1


From a657d7f679e841664c132cb691ffa44a6cd0bdb3 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Mon, 18 May 2015 16:01:19 +0100
Subject: ARM: 8362/1: sa1100: use sa11x0_sc_set_wake() in irq driver

Use new function controlling PWER register in IRQ driver.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-sa1100/irq.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c
index 65aebfa..6afaa33 100644
--- a/arch/arm/mach-sa1100/irq.c
+++ b/arch/arm/mach-sa1100/irq.c
@@ -18,6 +18,8 @@
 #include <linux/ioport.h>
 #include <linux/syscore_ops.h>
 
+#include <soc/sa1100/pwer.h>
+
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <asm/mach/irq.h>
@@ -40,19 +42,9 @@ static void sa1100_unmask_irq(struct irq_data *d)
 	ICMR |= BIT(d->hwirq);
 }
 
-/*
- * Apart form GPIOs, only the RTC alarm can be a wakeup event.
- */
 static int sa1100_set_wake(struct irq_data *d, unsigned int on)
 {
-	if (BIT(d->hwirq) == IC_RTCAlrm) {
-		if (on)
-			PWER |= PWER_RTC;
-		else
-			PWER &= ~PWER_RTC;
-		return 0;
-	}
-	return -EINVAL;
+	return sa11x0_sc_set_wake(d->hwirq, on);
 }
 
 static struct irq_chip sa1100_normal_chip = {
-- 
cgit v1.1


From 60c06c4c6f85676a86826c7e9bbd727224a2795f Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Mon, 18 May 2015 16:02:47 +0100
Subject: ARM: 8363/1: sa1100: use ioremapped memory to access SC registers

Use ioremap() and readl/writel_relaxed() to access IRQ controller
registers.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-sa1100/irq.c | 52 ++++++++++++++++++++++++++++++----------------
 1 file changed, 34 insertions(+), 18 deletions(-)

diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c
index 6afaa33..08f929e 100644
--- a/arch/arm/mach-sa1100/irq.c
+++ b/arch/arm/mach-sa1100/irq.c
@@ -20,13 +20,19 @@
 
 #include <soc/sa1100/pwer.h>
 
-#include <mach/hardware.h>
 #include <mach/irqs.h>
-#include <asm/mach/irq.h>
 #include <asm/exception.h>
 
 #include "generic.h"
 
+#define ICIP	0x00  /* IC IRQ Pending reg. */
+#define ICMR	0x04  /* IC Mask Reg.        */
+#define ICLR	0x08  /* IC Level Reg.       */
+#define ICCR	0x0C  /* IC Control Reg.     */
+#define ICFP	0x10  /* IC FIQ Pending reg. */
+#define ICPR	0x20  /* IC Pending Reg.     */
+
+static void __iomem *iobase;
 
 /*
  * We don't need to ACK IRQs on the SA1100 unless they're GPIOs
@@ -34,12 +40,20 @@
  */
 static void sa1100_mask_irq(struct irq_data *d)
 {
-	ICMR &= ~BIT(d->hwirq);
+	u32 reg;
+
+	reg = readl_relaxed(iobase + ICMR);
+	reg &= ~BIT(d->hwirq);
+	writel_relaxed(reg, iobase + ICMR);
 }
 
 static void sa1100_unmask_irq(struct irq_data *d)
 {
-	ICMR |= BIT(d->hwirq);
+	u32 reg;
+
+	reg = readl_relaxed(iobase + ICMR);
+	reg |= BIT(d->hwirq);
+	writel_relaxed(reg, iobase + ICMR);
 }
 
 static int sa1100_set_wake(struct irq_data *d, unsigned int on)
@@ -87,16 +101,14 @@ static int sa1100irq_suspend(void)
 	struct sa1100irq_state *st = &sa1100irq_state;
 
 	st->saved = 1;
-	st->icmr = ICMR;
-	st->iclr = ICLR;
-	st->iccr = ICCR;
+	st->icmr = readl_relaxed(iobase + ICMR);
+	st->iclr = readl_relaxed(iobase + ICLR);
+	st->iccr = readl_relaxed(iobase + ICCR);
 
 	/*
 	 * Disable all GPIO-based interrupts.
 	 */
-	ICMR &= ~(IC_GPIO11_27|IC_GPIO10|IC_GPIO9|IC_GPIO8|IC_GPIO7|
-		  IC_GPIO6|IC_GPIO5|IC_GPIO4|IC_GPIO3|IC_GPIO2|
-		  IC_GPIO1|IC_GPIO0);
+	writel_relaxed(st->icmr & 0xfffff000, iobase + ICMR);
 
 	return 0;
 }
@@ -106,10 +118,10 @@ static void sa1100irq_resume(void)
 	struct sa1100irq_state *st = &sa1100irq_state;
 
 	if (st->saved) {
-		ICCR = st->iccr;
-		ICLR = st->iclr;
+		writel_relaxed(st->iccr, iobase + ICCR);
+		writel_relaxed(st->iclr, iobase + ICLR);
 
-		ICMR = st->icmr;
+		writel_relaxed(st->icmr, iobase + ICMR);
 	}
 }
 
@@ -132,8 +144,8 @@ sa1100_handle_irq(struct pt_regs *regs)
 	uint32_t icip, icmr, mask;
 
 	do {
-		icip = (ICIP);
-		icmr = (ICMR);
+		icip = readl_relaxed(iobase + ICIP);
+		icmr = readl_relaxed(iobase + ICMR);
 		mask = icip & icmr;
 
 		if (mask == 0)
@@ -148,17 +160,21 @@ void __init sa1100_init_irq(void)
 {
 	request_resource(&iomem_resource, &irq_resource);
 
+	iobase = ioremap(irq_resource.start, SZ_64K);
+	if (WARN_ON(!iobase))
+		return;
+
 	/* disable all IRQs */
-	ICMR = 0;
+	writel_relaxed(0, iobase + ICMR);
 
 	/* all IRQs are IRQ, not FIQ */
-	ICLR = 0;
+	writel_relaxed(0, iobase + ICLR);
 
 	/*
 	 * Whatever the doc says, this has to be set for the wait-on-irq
 	 * instruction to work... on a SA1100 rev 9 at least.
 	 */
-	ICCR = 1;
+	writel_relaxed(1, iobase + ICCR);
 
 	sa1100_normal_irqdomain = irq_domain_add_simple(NULL,
 			32, IRQ_GPIO0_SC,
-- 
cgit v1.1


From 66eb579e66ecfea55e2007be0594869ea9e453d4 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 13 May 2015 17:12:23 +0100
Subject: perf: allow for PMU-specific event filtering

In certain circumstances it may not be possible to schedule particular
events due to constraints other than a lack of hardware counters (e.g.
on big.LITTLE systems where CPUs support different events). The core
perf event code does not distinguish these cases and pessimistically
assumes that any failure to schedule an event means that it is not worth
attempting to schedule later events, even if some hardware counters are
still unused.

When an event a pmu cannot schedule exists in a flexible group list it
can unnecessarily prevent event groups following it in the list from
being scheduled (until it is rotated to the end of the list). This means
some events are scheduled for only a portion of the time they could be,
and for short running programs no events may be scheduled if the list is
initially sorted in an unfortunate order.

This patch adds a new (optional) filter_match function pointer to struct
pmu which a pmu driver can use to tell perf core when an event matches
pmu-specific scheduling requirements. This plugs into the existing
event_filter_match logic, and makes it possible to avoid the scheduling
problem described above. When no filter is provided by the PMU, the
existing behaviour is retained.

Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/perf_event.h | 5 +++++
 kernel/events/core.c       | 8 +++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 61992cf..67c719c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -304,6 +304,11 @@ struct pmu {
 	 * Free pmu-private AUX data structures
 	 */
 	void (*free_aux)		(void *aux); /* optional */
+
+	/*
+	 * Filter events for PMU-specific reasons.
+	 */
+	int (*filter_match)		(struct perf_event *event); /* optional */
 };
 
 /**
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 81aa3a4..aaeb449 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1506,11 +1506,17 @@ static int __init perf_workqueue_init(void)
 
 core_initcall(perf_workqueue_init);
 
+static inline int pmu_filter_match(struct perf_event *event)
+{
+	struct pmu *pmu = event->pmu;
+	return pmu->filter_match ? pmu->filter_match(event) : 1;
+}
+
 static inline int
 event_filter_match(struct perf_event *event)
 {
 	return (event->cpu == -1 || event->cpu == smp_processor_id())
-	    && perf_cgroup_match(event);
+	    && perf_cgroup_match(event) && pmu_filter_match(event);
 }
 
 static void
-- 
cgit v1.1


From b3eac0265bf6258f08dcd4ac7fa7f87cc050defc Mon Sep 17 00:00:00 2001
From: Hou Pengyang <houpengyang@huawei.com>
Date: Fri, 8 May 2015 06:43:03 +0100
Subject: arm: perf: Fix callchain parse error with kernel tracepoint events

For ARM, when tracing with tracepoint events, the IP and cpsr are set
to 0, preventing the perf code parsing the callchain and resolving the
symbols correctly.

 ./perf record -e sched:sched_switch -g --call-graph dwarf ls
    [ perf record: Captured and wrote 0.006 MB perf.data ]
 ./perf report -f
    Samples: 5  of event 'sched:sched_switch', Event count (approx.): 5
    Children      Self    Command  Shared Object     Symbol
    100.00%       100.00%  ls       [unknown]         [.] 00000000

The fix is to implement perf_arch_fetch_caller_regs for ARM, which fills
several necessary registers used for callchain unwinding, including pc,sp,
fp and cpsr.

With this patch, callchain can be parsed correctly as :

   .....
-  100.00%   100.00%  ls       [kernel.kallsyms]  [k] __sched_text_start
   + __sched_text_start
+   20.00%     0.00%  ls       libc-2.18.so       [.] _dl_addr
+   20.00%     0.00%  ls       libc-2.18.so       [.] write
   .....

Jean Pihet found this in ARM and come up with a patch:
http://thread.gmane.org/gmane.linux.kernel/1734283/focus=1734280

This patch rewrite Jean's patch in C.

Signed-off-by: Hou Pengyang <houpengyang@huawei.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/perf_event.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index d9cf138..4f9dec4 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -19,4 +19,11 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs)	perf_misc_flags(regs)
 #endif
 
+#define perf_arch_fetch_caller_regs(regs, __ip) { \
+	(regs)->ARM_pc = (__ip); \
+	(regs)->ARM_fp = (unsigned long) __builtin_frame_address(0); \
+	(regs)->ARM_sp = current_stack_pointer; \
+	(regs)->ARM_cpsr = SVC_MODE; \
+}
+
 #endif /* __ARM_PERF_EVENT_H__ */
-- 
cgit v1.1


From 64d0d3943e14653fcfd5f9b3bd585bc77fa053df Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 13 May 2015 17:12:24 +0100
Subject: arm: perf: make of_pmu_irq_cfg take arm_pmu

To support multiple PMUs we'll need to pass the arm_pmu instance around.
Update of_pmu_irq_cfg to take an arm_pmu, and acquire the platform
device from this.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/perf_event_cpu.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 91c7ba1..2a9003e 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -301,9 +301,10 @@ static int probe_current_pmu(struct arm_pmu *pmu)
 	return ret;
 }
 
-static int of_pmu_irq_cfg(struct platform_device *pdev)
+static int of_pmu_irq_cfg(struct arm_pmu *pmu)
 {
 	int i;
+	struct platform_device *pdev = pmu->plat_device;
 	int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
 
 	if (!irqs)
@@ -336,7 +337,7 @@ static int of_pmu_irq_cfg(struct platform_device *pdev)
 	}
 
 	if (i == pdev->num_resources)
-		cpu_pmu->irq_affinity = irqs;
+		pmu->irq_affinity = irqs;
 	else
 		kfree(irqs);
 
@@ -368,7 +369,7 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 	if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
 		init_fn = of_id->data;
 
-		ret = of_pmu_irq_cfg(pdev);
+		ret = of_pmu_irq_cfg(pmu);
 		if (!ret)
 			ret = init_fn(pmu);
 	} else {
-- 
cgit v1.1


From cc88116da0d18b8292f5437dbc0c4683c8a34ac1 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 13 May 2015 17:12:25 +0100
Subject: arm: perf: treat PMUs as CPU affine

In multi-cluster systems, the PMUs can be different across clusters, and
so our logical PMU may not be able to schedule events on all CPUs.

This patch adds a cpumask to encode which CPUs a PMU driver supports
controlling events for, and limits the driver to scheduling events on
those CPUs, and enabling and disabling the physical PMUs on those CPUs.
The cpumask is built based on the interrupt-affinity property, and in
the absence of such a property a homogenous system is assumed.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/pmu.h       |  1 +
 arch/arm/kernel/perf_event.c     | 25 +++++++++++++++++++++++++
 arch/arm/kernel/perf_event_cpu.c | 15 ++++++++++++---
 3 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 675e4ab..ecad26e 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -92,6 +92,7 @@ struct pmu_hw_events {
 struct arm_pmu {
 	struct pmu	pmu;
 	cpumask_t	active_irqs;
+	cpumask_t	supported_cpus;
 	int		*irq_affinity;
 	char		*name;
 	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 4a86a01..9b536be 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -11,6 +11,7 @@
  */
 #define pr_fmt(fmt) "hw perfevents: " fmt
 
+#include <linux/cpumask.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
@@ -229,6 +230,10 @@ armpmu_add(struct perf_event *event, int flags)
 	int idx;
 	int err = 0;
 
+	/* An event following a process won't be stopped earlier */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return -ENOENT;
+
 	perf_pmu_disable(event->pmu);
 
 	/* If we don't have a space for the counter then finish early. */
@@ -454,6 +459,17 @@ static int armpmu_event_init(struct perf_event *event)
 	int err = 0;
 	atomic_t *active_events = &armpmu->active_events;
 
+	/*
+	 * Reject CPU-affine events for CPUs that are of a different class to
+	 * that which this PMU handles. Process-following events (where
+	 * event->cpu == -1) can be migrated between CPUs, and thus we have to
+	 * reject them later (in armpmu_add) if they're scheduled on a
+	 * different class of CPU.
+	 */
+	if (event->cpu != -1 &&
+		!cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
+		return -ENOENT;
+
 	/* does not support taken branch sampling */
 	if (has_branch_stack(event))
 		return -EOPNOTSUPP;
@@ -489,6 +505,10 @@ static void armpmu_enable(struct pmu *pmu)
 	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
 	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
 
+	/* For task-bound events we may be called on other CPUs */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return;
+
 	if (enabled)
 		armpmu->start(armpmu);
 }
@@ -496,6 +516,11 @@ static void armpmu_enable(struct pmu *pmu)
 static void armpmu_disable(struct pmu *pmu)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+
+	/* For task-bound events we may be called on other CPUs */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return;
+
 	armpmu->stop(armpmu);
 }
 
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 2a9003e..9602d31 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -179,11 +179,15 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
 			  void *hcpu)
 {
+	int cpu = (unsigned long)hcpu;
 	struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);
 
 	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
 		return NOTIFY_DONE;
 
+	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
+		return NOTIFY_DONE;
+
 	if (pmu->reset)
 		pmu->reset(pmu);
 	else
@@ -219,7 +223,8 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
 
 	/* Ensure the PMU has sane values out of reset. */
 	if (cpu_pmu->reset)
-		on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+		on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset,
+			 cpu_pmu, 1);
 
 	/* If no interrupts available, set the corresponding capability flag */
 	if (!platform_get_irq(cpu_pmu->plat_device, 0))
@@ -334,12 +339,15 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu)
 		}
 
 		irqs[i] = cpu;
+		cpumask_set_cpu(cpu, &pmu->supported_cpus);
 	}
 
-	if (i == pdev->num_resources)
+	if (i == pdev->num_resources) {
 		pmu->irq_affinity = irqs;
-	else
+	} else {
 		kfree(irqs);
+		cpumask_setall(&pmu->supported_cpus);
+	}
 
 	return 0;
 }
@@ -374,6 +382,7 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 			ret = init_fn(pmu);
 	} else {
 		ret = probe_current_pmu(pmu);
+		cpumask_setall(&pmu->supported_cpus);
 	}
 
 	if (ret) {
-- 
cgit v1.1


From c904e32a69b7c77905876fc834f474f13f62c138 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 13 May 2015 17:12:26 +0100
Subject: arm: perf: filter unschedulable events

Different CPU microarchitectures implement different PMU events, and
thus events which can be scheduled on one microarchitecture cannot be
scheduled on another, and vice-versa. Some archicted events behave
differently across microarchitectures, and thus cannot be meaningfully
summed. Due to this, we reject the scheduling of an event on a CPU of a
different microarchitecture to that the event targets.

When the core perf code is scheduling events and encounters an event
which cannot be scheduled, it stops attempting to schedule events. As
the perf core periodically rotates the list of events, for some
proportion of the time events which are unschedulable will block events
which are schedulable, resulting in low utilisation of the hardware
counters.

This patch implements a pmu::filter_match callback such that we can
detect and skip such events while scheduling early, before they can
block the schedulable events. This prevents the low HW counter
utilisation issue.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/perf_event.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 9b536be..df02807 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -524,6 +524,18 @@ static void armpmu_disable(struct pmu *pmu)
 	armpmu->stop(armpmu);
 }
 
+/*
+ * In heterogeneous systems, events are specific to a particular
+ * microarchitecture, and aren't suitable for another. Thus, only match CPUs of
+ * the same microarchitecture.
+ */
+static int armpmu_filter_match(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	unsigned int cpu = smp_processor_id();
+	return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
+}
+
 #ifdef CONFIG_PM
 static int armpmu_runtime_resume(struct device *dev)
 {
@@ -564,6 +576,7 @@ static void armpmu_init(struct arm_pmu *armpmu)
 		.start		= armpmu_start,
 		.stop		= armpmu_stop,
 		.read		= armpmu_read,
+		.filter_match	= armpmu_filter_match,
 	};
 }
 
-- 
cgit v1.1


From 0e3038d18adcecf375c39ef5b39eb3c613293280 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 13 May 2015 17:12:27 +0100
Subject: arm: perf: probe number of counters on affine CPUs

In heterogeneous systems, the number of counters may differ across
clusters. To find the number of counters for a cluster, we must probe
the PMU from a CPU in that cluster.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/perf_event_v7.c | 48 ++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index f4207a4..ccec472 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -1056,15 +1056,22 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->max_period	= (1LLU << 32) - 1;
 };
 
-static u32 armv7_read_num_pmnc_events(void)
+static void armv7_read_num_pmnc_events(void *info)
 {
-	u32 nb_cnt;
+	int *nb_cnt = info;
 
 	/* Read the nb of CNTx counters supported from PMNC */
-	nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
+	*nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
 
-	/* Add the CPU cycles counter and return */
-	return nb_cnt + 1;
+	/* Add the CPU cycles counter */
+	*nb_cnt += 1;
+}
+
+static int armv7_probe_num_events(struct arm_pmu *arm_pmu)
+{
+	return smp_call_function_any(&arm_pmu->supported_cpus,
+				     armv7_read_num_pmnc_events,
+				     &arm_pmu->num_events, 1);
 }
 
 static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
@@ -1072,8 +1079,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
 	armv7pmu_init(cpu_pmu);
 	cpu_pmu->name		= "armv7_cortex_a8";
 	cpu_pmu->map_event	= armv7_a8_map_event;
-	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
-	return 0;
+	return armv7_probe_num_events(cpu_pmu);
 }
 
 static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
@@ -1081,8 +1087,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
 	armv7pmu_init(cpu_pmu);
 	cpu_pmu->name		= "armv7_cortex_a9";
 	cpu_pmu->map_event	= armv7_a9_map_event;
-	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
-	return 0;
+	return armv7_probe_num_events(cpu_pmu);
 }
 
 static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
@@ -1090,8 +1095,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
 	armv7pmu_init(cpu_pmu);
 	cpu_pmu->name		= "armv7_cortex_a5";
 	cpu_pmu->map_event	= armv7_a5_map_event;
-	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
-	return 0;
+	return armv7_probe_num_events(cpu_pmu);
 }
 
 static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
@@ -1099,9 +1103,8 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
 	armv7pmu_init(cpu_pmu);
 	cpu_pmu->name		= "armv7_cortex_a15";
 	cpu_pmu->map_event	= armv7_a15_map_event;
-	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
-	return 0;
+	return armv7_probe_num_events(cpu_pmu);
 }
 
 static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
@@ -1109,9 +1112,8 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
 	armv7pmu_init(cpu_pmu);
 	cpu_pmu->name		= "armv7_cortex_a7";
 	cpu_pmu->map_event	= armv7_a7_map_event;
-	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
-	return 0;
+	return armv7_probe_num_events(cpu_pmu);
 }
 
 static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
@@ -1119,16 +1121,15 @@ static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
 	armv7pmu_init(cpu_pmu);
 	cpu_pmu->name		= "armv7_cortex_a12";
 	cpu_pmu->map_event	= armv7_a12_map_event;
-	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
-	return 0;
+	return armv7_probe_num_events(cpu_pmu);
 }
 
 static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	armv7_a12_pmu_init(cpu_pmu);
+	int ret = armv7_a12_pmu_init(cpu_pmu);
 	cpu_pmu->name = "armv7_cortex_a17";
-	return 0;
+	return ret;
 }
 
 /*
@@ -1508,14 +1509,13 @@ static int krait_pmu_init(struct arm_pmu *cpu_pmu)
 		cpu_pmu->map_event = krait_map_event_no_branch;
 	else
 		cpu_pmu->map_event = krait_map_event;
-	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
 	cpu_pmu->reset		= krait_pmu_reset;
 	cpu_pmu->enable		= krait_pmu_enable_event;
 	cpu_pmu->disable	= krait_pmu_disable_event;
 	cpu_pmu->get_event_idx	= krait_pmu_get_event_idx;
 	cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx;
-	return 0;
+	return armv7_probe_num_events(cpu_pmu);
 }
 
 /*
@@ -1833,13 +1833,12 @@ static int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
 	armv7pmu_init(cpu_pmu);
 	cpu_pmu->name		= "armv7_scorpion";
 	cpu_pmu->map_event	= scorpion_map_event;
-	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	cpu_pmu->reset		= scorpion_pmu_reset;
 	cpu_pmu->enable		= scorpion_pmu_enable_event;
 	cpu_pmu->disable	= scorpion_pmu_disable_event;
 	cpu_pmu->get_event_idx	= scorpion_pmu_get_event_idx;
 	cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
-	return 0;
+	return armv7_probe_num_events(cpu_pmu);
 }
 
 static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
@@ -1847,13 +1846,12 @@ static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
 	armv7pmu_init(cpu_pmu);
 	cpu_pmu->name		= "armv7_scorpion_mp";
 	cpu_pmu->map_event	= scorpion_map_event;
-	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	cpu_pmu->reset		= scorpion_pmu_reset;
 	cpu_pmu->enable		= scorpion_pmu_enable_event;
 	cpu_pmu->disable	= scorpion_pmu_disable_event;
 	cpu_pmu->get_event_idx	= scorpion_pmu_get_event_idx;
 	cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
-	return 0;
+	return armv7_probe_num_events(cpu_pmu);
 }
 #else
 static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
-- 
cgit v1.1


From 7a2a24cb433e0932828d77e98d5c86606cb09c2a Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 13 May 2015 17:12:28 +0100
Subject: arm: perf: remove singleton PMU restriction

Now that we can describe PMUs in heterogeneous systems, the only item in
the way of perf support for big.LITTLE is the singleton cpu_pmu variable
used for OProfile compatibility.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/perf_event_cpu.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 9602d31..50f245b 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -33,7 +33,7 @@
 #include <asm/pmu.h>
 
 /* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *cpu_pmu;
+static struct arm_pmu *__oprofile_cpu_pmu;
 
 /*
  * Despite the names, these two functions are CPU-specific and are used
@@ -41,10 +41,10 @@ static struct arm_pmu *cpu_pmu;
  */
 const char *perf_pmu_name(void)
 {
-	if (!cpu_pmu)
+	if (!__oprofile_cpu_pmu)
 		return NULL;
 
-	return cpu_pmu->name;
+	return __oprofile_cpu_pmu->name;
 }
 EXPORT_SYMBOL_GPL(perf_pmu_name);
 
@@ -52,8 +52,8 @@ int perf_num_counters(void)
 {
 	int max_events = 0;
 
-	if (cpu_pmu != NULL)
-		max_events = cpu_pmu->num_events;
+	if (__oprofile_cpu_pmu != NULL)
+		max_events = __oprofile_cpu_pmu->num_events;
 
 	return max_events;
 }
@@ -360,19 +360,16 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 	struct arm_pmu *pmu;
 	int ret = -ENODEV;
 
-	if (cpu_pmu) {
-		pr_info("attempt to register multiple PMU devices!\n");
-		return -ENOSPC;
-	}
-
 	pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
 	if (!pmu) {
 		pr_info("failed to allocate PMU device!\n");
 		return -ENOMEM;
 	}
 
-	cpu_pmu = pmu;
-	cpu_pmu->plat_device = pdev;
+	if (!__oprofile_cpu_pmu)
+		__oprofile_cpu_pmu = pmu;
+
+	pmu->plat_device = pdev;
 
 	if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
 		init_fn = of_id->data;
@@ -390,18 +387,18 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 		goto out_free;
 	}
 
-	ret = cpu_pmu_init(cpu_pmu);
+	ret = cpu_pmu_init(pmu);
 	if (ret)
 		goto out_free;
 
-	ret = armpmu_register(cpu_pmu, -1);
+	ret = armpmu_register(pmu, -1);
 	if (ret)
 		goto out_destroy;
 
 	return 0;
 
 out_destroy:
-	cpu_pmu_destroy(cpu_pmu);
+	cpu_pmu_destroy(pmu);
 out_free:
 	pr_info("failed to register PMU devices!\n");
 	kfree(pmu);
-- 
cgit v1.1


From 4c9e0f76a55a38757390d43c40b2c7c3564a855a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 23 May 2015 09:31:54 +0100
Subject: ARM: 8370/1: hisi: fix hip04 build without HOTPLUG_CPU

The hip04 smp implementation provides the hotplug operations (cpu_die
and cpu_kill) unconditionally at the moment, which leads to a build
error when HOTPLUG_CPU is disabled:

mach-hisi/platmcpm.c:242:13: note: (near initialization for 'hip04_smp_ops')
mach-hisi/platmcpm.c:242:2: error: unknown field 'cpu_die' specified in initializer
mach-hisi/platmcpm.c:243:2: error: unknown field 'cpu_kill' specified in initializer

This uses an #ifdef to remove the code from the build when that
option is not set.

Fixes: 905cdf9dda5d ("ARM: hisi/hip04: remove the MCPM overhead")

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Nicolas Pitre <nico@xxxxxxxxxx>
Acked-by: Wei Xu <xuwei5@hisilicon.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-hisi/platmcpm.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c
index 880cbfa..b5f8f5f 100644
--- a/arch/arm/mach-hisi/platmcpm.c
+++ b/arch/arm/mach-hisi/platmcpm.c
@@ -152,6 +152,7 @@ out:
 	return 0;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 static void hip04_cpu_die(unsigned int l_cpu)
 {
 	unsigned int mpidr, cpu, cluster;
@@ -236,11 +237,14 @@ err:
 	spin_unlock_irq(&boot_lock);
 	return 0;
 }
+#endif
 
 static struct smp_operations __initdata hip04_smp_ops = {
 	.smp_boot_secondary	= hip04_boot_secondary,
+#ifdef CONFIG_HOTPLUG_CPU
 	.cpu_die		= hip04_cpu_die,
 	.cpu_kill		= hip04_cpu_kill,
+#endif
 };
 
 static bool __init hip04_cpu_table_init(void)
-- 
cgit v1.1


From 0361748f3b4a1cd73657a0a44fc3bc71ea30e8eb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 26 May 2015 15:36:58 +0100
Subject: ARM: 8371/1: always select IRQ_WORK on SMP

Any SMP kernel now requires the irq_work code after
generic_smp_call_function_single_interrupt() started using it,
or we get:

kernel/built-in.o: In function `flush_smp_call_function_queue':
:(.text+0x4dc3a): undefined reference to `irq_work_run'

Fixes: 478850160636c4f ("irq_work: Implement remote queueing")

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 45df48b..81c8b91 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1307,6 +1307,7 @@ config SMP
 	depends on GENERIC_CLOCKEVENTS
 	depends on HAVE_SMP
 	depends on MMU || ARM_MPU
+	select IRQ_WORK
 	help
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, say N. If you have a system with more
-- 
cgit v1.1


From cfeec79eb2587e0efc43c219558d70939d31bdc9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 26 May 2015 15:38:01 +0100
Subject: ARM: 8372/1: KGDB does not build on BE32

KGDB requires code patching, which only works on little-endian
or newer big-endian (BE8) machines but not on the older big-endian
ones (BE32) where it results in this build error:

arch/arm/kernel/patch.c: In function '__patch_text_real':
arch/arm/kernel/patch.c:93:4: error: implicit declaration of function '__opcode_to_mem_thumb32' [-Werror=implicit-function-declaration]
    insn = __opcode_to_mem_thumb32(insn);

This adds a Kconfig dependency to avoid the broken case and
for all other symbols that require code patching.

Fixes: 23a4e4050ba9 ("arm: kgdb: Handle read-only text / modules")

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 81c8b91..1a2ca2e 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -31,8 +31,8 @@ config ARM
 	select HARDIRQS_SW_RESEND
 	select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
 	select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
-	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
-	select HAVE_ARCH_KGDB
+	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32
+	select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32
 	select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_BPF_JIT
@@ -43,7 +43,7 @@ config ARM
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_CONTIGUOUS if MMU
-	select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL)
+	select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
 	select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
 	select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
@@ -57,7 +57,7 @@ config ARM
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_LZO
 	select HAVE_KERNEL_XZ
-	select HAVE_KPROBES if !XIP_KERNEL
+	select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32
 	select HAVE_KRETPROBES if (HAVE_KPROBES)
 	select HAVE_MEMBLOCK
 	select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND
-- 
cgit v1.1


From 8c36a75748d0208e41e6fe7817c631ac8075eeef Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 26 May 2015 15:39:36 +0100
Subject: ARM: 8373/1: disable branch profiling in uncompressor

The branch profiling code cannot work outside of the main
kernel and just causes link errors if we try to use it in
the decompressor. Disabling it here matches what we do
for other architectures.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/boot/compressed/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 6e1fb2b..7a13aeb 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -103,6 +103,8 @@ extra-y       += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \
 		 lib1funcs.S ashldi3.S bswapsdi2.S $(libfdt) $(libfdt_hdrs) \
 		 hyp-stub.S
 
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+
 ifeq ($(CONFIG_FUNCTION_TRACER),y)
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
-- 
cgit v1.1


From c32b76557287a032d1a38032f55c50d3a27dc743 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 26 May 2015 15:40:16 +0100
Subject: ARM: 8374/1: no longer expose CPU_ARM7TDMI/CPU_ARM9TDMI

Atmel at91x40 is gone, so we no longer have any platform using
either of these two, and we get randconfig failures on NOMMU
kernels if they accidentally get enabled on something that conflicts
with ARMv4T.

This stops short of removing the entire CPU support for now,
but as nothing selects these, it is basically dead code.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index b4f92b9..7188d63 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -6,7 +6,7 @@ comment "Processor Type"
 
 # ARM7TDMI
 config CPU_ARM7TDMI
-	bool "Support ARM7TDMI processor"
+	bool
 	depends on !MMU
 	select CPU_32v4T
 	select CPU_ABRT_LV4T
@@ -56,7 +56,7 @@ config CPU_ARM740T
 
 # ARM9TDMI
 config CPU_ARM9TDMI
-	bool "Support ARM9TDMI processor"
+	bool
 	depends on !MMU
 	select CPU_32v4T
 	select CPU_ABRT_NOMMU
-- 
cgit v1.1


From cb1293e2f594558ccc3995acf9c9d7400b3add36 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 26 May 2015 15:40:44 +0100
Subject: ARM: 8375/1: disable some options on ARMv7-M

Kprobes, irqflags tracing and kexec don't currently build on
kernels targetting ARMv7-M, so for now, we should just disallow
those combinations.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 1a2ca2e..0536e8d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -57,7 +57,7 @@ config ARM
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_LZO
 	select HAVE_KERNEL_XZ
-	select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32
+	select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M
 	select HAVE_KRETPROBES if (HAVE_KPROBES)
 	select HAVE_MEMBLOCK
 	select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND
@@ -171,7 +171,7 @@ config LOCKDEP_SUPPORT
 
 config TRACE_IRQFLAGS_SUPPORT
 	bool
-	default y
+	default !CPU_V7M
 
 config RWSEM_XCHGADD_ALGORITHM
 	bool
@@ -1952,6 +1952,7 @@ config XIP_PHYS_ADDR
 config KEXEC
 	bool "Kexec system call (EXPERIMENTAL)"
 	depends on (!SMP || PM_SLEEP_SMP)
+	depends on !CPU_V7M
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel.  It is like a reboot
-- 
cgit v1.1


From 65ba508d215184746450467ce75685df255720b4 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 26 May 2015 16:07:51 +0100
Subject: ARM: 8379/1: disable CONFIG_PTDUMP on !MMU

It's obviously pointless to dump page tables when the MMU is disabled,
and even trying to build this code results in numerous build errors
like these:

../arch/arm/mm/dump.c:56:11: error: 'L_PTE_USER' undeclared here (not in a function)
   .mask = L_PTE_USER,
           ^
../arch/arm/mm/dump.c:61:11: error: 'L_PTE_RDONLY' undeclared here (not in a function)
   .mask = L_PTE_RDONLY,
           ^
../arch/arm/mm/dump.c:66:11: error: 'L_PTE_XN' undeclared here (not in a function)
   .mask = L_PTE_XN,
           ^

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig.debug | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 0c12ffb..0c5ff3e 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -5,6 +5,7 @@ source "lib/Kconfig.debug"
 config ARM_PTDUMP
 	bool "Export kernel pagetable layout to userspace via debugfs"
 	depends on DEBUG_KERNEL
+	depends on MMU
 	select DEBUG_FS
 	---help---
 	  Say Y here if you want to show the kernel pagetable layout in a
-- 
cgit v1.1


From 6e27549bbcdb35975d5c157a2170b81bcb1291db Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 26 May 2015 16:08:46 +0100
Subject: ARM: 8380/1: bpf: fix NOMMU build

arch/arm/net/built-in.o: In function `bpf_jit_compile':
:(.text+0x2758): undefined reference to `set_memory_ro'
arch/arm/net/built-in.o: In function `bpf_jit_free':
:(.text+0x27ac): undefined reference to `set_memory_rw'

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/cacheflush.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 2d46862..4812cda 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -482,10 +482,17 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size)
 	: : : "r0","r1","r2","r3","r4","r5","r6","r7", \
 	      "r9","r10","lr","memory" )
 
+#ifdef CONFIG_MMU
 int set_memory_ro(unsigned long addr, int numpages);
 int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
+#else
+static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
+static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
+static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
+static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
+#endif
 
 #ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void);
-- 
cgit v1.1


From 90543ec8290b31b15fc9dd6de8e9c2b6347a9059 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 26 May 2015 16:10:02 +0100
Subject: ARM: 8381/1: fix ARMv4+Feroceon multiplatform build

The feroceon copypage implementation cannot be built when targetting an
ARMv4 CPU, so we need to pass the march=armv5te flag manually to gcc
when building this file. This is obviously safe since that code will
not be executed on ARMv4.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index d3afdf9..39a44bb 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -55,6 +55,8 @@ obj-$(CONFIG_CPU_XSCALE)	+= copypage-xscale.o
 obj-$(CONFIG_CPU_XSC3)		+= copypage-xsc3.o
 obj-$(CONFIG_CPU_COPY_FA)	+= copypage-fa.o
 
+CFLAGS_copypage-feroceon.o := -march=armv5te
+
 obj-$(CONFIG_CPU_TLB_V4WT)	+= tlb-v4.o
 obj-$(CONFIG_CPU_TLB_V4WB)	+= tlb-v4wb.o
 obj-$(CONFIG_CPU_TLB_V4WBI)	+= tlb-v4wbi.o
-- 
cgit v1.1


From 85e6f09785bd06f0510bdb00c40772c7f8da3c43 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Tue, 19 May 2015 16:16:14 +0100
Subject: ARM: 8367/1: sa1100: prepare for moving irq driver to drivers/irqchip

Prepare for moving sa1100 irq driver to irqchip infrastructure - split
sa1100_init_irq into helper code and irq parts.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-sa1100/generic.c     | 13 +++++++++++++
 arch/arm/mach-sa1100/irq.c         | 21 ++++++---------------
 include/linux/irqchip/irq-sa11x0.h | 17 +++++++++++++++++
 3 files changed, 36 insertions(+), 15 deletions(-)
 create mode 100644 include/linux/irqchip/irq-sa11x0.h

diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index c651f6e..345e63f 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -20,6 +20,7 @@
 #include <linux/ioport.h>
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
+#include <linux/irqchip/irq-sa11x0.h>
 
 #include <video/sa1100fb.h>
 
@@ -377,6 +378,18 @@ void __init sa1100_timer_init(void)
 	pxa_timer_nodt_init(IRQ_OST0, io_p2v(0x90000000), 3686400);
 }
 
+static struct resource irq_resource =
+	DEFINE_RES_MEM_NAMED(0x90050000, SZ_64K, "irqs");
+
+void __init sa1100_init_irq(void)
+{
+	request_resource(&iomem_resource, &irq_resource);
+
+	sa11x0_init_irq_nodt(IRQ_GPIO0_SC, irq_resource.start);
+
+	sa1100_init_gpio();
+}
+
 /*
  * Disable the memory bus request/grant signals on the SA1110 to
  * ensure that we don't receive spurious memory requests.  We set
diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c
index 08f929e..fdec5ed 100644
--- a/arch/arm/mach-sa1100/irq.c
+++ b/arch/arm/mach-sa1100/irq.c
@@ -1,9 +1,10 @@
 /*
  * linux/arch/arm/mach-sa1100/irq.c
  *
+ * Copyright (C) 2015 Dmitry Eremin-Solenikov
  * Copyright (C) 1999-2001 Nicolas Pitre
  *
- * Generic IRQ handling for the SA11x0, GPIO 11-27 IRQ demultiplexing.
+ * Generic IRQ handling for the SA11x0.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -15,16 +16,13 @@
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
-#include <linux/ioport.h>
 #include <linux/syscore_ops.h>
+#include <linux/irqchip/irq-sa11x0.h>
 
 #include <soc/sa1100/pwer.h>
 
-#include <mach/irqs.h>
 #include <asm/exception.h>
 
-#include "generic.h"
-
 #define ICIP	0x00  /* IC IRQ Pending reg. */
 #define ICMR	0x04  /* IC Mask Reg.        */
 #define ICLR	0x08  /* IC Level Reg.       */
@@ -86,9 +84,6 @@ static struct irq_domain_ops sa1100_normal_irqdomain_ops = {
 
 static struct irq_domain *sa1100_normal_irqdomain;
 
-static struct resource irq_resource =
-	DEFINE_RES_MEM_NAMED(0x90050000, SZ_64K, "irqs");
-
 static struct sa1100irq_state {
 	unsigned int	saved;
 	unsigned int	icmr;
@@ -156,11 +151,9 @@ sa1100_handle_irq(struct pt_regs *regs)
 	} while (1);
 }
 
-void __init sa1100_init_irq(void)
+void __init sa11x0_init_irq_nodt(int irq_start, resource_size_t io_start)
 {
-	request_resource(&iomem_resource, &irq_resource);
-
-	iobase = ioremap(irq_resource.start, SZ_64K);
+	iobase = ioremap(io_start, SZ_64K);
 	if (WARN_ON(!iobase))
 		return;
 
@@ -177,10 +170,8 @@ void __init sa1100_init_irq(void)
 	writel_relaxed(1, iobase + ICCR);
 
 	sa1100_normal_irqdomain = irq_domain_add_simple(NULL,
-			32, IRQ_GPIO0_SC,
+			32, irq_start,
 			&sa1100_normal_irqdomain_ops, NULL);
 
 	set_handle_irq(sa1100_handle_irq);
-
-	sa1100_init_gpio();
 }
diff --git a/include/linux/irqchip/irq-sa11x0.h b/include/linux/irqchip/irq-sa11x0.h
new file mode 100644
index 0000000..15db682
--- /dev/null
+++ b/include/linux/irqchip/irq-sa11x0.h
@@ -0,0 +1,17 @@
+/*
+ * Generic IRQ handling for the SA11x0.
+ *
+ * Copyright (C) 2015 Dmitry Eremin-Solenikov
+ * Copyright (C) 1999-2001 Nicolas Pitre
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __INCLUDE_LINUX_IRQCHIP_IRQ_SA11x0_H
+#define __INCLUDE_LINUX_IRQCHIP_IRQ_SA11x0_H
+
+void __init sa11x0_init_irq_nodt(int irq_start, resource_size_t io_start);
+
+#endif
-- 
cgit v1.1


From 22b67acd99c00e3fdc66869cd43fd539bab0ef8c Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Tue, 19 May 2015 16:17:09 +0100
Subject: ARM: 8368/1: sa1100: move irq driver to drivers/irqchip/

Move current sa11x0 IRQ driver to the irqchip subsystem.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-sa1100/Makefile |   2 +-
 arch/arm/mach-sa1100/irq.c    | 177 ------------------------------------------
 drivers/irqchip/Makefile      |   1 +
 drivers/irqchip/irq-sa11x0.c  | 175 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 177 insertions(+), 178 deletions(-)
 delete mode 100644 arch/arm/mach-sa1100/irq.c
 create mode 100644 drivers/irqchip/irq-sa11x0.c

diff --git a/arch/arm/mach-sa1100/Makefile b/arch/arm/mach-sa1100/Makefile
index 61ff91e..ebc4d58 100644
--- a/arch/arm/mach-sa1100/Makefile
+++ b/arch/arm/mach-sa1100/Makefile
@@ -3,7 +3,7 @@
 #
 
 # Common support
-obj-y := clock.o generic.o irq.o #nmi-oopser.o
+obj-y := clock.o generic.o #nmi-oopser.o
 
 # Specific board support
 obj-$(CONFIG_SA1100_ASSABET)		+= assabet.o
diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c
deleted file mode 100644
index fdec5ed..0000000
--- a/arch/arm/mach-sa1100/irq.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * linux/arch/arm/mach-sa1100/irq.c
- *
- * Copyright (C) 2015 Dmitry Eremin-Solenikov
- * Copyright (C) 1999-2001 Nicolas Pitre
- *
- * Generic IRQ handling for the SA11x0.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/irqdomain.h>
-#include <linux/syscore_ops.h>
-#include <linux/irqchip/irq-sa11x0.h>
-
-#include <soc/sa1100/pwer.h>
-
-#include <asm/exception.h>
-
-#define ICIP	0x00  /* IC IRQ Pending reg. */
-#define ICMR	0x04  /* IC Mask Reg.        */
-#define ICLR	0x08  /* IC Level Reg.       */
-#define ICCR	0x0C  /* IC Control Reg.     */
-#define ICFP	0x10  /* IC FIQ Pending reg. */
-#define ICPR	0x20  /* IC Pending Reg.     */
-
-static void __iomem *iobase;
-
-/*
- * We don't need to ACK IRQs on the SA1100 unless they're GPIOs
- * this is for internal IRQs i.e. from IRQ LCD to RTCAlrm.
- */
-static void sa1100_mask_irq(struct irq_data *d)
-{
-	u32 reg;
-
-	reg = readl_relaxed(iobase + ICMR);
-	reg &= ~BIT(d->hwirq);
-	writel_relaxed(reg, iobase + ICMR);
-}
-
-static void sa1100_unmask_irq(struct irq_data *d)
-{
-	u32 reg;
-
-	reg = readl_relaxed(iobase + ICMR);
-	reg |= BIT(d->hwirq);
-	writel_relaxed(reg, iobase + ICMR);
-}
-
-static int sa1100_set_wake(struct irq_data *d, unsigned int on)
-{
-	return sa11x0_sc_set_wake(d->hwirq, on);
-}
-
-static struct irq_chip sa1100_normal_chip = {
-	.name		= "SC",
-	.irq_ack	= sa1100_mask_irq,
-	.irq_mask	= sa1100_mask_irq,
-	.irq_unmask	= sa1100_unmask_irq,
-	.irq_set_wake	= sa1100_set_wake,
-};
-
-static int sa1100_normal_irqdomain_map(struct irq_domain *d,
-		unsigned int irq, irq_hw_number_t hwirq)
-{
-	irq_set_chip_and_handler(irq, &sa1100_normal_chip,
-				 handle_level_irq);
-	set_irq_flags(irq, IRQF_VALID);
-
-	return 0;
-}
-
-static struct irq_domain_ops sa1100_normal_irqdomain_ops = {
-	.map = sa1100_normal_irqdomain_map,
-	.xlate = irq_domain_xlate_onetwocell,
-};
-
-static struct irq_domain *sa1100_normal_irqdomain;
-
-static struct sa1100irq_state {
-	unsigned int	saved;
-	unsigned int	icmr;
-	unsigned int	iclr;
-	unsigned int	iccr;
-} sa1100irq_state;
-
-static int sa1100irq_suspend(void)
-{
-	struct sa1100irq_state *st = &sa1100irq_state;
-
-	st->saved = 1;
-	st->icmr = readl_relaxed(iobase + ICMR);
-	st->iclr = readl_relaxed(iobase + ICLR);
-	st->iccr = readl_relaxed(iobase + ICCR);
-
-	/*
-	 * Disable all GPIO-based interrupts.
-	 */
-	writel_relaxed(st->icmr & 0xfffff000, iobase + ICMR);
-
-	return 0;
-}
-
-static void sa1100irq_resume(void)
-{
-	struct sa1100irq_state *st = &sa1100irq_state;
-
-	if (st->saved) {
-		writel_relaxed(st->iccr, iobase + ICCR);
-		writel_relaxed(st->iclr, iobase + ICLR);
-
-		writel_relaxed(st->icmr, iobase + ICMR);
-	}
-}
-
-static struct syscore_ops sa1100irq_syscore_ops = {
-	.suspend	= sa1100irq_suspend,
-	.resume		= sa1100irq_resume,
-};
-
-static int __init sa1100irq_init_devicefs(void)
-{
-	register_syscore_ops(&sa1100irq_syscore_ops);
-	return 0;
-}
-
-device_initcall(sa1100irq_init_devicefs);
-
-static asmlinkage void __exception_irq_entry
-sa1100_handle_irq(struct pt_regs *regs)
-{
-	uint32_t icip, icmr, mask;
-
-	do {
-		icip = readl_relaxed(iobase + ICIP);
-		icmr = readl_relaxed(iobase + ICMR);
-		mask = icip & icmr;
-
-		if (mask == 0)
-			break;
-
-		handle_domain_irq(sa1100_normal_irqdomain,
-				ffs(mask) - 1, regs);
-	} while (1);
-}
-
-void __init sa11x0_init_irq_nodt(int irq_start, resource_size_t io_start)
-{
-	iobase = ioremap(io_start, SZ_64K);
-	if (WARN_ON(!iobase))
-		return;
-
-	/* disable all IRQs */
-	writel_relaxed(0, iobase + ICMR);
-
-	/* all IRQs are IRQ, not FIQ */
-	writel_relaxed(0, iobase + ICLR);
-
-	/*
-	 * Whatever the doc says, this has to be set for the wait-on-irq
-	 * instruction to work... on a SA1100 rev 9 at least.
-	 */
-	writel_relaxed(1, iobase + ICCR);
-
-	sa1100_normal_irqdomain = irq_domain_add_simple(NULL,
-			32, irq_start,
-			&sa1100_normal_irqdomain_ops, NULL);
-
-	set_handle_irq(sa1100_handle_irq);
-}
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index dda4927..49f372a 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -47,3 +47,4 @@ obj-$(CONFIG_KEYSTONE_IRQ)		+= irq-keystone.o
 obj-$(CONFIG_MIPS_GIC)			+= irq-mips-gic.o
 obj-$(CONFIG_ARCH_MEDIATEK)		+= irq-mtk-sysirq.o
 obj-$(CONFIG_ARCH_DIGICOLOR)		+= irq-digicolor.o
+obj-$(CONFIG_ARCH_SA1100)		+= irq-sa11x0.o
diff --git a/drivers/irqchip/irq-sa11x0.c b/drivers/irqchip/irq-sa11x0.c
new file mode 100644
index 0000000..97d257b
--- /dev/null
+++ b/drivers/irqchip/irq-sa11x0.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2015 Dmitry Eremin-Solenikov
+ * Copyright (C) 1999-2001 Nicolas Pitre
+ *
+ * Generic IRQ handling for the SA11x0.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/syscore_ops.h>
+#include <linux/irqchip/irq-sa11x0.h>
+
+#include <soc/sa1100/pwer.h>
+
+#include <asm/exception.h>
+
+#define ICIP	0x00  /* IC IRQ Pending reg. */
+#define ICMR	0x04  /* IC Mask Reg.        */
+#define ICLR	0x08  /* IC Level Reg.       */
+#define ICCR	0x0C  /* IC Control Reg.     */
+#define ICFP	0x10  /* IC FIQ Pending reg. */
+#define ICPR	0x20  /* IC Pending Reg.     */
+
+static void __iomem *iobase;
+
+/*
+ * We don't need to ACK IRQs on the SA1100 unless they're GPIOs
+ * this is for internal IRQs i.e. from IRQ LCD to RTCAlrm.
+ */
+static void sa1100_mask_irq(struct irq_data *d)
+{
+	u32 reg;
+
+	reg = readl_relaxed(iobase + ICMR);
+	reg &= ~BIT(d->hwirq);
+	writel_relaxed(reg, iobase + ICMR);
+}
+
+static void sa1100_unmask_irq(struct irq_data *d)
+{
+	u32 reg;
+
+	reg = readl_relaxed(iobase + ICMR);
+	reg |= BIT(d->hwirq);
+	writel_relaxed(reg, iobase + ICMR);
+}
+
+static int sa1100_set_wake(struct irq_data *d, unsigned int on)
+{
+	return sa11x0_sc_set_wake(d->hwirq, on);
+}
+
+static struct irq_chip sa1100_normal_chip = {
+	.name		= "SC",
+	.irq_ack	= sa1100_mask_irq,
+	.irq_mask	= sa1100_mask_irq,
+	.irq_unmask	= sa1100_unmask_irq,
+	.irq_set_wake	= sa1100_set_wake,
+};
+
+static int sa1100_normal_irqdomain_map(struct irq_domain *d,
+		unsigned int irq, irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &sa1100_normal_chip,
+				 handle_level_irq);
+	set_irq_flags(irq, IRQF_VALID);
+
+	return 0;
+}
+
+static struct irq_domain_ops sa1100_normal_irqdomain_ops = {
+	.map = sa1100_normal_irqdomain_map,
+	.xlate = irq_domain_xlate_onetwocell,
+};
+
+static struct irq_domain *sa1100_normal_irqdomain;
+
+static struct sa1100irq_state {
+	unsigned int	saved;
+	unsigned int	icmr;
+	unsigned int	iclr;
+	unsigned int	iccr;
+} sa1100irq_state;
+
+static int sa1100irq_suspend(void)
+{
+	struct sa1100irq_state *st = &sa1100irq_state;
+
+	st->saved = 1;
+	st->icmr = readl_relaxed(iobase + ICMR);
+	st->iclr = readl_relaxed(iobase + ICLR);
+	st->iccr = readl_relaxed(iobase + ICCR);
+
+	/*
+	 * Disable all GPIO-based interrupts.
+	 */
+	writel_relaxed(st->icmr & 0xfffff000, iobase + ICMR);
+
+	return 0;
+}
+
+static void sa1100irq_resume(void)
+{
+	struct sa1100irq_state *st = &sa1100irq_state;
+
+	if (st->saved) {
+		writel_relaxed(st->iccr, iobase + ICCR);
+		writel_relaxed(st->iclr, iobase + ICLR);
+
+		writel_relaxed(st->icmr, iobase + ICMR);
+	}
+}
+
+static struct syscore_ops sa1100irq_syscore_ops = {
+	.suspend	= sa1100irq_suspend,
+	.resume		= sa1100irq_resume,
+};
+
+static int __init sa1100irq_init_devicefs(void)
+{
+	register_syscore_ops(&sa1100irq_syscore_ops);
+	return 0;
+}
+
+device_initcall(sa1100irq_init_devicefs);
+
+static asmlinkage void __exception_irq_entry
+sa1100_handle_irq(struct pt_regs *regs)
+{
+	uint32_t icip, icmr, mask;
+
+	do {
+		icip = readl_relaxed(iobase + ICIP);
+		icmr = readl_relaxed(iobase + ICMR);
+		mask = icip & icmr;
+
+		if (mask == 0)
+			break;
+
+		handle_domain_irq(sa1100_normal_irqdomain,
+				ffs(mask) - 1, regs);
+	} while (1);
+}
+
+void __init sa11x0_init_irq_nodt(int irq_start, resource_size_t io_start)
+{
+	iobase = ioremap(io_start, SZ_64K);
+	if (WARN_ON(!iobase))
+		return;
+
+	/* disable all IRQs */
+	writel_relaxed(0, iobase + ICMR);
+
+	/* all IRQs are IRQ, not FIQ */
+	writel_relaxed(0, iobase + ICLR);
+
+	/*
+	 * Whatever the doc says, this has to be set for the wait-on-irq
+	 * instruction to work... on a SA1100 rev 9 at least.
+	 */
+	writel_relaxed(1, iobase + ICCR);
+
+	sa1100_normal_irqdomain = irq_domain_add_simple(NULL,
+			32, irq_start,
+			&sa1100_normal_irqdomain_ops, NULL);
+
+	set_handle_irq(sa1100_handle_irq);
+}
-- 
cgit v1.1


From ed61f9851d0686d56d7a9648b4807d82ad0adce6 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 26 May 2015 17:23:34 +0100
Subject: arm: perf: kill off unused pm callbacks

Currently the arm perf code has platdata callbacks for runtime PM and
irq handling, but no platform implements the hooks for the former. Kill
these off.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/pmu.h       | 14 --------------
 arch/arm/kernel/perf_event.c     | 38 +-------------------------------------
 arch/arm/kernel/perf_event_cpu.c |  1 -
 3 files changed, 1 insertion(+), 52 deletions(-)

diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index ecad26e..62464a4 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -24,22 +24,10 @@
  *	interrupt and passed the address of the low level handler,
  *	and can be used to implement any platform specific handling
  *	before or after calling it.
- * @runtime_resume: an optional handler which will be called by the
- *	runtime PM framework following a call to pm_runtime_get().
- *	Note that if pm_runtime_get() is called more than once in
- *	succession this handler will only be called once.
- * @runtime_suspend: an optional handler which will be called by the
- *	runtime PM framework following a call to pm_runtime_put().
- *	Note that if pm_runtime_get() is called more than once in
- *	succession this handler will only be called following the
- *	final call to pm_runtime_put() that actually disables the
- *	hardware.
  */
 struct arm_pmu_platdata {
 	irqreturn_t (*handle_irq)(int irq, void *dev,
 				  irq_handler_t pmu_handler);
-	int (*runtime_resume)(struct device *dev);
-	int (*runtime_suspend)(struct device *dev);
 };
 
 #ifdef CONFIG_HW_PERF_EVENTS
@@ -123,8 +111,6 @@ struct arm_pmu {
 
 #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
 
-extern const struct dev_pm_ops armpmu_dev_pm_ops;
-
 int armpmu_register(struct arm_pmu *armpmu, int type);
 
 u64 armpmu_event_update(struct perf_event *event);
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index df02807..0072e8b 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -14,7 +14,6 @@
 #include <linux/cpumask.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
-#include <linux/pm_runtime.h>
 #include <linux/irq.h>
 #include <linux/irqdesc.h>
 
@@ -349,20 +348,12 @@ static void
 armpmu_release_hardware(struct arm_pmu *armpmu)
 {
 	armpmu->free_irq(armpmu);
-	pm_runtime_put_sync(&armpmu->plat_device->dev);
 }
 
 static int
 armpmu_reserve_hardware(struct arm_pmu *armpmu)
 {
-	int err;
-	struct platform_device *pmu_device = armpmu->plat_device;
-
-	if (!pmu_device)
-		return -ENODEV;
-
-	pm_runtime_get_sync(&pmu_device->dev);
-	err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
+	int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
 	if (err) {
 		armpmu_release_hardware(armpmu);
 		return err;
@@ -536,32 +527,6 @@ static int armpmu_filter_match(struct perf_event *event)
 	return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
 }
 
-#ifdef CONFIG_PM
-static int armpmu_runtime_resume(struct device *dev)
-{
-	struct arm_pmu_platdata *plat = dev_get_platdata(dev);
-
-	if (plat && plat->runtime_resume)
-		return plat->runtime_resume(dev);
-
-	return 0;
-}
-
-static int armpmu_runtime_suspend(struct device *dev)
-{
-	struct arm_pmu_platdata *plat = dev_get_platdata(dev);
-
-	if (plat && plat->runtime_suspend)
-		return plat->runtime_suspend(dev);
-
-	return 0;
-}
-#endif
-
-const struct dev_pm_ops armpmu_dev_pm_ops = {
-	SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL)
-};
-
 static void armpmu_init(struct arm_pmu *armpmu)
 {
 	atomic_set(&armpmu->active_events, 0);
@@ -583,7 +548,6 @@ static void armpmu_init(struct arm_pmu *armpmu)
 int armpmu_register(struct arm_pmu *armpmu, int type)
 {
 	armpmu_init(armpmu);
-	pm_runtime_enable(&armpmu->plat_device->dev);
 	pr_info("enabled with %s PMU driver, %d counters available\n",
 			armpmu->name, armpmu->num_events);
 	return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 50f245b..14a5a0a 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -408,7 +408,6 @@ out_free:
 static struct platform_driver cpu_pmu_driver = {
 	.driver		= {
 		.name	= "arm-pmu",
-		.pm	= &armpmu_dev_pm_ops,
 		.of_match_table = cpu_pmu_of_device_ids,
 	},
 	.probe		= cpu_pmu_device_probe,
-- 
cgit v1.1


From cfdad2991f7addb1bc0ce3361a5ee980a0482a87 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 26 May 2015 17:23:35 +0100
Subject: arm: perf: share arm_pmu_device_probe

Enable the probe function to be shared with other drivers, which will
inject the appropriate of_device_id and pmu_probe_info tables.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/pmu.h       |  4 ++++
 arch/arm/kernel/perf_event_cpu.c | 20 ++++++++++++++------
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 62464a4..3fc87df 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -145,6 +145,10 @@ struct pmu_probe_info {
 #define XSCALE_PMU_PROBE(_version, _fn) \
 	PMU_PROBE(ARM_CPU_IMP_INTEL << 24 | _version, ARM_PMU_XSCALE_MASK, _fn)
 
+int arm_pmu_device_probe(struct platform_device *pdev,
+			 const struct of_device_id *of_table,
+			 const struct pmu_probe_info *probe_table);
+
 #endif /* CONFIG_HW_PERF_EVENTS */
 
 #endif /* __ARM_PMU_H__ */
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 14a5a0a..797b569 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -286,16 +286,16 @@ static const struct pmu_probe_info pmu_probe_table[] = {
 /*
  * CPU PMU identification and probing.
  */
-static int probe_current_pmu(struct arm_pmu *pmu)
+static int probe_current_pmu(struct arm_pmu *pmu,
+			     const struct pmu_probe_info *info)
 {
 	int cpu = get_cpu();
 	unsigned int cpuid = read_cpuid_id();
 	int ret = -ENODEV;
-	const struct pmu_probe_info *info;
 
 	pr_info("probing PMU on CPU %d\n", cpu);
 
-	for (info = pmu_probe_table; info->init != NULL; info++) {
+	for (; info->init != NULL; info++) {
 		if ((cpuid & info->mask) != info->cpuid)
 			continue;
 		ret = info->init(pmu);
@@ -352,7 +352,9 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu)
 	return 0;
 }
 
-static int cpu_pmu_device_probe(struct platform_device *pdev)
+int arm_pmu_device_probe(struct platform_device *pdev,
+			 const struct of_device_id *of_table,
+			 const struct pmu_probe_info *probe_table)
 {
 	const struct of_device_id *of_id;
 	const int (*init_fn)(struct arm_pmu *);
@@ -371,14 +373,14 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 
 	pmu->plat_device = pdev;
 
-	if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
+	if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
 		init_fn = of_id->data;
 
 		ret = of_pmu_irq_cfg(pmu);
 		if (!ret)
 			ret = init_fn(pmu);
 	} else {
-		ret = probe_current_pmu(pmu);
+		ret = probe_current_pmu(pmu, probe_table);
 		cpumask_setall(&pmu->supported_cpus);
 	}
 
@@ -405,6 +407,12 @@ out_free:
 	return ret;
 }
 
+static int cpu_pmu_device_probe(struct platform_device *pdev)
+{
+	return arm_pmu_device_probe(pdev, cpu_pmu_of_device_ids,
+				    pmu_probe_table);
+}
+
 static struct platform_driver cpu_pmu_driver = {
 	.driver		= {
 		.name	= "arm-pmu",
-- 
cgit v1.1


From a12c72cc3e6938191cabeefff44b959a823d3d76 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 26 May 2015 17:23:36 +0100
Subject: arm: perf: factor out xscale pmu driver

Now that the core arm perf code maintains no global state and all
microarchitecture-specific PMU data can be fed in through the shared
probe function, it's possible to use it as a library and get rid of the
C file includes we have currently.

This patch factors out the xscale-specific portions out into the xscale
driver. For the moment this is always built if perf event support is
enabled, but the preprocessor guards will leave behind an empty file.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/Makefile            |  2 +-
 arch/arm/kernel/perf_event_cpu.c    |  4 ----
 arch/arm/kernel/perf_event_xscale.c | 32 +++++++++++++++++++++++++++-----
 3 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 752725d..8b4aad7 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -70,7 +70,7 @@ obj-$(CONFIG_CPU_PJ4)		+= pj4-cp0.o
 obj-$(CONFIG_CPU_PJ4B)		+= pj4-cp0.o
 obj-$(CONFIG_IWMMXT)		+= iwmmxt.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_regs.o perf_callchain.o
-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o perf_event_cpu.o
+obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o perf_event_cpu.o perf_event_xscale.o
 CFLAGS_pj4-cp0.o		:= -marm
 AFLAGS_iwmmxt.o			:= -Wa,-mcpu=iwmmxt
 obj-$(CONFIG_ARM_CPU_TOPOLOGY)  += topology.o
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 797b569..1ebb179 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -60,7 +60,6 @@ int perf_num_counters(void)
 EXPORT_SYMBOL_GPL(perf_num_counters);
 
 /* Include the PMU-specific implementations. */
-#include "perf_event_xscale.c"
 #include "perf_event_v6.c"
 #include "perf_event_v7.c"
 
@@ -267,7 +266,6 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = {
 	{.name = "arm-pmu"},
 	{.name = "armv6-pmu"},
 	{.name = "armv7-pmu"},
-	{.name = "xscale-pmu"},
 	{},
 };
 
@@ -278,8 +276,6 @@ static const struct pmu_probe_info pmu_probe_table[] = {
 	ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init),
 	ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init),
 	ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init),
-	XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init),
-	XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init),
 	{ /* sentinel value */ }
 };
 
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 8af9f1f..304d056 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -13,6 +13,14 @@
  */
 
 #ifdef CONFIG_CPU_XSCALE
+
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
+
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
 enum xscale_perf_types {
 	XSCALE_PERFCTR_ICACHE_MISS		= 0x00,
 	XSCALE_PERFCTR_ICACHE_NO_DELIVER	= 0x01,
@@ -740,14 +748,28 @@ static int xscale2pmu_init(struct arm_pmu *cpu_pmu)
 
 	return 0;
 }
-#else
-static inline int xscale1pmu_init(struct arm_pmu *cpu_pmu)
+
+static const struct pmu_probe_info xscale_pmu_probe_table[] = {
+	XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init),
+	XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init),
+	{ /* sentinel value */ }
+};
+
+static int xscale_pmu_device_probe(struct platform_device *pdev)
 {
-	return -ENODEV;
+	return arm_pmu_device_probe(pdev, NULL, xscale_pmu_probe_table);
 }
 
-static inline int xscale2pmu_init(struct arm_pmu *cpu_pmu)
+static struct platform_driver xscale_pmu_driver = {
+	.driver		= {
+		.name	= "xscale-pmu",
+	},
+	.probe		= xscale_pmu_device_probe,
+};
+
+static int __init register_xscale_pmu_driver(void)
 {
-	return -ENODEV;
+	return platform_driver_register(&xscale_pmu_driver);
 }
+device_initcall(register_xscale_pmu_driver);
 #endif	/* CONFIG_CPU_XSCALE */
-- 
cgit v1.1


From 1fe115b303f301916e1430667c5b03451f56c733 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 26 May 2015 17:23:37 +0100
Subject: arm: perf: factor out armv6 pmu driver

Now that the core arm perf code maintains no global state and all
microarchitecture-specific PMU data can be fed in through the shared
probe function, it's possible to use it as a library and get rid of the
C file includes we have currently.

This patch factors out the ARMv6-specific portions out into the ARMv6
driver. For the moment this is always built if perf event support is
enabled, but the preprocessor guards will leave behind an empty file.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/Makefile         |  3 ++-
 arch/arm/kernel/perf_event_cpu.c |  9 --------
 arch/arm/kernel/perf_event_v6.c  | 49 +++++++++++++++++++++++++++++-----------
 3 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 8b4aad7..42a43da 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -70,7 +70,8 @@ obj-$(CONFIG_CPU_PJ4)		+= pj4-cp0.o
 obj-$(CONFIG_CPU_PJ4B)		+= pj4-cp0.o
 obj-$(CONFIG_IWMMXT)		+= iwmmxt.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_regs.o perf_callchain.o
-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o perf_event_cpu.o perf_event_xscale.o
+obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o perf_event_cpu.o \
+				   perf_event_xscale.o perf_event_v6.o
 CFLAGS_pj4-cp0.o		:= -marm
 AFLAGS_iwmmxt.o			:= -Wa,-mcpu=iwmmxt
 obj-$(CONFIG_ARM_CPU_TOPOLOGY)  += topology.o
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 1ebb179..d24d585 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -60,7 +60,6 @@ int perf_num_counters(void)
 EXPORT_SYMBOL_GPL(perf_num_counters);
 
 /* Include the PMU-specific implementations. */
-#include "perf_event_v6.c"
 #include "perf_event_v7.c"
 
 static void cpu_pmu_enable_percpu_irq(void *data)
@@ -253,9 +252,6 @@ static const struct of_device_id cpu_pmu_of_device_ids[] = {
 	{.compatible = "arm,cortex-a8-pmu",	.data = armv7_a8_pmu_init},
 	{.compatible = "arm,cortex-a7-pmu",	.data = armv7_a7_pmu_init},
 	{.compatible = "arm,cortex-a5-pmu",	.data = armv7_a5_pmu_init},
-	{.compatible = "arm,arm11mpcore-pmu",	.data = armv6mpcore_pmu_init},
-	{.compatible = "arm,arm1176-pmu",	.data = armv6_1176_pmu_init},
-	{.compatible = "arm,arm1136-pmu",	.data = armv6_1136_pmu_init},
 	{.compatible = "qcom,krait-pmu",	.data = krait_pmu_init},
 	{.compatible = "qcom,scorpion-pmu",	.data = scorpion_pmu_init},
 	{.compatible = "qcom,scorpion-mp-pmu",	.data = scorpion_mp_pmu_init},
@@ -264,16 +260,11 @@ static const struct of_device_id cpu_pmu_of_device_ids[] = {
 
 static struct platform_device_id cpu_pmu_plat_device_ids[] = {
 	{.name = "arm-pmu"},
-	{.name = "armv6-pmu"},
 	{.name = "armv7-pmu"},
 	{},
 };
 
 static const struct pmu_probe_info pmu_probe_table[] = {
-	ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init),
-	ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init),
-	ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init),
-	ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init),
 	ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init),
 	ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init),
 	{ /* sentinel value */ }
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index f2ffd5c..09f83e4 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -31,6 +31,14 @@
  */
 
 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K)
+
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
+
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
 enum armv6_perf_types {
 	ARMV6_PERFCTR_ICACHE_MISS	    = 0x0,
 	ARMV6_PERFCTR_IBUF_STALL	    = 0x1,
@@ -543,24 +551,39 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
 
 	return 0;
 }
-#else
-static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
-{
-	return -ENODEV;
-}
 
-static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu)
-{
-	return -ENODEV;
-}
+static struct of_device_id armv6_pmu_of_device_ids[] = {
+	{.compatible = "arm,arm11mpcore-pmu",	.data = armv6mpcore_pmu_init},
+	{.compatible = "arm,arm1176-pmu",	.data = armv6_1176_pmu_init},
+	{.compatible = "arm,arm1136-pmu",	.data = armv6_1136_pmu_init},
+	{ /* sentinel value */ }
+};
 
-static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
+static const struct pmu_probe_info armv6_pmu_probe_table[] = {
+	ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init),
+	ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init),
+	ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init),
+	ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init),
+	{ /* sentinel value */ }
+};
+
+static int armv6_pmu_device_probe(struct platform_device *pdev)
 {
-	return -ENODEV;
+	return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids,
+				    armv6_pmu_probe_table);
 }
 
-static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
+static struct platform_driver armv6_pmu_driver = {
+	.driver		= {
+		.name	= "armv6-pmu",
+		.of_match_table = armv6_pmu_of_device_ids,
+	},
+	.probe		= armv6_pmu_device_probe,
+};
+
+static int __init register_armv6_pmu_driver(void)
 {
-	return -ENODEV;
+	return platform_driver_register(&armv6_pmu_driver);
 }
+device_initcall(register_armv6_pmu_driver);
 #endif	/* CONFIG_CPU_V6 || CONFIG_CPU_V6K */
-- 
cgit v1.1


From 29ba0f37f1578db268ac805c117365923b9a7663 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 26 May 2015 17:23:38 +0100
Subject: arm: perf: factor out armv7 pmu driver

Now that the core arm perf code maintains no global state and all
microarchitecture-specific PMU data can be fed in through the shared
probe function, it's possible to use it as a library and get rid of the
C file includes we have currently.

This patch factors out the ARMv7-specific portions out into the ARMv7
driver. For the moment this is always built if perf event support is
enabled, but the preprocessor guards will leave behind an empty file.

Now that perf_event_cpu.c contains no microarchitecture-specific data,
the associated probing code is removed, completing its relegation to a
library file. The vestigal "arm-pmu" platform device ID is removed in
this patch, as it has been unused since platform files were updated to
specify a more specific PMU variant.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/Makefile         |  3 +-
 arch/arm/kernel/perf_event_cpu.c | 53 ---------------------------
 arch/arm/kernel/perf_event_v7.c  | 77 +++++++++++++++++++---------------------
 3 files changed, 39 insertions(+), 94 deletions(-)

diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 42a43da..26de378 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -71,7 +71,8 @@ obj-$(CONFIG_CPU_PJ4B)		+= pj4-cp0.o
 obj-$(CONFIG_IWMMXT)		+= iwmmxt.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_regs.o perf_callchain.o
 obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o perf_event_cpu.o \
-				   perf_event_xscale.o perf_event_v6.o
+				   perf_event_xscale.o perf_event_v6.o \
+				   perf_event_v7.o
 CFLAGS_pj4-cp0.o		:= -marm
 AFLAGS_iwmmxt.o			:= -Wa,-mcpu=iwmmxt
 obj-$(CONFIG_ARM_CPU_TOPOLOGY)  += topology.o
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index d24d585..261b639 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -59,9 +59,6 @@ int perf_num_counters(void)
 }
 EXPORT_SYMBOL_GPL(perf_num_counters);
 
-/* Include the PMU-specific implementations. */
-#include "perf_event_v7.c"
-
 static void cpu_pmu_enable_percpu_irq(void *data)
 {
 	int irq = *(int *)data;
@@ -242,35 +239,6 @@ static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
 }
 
 /*
- * PMU platform driver and devicetree bindings.
- */
-static const struct of_device_id cpu_pmu_of_device_ids[] = {
-	{.compatible = "arm,cortex-a17-pmu",	.data = armv7_a17_pmu_init},
-	{.compatible = "arm,cortex-a15-pmu",	.data = armv7_a15_pmu_init},
-	{.compatible = "arm,cortex-a12-pmu",	.data = armv7_a12_pmu_init},
-	{.compatible = "arm,cortex-a9-pmu",	.data = armv7_a9_pmu_init},
-	{.compatible = "arm,cortex-a8-pmu",	.data = armv7_a8_pmu_init},
-	{.compatible = "arm,cortex-a7-pmu",	.data = armv7_a7_pmu_init},
-	{.compatible = "arm,cortex-a5-pmu",	.data = armv7_a5_pmu_init},
-	{.compatible = "qcom,krait-pmu",	.data = krait_pmu_init},
-	{.compatible = "qcom,scorpion-pmu",	.data = scorpion_pmu_init},
-	{.compatible = "qcom,scorpion-mp-pmu",	.data = scorpion_mp_pmu_init},
-	{},
-};
-
-static struct platform_device_id cpu_pmu_plat_device_ids[] = {
-	{.name = "arm-pmu"},
-	{.name = "armv7-pmu"},
-	{},
-};
-
-static const struct pmu_probe_info pmu_probe_table[] = {
-	ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init),
-	ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init),
-	{ /* sentinel value */ }
-};
-
-/*
  * CPU PMU identification and probing.
  */
 static int probe_current_pmu(struct arm_pmu *pmu,
@@ -393,24 +361,3 @@ out_free:
 	kfree(pmu);
 	return ret;
 }
-
-static int cpu_pmu_device_probe(struct platform_device *pdev)
-{
-	return arm_pmu_device_probe(pdev, cpu_pmu_of_device_ids,
-				    pmu_probe_table);
-}
-
-static struct platform_driver cpu_pmu_driver = {
-	.driver		= {
-		.name	= "arm-pmu",
-		.of_match_table = cpu_pmu_of_device_ids,
-	},
-	.probe		= cpu_pmu_device_probe,
-	.id_table	= cpu_pmu_plat_device_ids,
-};
-
-static int __init register_pmu_driver(void)
-{
-	return platform_driver_register(&cpu_pmu_driver);
-}
-device_initcall(register_pmu_driver);
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index ccec472..f9b37f8 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -19,9 +19,15 @@
 #ifdef CONFIG_CPU_V7
 
 #include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
 #include <asm/vfp.h>
 #include "../vfp/vfpinstr.h"
 
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
 /*
  * Common ARMv7 event types
  *
@@ -1853,54 +1859,45 @@ static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
 	return armv7_probe_num_events(cpu_pmu);
 }
-#else
-static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
-{
-	return -ENODEV;
-}
-
-static inline int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
-{
-	return -ENODEV;
-}
-
-static inline int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
-{
-	return -ENODEV;
-}
-
-static inline int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
-{
-	return -ENODEV;
-}
 
-static inline int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
-{
-	return -ENODEV;
-}
+static const struct of_device_id armv7_pmu_of_device_ids[] = {
+	{.compatible = "arm,cortex-a17-pmu",	.data = armv7_a17_pmu_init},
+	{.compatible = "arm,cortex-a15-pmu",	.data = armv7_a15_pmu_init},
+	{.compatible = "arm,cortex-a12-pmu",	.data = armv7_a12_pmu_init},
+	{.compatible = "arm,cortex-a9-pmu",	.data = armv7_a9_pmu_init},
+	{.compatible = "arm,cortex-a8-pmu",	.data = armv7_a8_pmu_init},
+	{.compatible = "arm,cortex-a7-pmu",	.data = armv7_a7_pmu_init},
+	{.compatible = "arm,cortex-a5-pmu",	.data = armv7_a5_pmu_init},
+	{.compatible = "qcom,krait-pmu",	.data = krait_pmu_init},
+	{.compatible = "qcom,scorpion-pmu",	.data = scorpion_pmu_init},
+	{.compatible = "qcom,scorpion-mp-pmu",	.data = scorpion_mp_pmu_init},
+	{},
+};
 
-static inline int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
-{
-	return -ENODEV;
-}
+static const struct pmu_probe_info armv7_pmu_probe_table[] = {
+	ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init),
+	ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init),
+	{ /* sentinel value */ }
+};
 
-static inline int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
-{
-	return -ENODEV;
-}
 
-static inline int krait_pmu_init(struct arm_pmu *cpu_pmu)
+static int armv7_pmu_device_probe(struct platform_device *pdev)
 {
-	return -ENODEV;
+	return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids,
+				    armv7_pmu_probe_table);
 }
 
-static inline int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
-{
-	return -ENODEV;
-}
+static struct platform_driver armv7_pmu_driver = {
+	.driver		= {
+		.name	= "armv7-pmu",
+		.of_match_table = armv7_pmu_of_device_ids,
+	},
+	.probe		= armv7_pmu_device_probe,
+};
 
-static inline int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
+static int __init register_armv7_pmu_driver(void)
 {
-	return -ENODEV;
+	return platform_driver_register(&armv7_pmu_driver);
 }
+device_initcall(register_armv7_pmu_driver);
 #endif	/* CONFIG_CPU_V7 */
-- 
cgit v1.1


From 74cf0bc75f1671b8da3b2e6ef7b2dc75cab0016a Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 26 May 2015 17:23:39 +0100
Subject: arm: perf: unify perf_event{,_cpu}.c

Now that the arm_pmu framework is only used for CPU PMUs, there's no
reason to keep the pseudo-generic and CPU-specific framework portions
separate.

This patch folds the two into perf_event.c.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
[will: fixed up irq cfg to match upstream]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/Makefile         |   2 +-
 arch/arm/kernel/perf_event.c     | 340 ++++++++++++++++++++++++++++++++++++
 arch/arm/kernel/perf_event_cpu.c | 363 ---------------------------------------
 3 files changed, 341 insertions(+), 364 deletions(-)
 delete mode 100644 arch/arm/kernel/perf_event_cpu.c

diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 26de378..d274a1f 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -70,7 +70,7 @@ obj-$(CONFIG_CPU_PJ4)		+= pj4-cp0.o
 obj-$(CONFIG_CPU_PJ4B)		+= pj4-cp0.o
 obj-$(CONFIG_IWMMXT)		+= iwmmxt.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_regs.o perf_callchain.o
-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o perf_event_cpu.o \
+obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o \
 				   perf_event_xscale.o perf_event_v6.o \
 				   perf_event_v7.o
 CFLAGS_pj4-cp0.o		:= -marm
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 0072e8b..357f57e 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -11,12 +11,18 @@
  */
 #define pr_fmt(fmt) "hw perfevents: " fmt
 
+#include <linux/bitmap.h>
 #include <linux/cpumask.h>
+#include <linux/export.h>
 #include <linux/kernel.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
 #include <linux/irq.h>
 #include <linux/irqdesc.h>
 
+#include <asm/cputype.h>
 #include <asm/irq_regs.h>
 #include <asm/pmu.h>
 
@@ -553,3 +559,337 @@ int armpmu_register(struct arm_pmu *armpmu, int type)
 	return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
 }
 
+/* Set at runtime when we know what CPU type we are. */
+static struct arm_pmu *__oprofile_cpu_pmu;
+
+/*
+ * Despite the names, these two functions are CPU-specific and are used
+ * by the OProfile/perf code.
+ */
+const char *perf_pmu_name(void)
+{
+	if (!__oprofile_cpu_pmu)
+		return NULL;
+
+	return __oprofile_cpu_pmu->name;
+}
+EXPORT_SYMBOL_GPL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+	int max_events = 0;
+
+	if (__oprofile_cpu_pmu != NULL)
+		max_events = __oprofile_cpu_pmu->num_events;
+
+	return max_events;
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
+static void cpu_pmu_enable_percpu_irq(void *data)
+{
+	int irq = *(int *)data;
+
+	enable_percpu_irq(irq, IRQ_TYPE_NONE);
+}
+
+static void cpu_pmu_disable_percpu_irq(void *data)
+{
+	int irq = *(int *)data;
+
+	disable_percpu_irq(irq);
+}
+
+static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
+{
+	int i, irq, irqs;
+	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
+
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+
+	irq = platform_get_irq(pmu_device, 0);
+	if (irq >= 0 && irq_is_percpu(irq)) {
+		on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
+		free_percpu_irq(irq, &hw_events->percpu_pmu);
+	} else {
+		for (i = 0; i < irqs; ++i) {
+			int cpu = i;
+
+			if (cpu_pmu->irq_affinity)
+				cpu = cpu_pmu->irq_affinity[i];
+
+			if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
+				continue;
+			irq = platform_get_irq(pmu_device, i);
+			if (irq >= 0)
+				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+		}
+	}
+}
+
+static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
+{
+	int i, err, irq, irqs;
+	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
+
+	if (!pmu_device)
+		return -ENODEV;
+
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+	if (irqs < 1) {
+		pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
+		return 0;
+	}
+
+	irq = platform_get_irq(pmu_device, 0);
+	if (irq >= 0 && irq_is_percpu(irq)) {
+		err = request_percpu_irq(irq, handler, "arm-pmu",
+					 &hw_events->percpu_pmu);
+		if (err) {
+			pr_err("unable to request IRQ%d for ARM PMU counters\n",
+				irq);
+			return err;
+		}
+		on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
+	} else {
+		for (i = 0; i < irqs; ++i) {
+			int cpu = i;
+
+			err = 0;
+			irq = platform_get_irq(pmu_device, i);
+			if (irq < 0)
+				continue;
+
+			if (cpu_pmu->irq_affinity)
+				cpu = cpu_pmu->irq_affinity[i];
+
+			/*
+			 * If we have a single PMU interrupt that we can't shift,
+			 * assume that we're running on a uniprocessor machine and
+			 * continue. Otherwise, continue without this interrupt.
+			 */
+			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
+				pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
+					irq, cpu);
+				continue;
+			}
+
+			err = request_irq(irq, handler,
+					  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
+					  per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+			if (err) {
+				pr_err("unable to request IRQ%d for ARM PMU counters\n",
+					irq);
+				return err;
+			}
+
+			cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * PMU hardware loses all context when a CPU goes offline.
+ * When a CPU is hotplugged back in, since some hardware registers are
+ * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
+ * junk values out of them.
+ */
+static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
+			  void *hcpu)
+{
+	int cpu = (unsigned long)hcpu;
+	struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);
+
+	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
+		return NOTIFY_DONE;
+
+	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
+		return NOTIFY_DONE;
+
+	if (pmu->reset)
+		pmu->reset(pmu);
+	else
+		return NOTIFY_DONE;
+
+	return NOTIFY_OK;
+}
+
+static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	int err;
+	int cpu;
+	struct pmu_hw_events __percpu *cpu_hw_events;
+
+	cpu_hw_events = alloc_percpu(struct pmu_hw_events);
+	if (!cpu_hw_events)
+		return -ENOMEM;
+
+	cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify;
+	err = register_cpu_notifier(&cpu_pmu->hotplug_nb);
+	if (err)
+		goto out_hw_events;
+
+	for_each_possible_cpu(cpu) {
+		struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
+		raw_spin_lock_init(&events->pmu_lock);
+		events->percpu_pmu = cpu_pmu;
+	}
+
+	cpu_pmu->hw_events	= cpu_hw_events;
+	cpu_pmu->request_irq	= cpu_pmu_request_irq;
+	cpu_pmu->free_irq	= cpu_pmu_free_irq;
+
+	/* Ensure the PMU has sane values out of reset. */
+	if (cpu_pmu->reset)
+		on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset,
+			 cpu_pmu, 1);
+
+	/* If no interrupts available, set the corresponding capability flag */
+	if (!platform_get_irq(cpu_pmu->plat_device, 0))
+		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
+	return 0;
+
+out_hw_events:
+	free_percpu(cpu_hw_events);
+	return err;
+}
+
+static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
+{
+	unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
+	free_percpu(cpu_pmu->hw_events);
+}
+
+/*
+ * CPU PMU identification and probing.
+ */
+static int probe_current_pmu(struct arm_pmu *pmu,
+			     const struct pmu_probe_info *info)
+{
+	int cpu = get_cpu();
+	unsigned int cpuid = read_cpuid_id();
+	int ret = -ENODEV;
+
+	pr_info("probing PMU on CPU %d\n", cpu);
+
+	for (; info->init != NULL; info++) {
+		if ((cpuid & info->mask) != info->cpuid)
+			continue;
+		ret = info->init(pmu);
+		break;
+	}
+
+	put_cpu();
+	return ret;
+}
+
+static int of_pmu_irq_cfg(struct arm_pmu *pmu)
+{
+	int i, irq, *irqs;
+	struct platform_device *pdev = pmu->plat_device;
+
+	/* Don't bother with PPIs; they're already affine */
+	irq = platform_get_irq(pdev, 0);
+	if (irq >= 0 && irq_is_percpu(irq))
+		return 0;
+
+	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+	if (!irqs)
+		return -ENOMEM;
+
+	for (i = 0; i < pdev->num_resources; ++i) {
+		struct device_node *dn;
+		int cpu;
+
+		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
+				      i);
+		if (!dn) {
+			pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
+				of_node_full_name(pdev->dev.of_node), i);
+			break;
+		}
+
+		for_each_possible_cpu(cpu)
+			if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
+				break;
+
+		of_node_put(dn);
+		if (cpu >= nr_cpu_ids) {
+			pr_warn("Failed to find logical CPU for %s\n",
+				dn->name);
+			break;
+		}
+
+		irqs[i] = cpu;
+		cpumask_set_cpu(cpu, &pmu->supported_cpus);
+	}
+
+	if (i == pdev->num_resources) {
+		pmu->irq_affinity = irqs;
+	} else {
+		kfree(irqs);
+		cpumask_setall(&pmu->supported_cpus);
+	}
+
+	return 0;
+}
+
+int arm_pmu_device_probe(struct platform_device *pdev,
+			 const struct of_device_id *of_table,
+			 const struct pmu_probe_info *probe_table)
+{
+	const struct of_device_id *of_id;
+	const int (*init_fn)(struct arm_pmu *);
+	struct device_node *node = pdev->dev.of_node;
+	struct arm_pmu *pmu;
+	int ret = -ENODEV;
+
+	pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
+	if (!pmu) {
+		pr_info("failed to allocate PMU device!\n");
+		return -ENOMEM;
+	}
+
+	if (!__oprofile_cpu_pmu)
+		__oprofile_cpu_pmu = pmu;
+
+	pmu->plat_device = pdev;
+
+	if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
+		init_fn = of_id->data;
+
+		ret = of_pmu_irq_cfg(pmu);
+		if (!ret)
+			ret = init_fn(pmu);
+	} else {
+		ret = probe_current_pmu(pmu, probe_table);
+		cpumask_setall(&pmu->supported_cpus);
+	}
+
+	if (ret) {
+		pr_info("failed to probe PMU!\n");
+		goto out_free;
+	}
+
+	ret = cpu_pmu_init(pmu);
+	if (ret)
+		goto out_free;
+
+	ret = armpmu_register(pmu, -1);
+	if (ret)
+		goto out_destroy;
+
+	return 0;
+
+out_destroy:
+	cpu_pmu_destroy(pmu);
+out_free:
+	pr_info("failed to register PMU devices!\n");
+	kfree(pmu);
+	return ret;
+}
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
deleted file mode 100644
index 261b639..0000000
--- a/arch/arm/kernel/perf_event_cpu.c
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) 2012 ARM Limited
- *
- * Author: Will Deacon <will.deacon@arm.com>
- */
-#define pr_fmt(fmt) "CPU PMU: " fmt
-
-#include <linux/bitmap.h>
-#include <linux/export.h>
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/irq.h>
-#include <linux/irqdesc.h>
-
-#include <asm/cputype.h>
-#include <asm/irq_regs.h>
-#include <asm/pmu.h>
-
-/* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *__oprofile_cpu_pmu;
-
-/*
- * Despite the names, these two functions are CPU-specific and are used
- * by the OProfile/perf code.
- */
-const char *perf_pmu_name(void)
-{
-	if (!__oprofile_cpu_pmu)
-		return NULL;
-
-	return __oprofile_cpu_pmu->name;
-}
-EXPORT_SYMBOL_GPL(perf_pmu_name);
-
-int perf_num_counters(void)
-{
-	int max_events = 0;
-
-	if (__oprofile_cpu_pmu != NULL)
-		max_events = __oprofile_cpu_pmu->num_events;
-
-	return max_events;
-}
-EXPORT_SYMBOL_GPL(perf_num_counters);
-
-static void cpu_pmu_enable_percpu_irq(void *data)
-{
-	int irq = *(int *)data;
-
-	enable_percpu_irq(irq, IRQ_TYPE_NONE);
-}
-
-static void cpu_pmu_disable_percpu_irq(void *data)
-{
-	int irq = *(int *)data;
-
-	disable_percpu_irq(irq);
-}
-
-static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
-{
-	int i, irq, irqs;
-	struct platform_device *pmu_device = cpu_pmu->plat_device;
-	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-
-	irq = platform_get_irq(pmu_device, 0);
-	if (irq >= 0 && irq_is_percpu(irq)) {
-		on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
-		free_percpu_irq(irq, &hw_events->percpu_pmu);
-	} else {
-		for (i = 0; i < irqs; ++i) {
-			int cpu = i;
-
-			if (cpu_pmu->irq_affinity)
-				cpu = cpu_pmu->irq_affinity[i];
-
-			if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
-				continue;
-			irq = platform_get_irq(pmu_device, i);
-			if (irq >= 0)
-				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
-		}
-	}
-}
-
-static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
-{
-	int i, err, irq, irqs;
-	struct platform_device *pmu_device = cpu_pmu->plat_device;
-	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
-
-	if (!pmu_device)
-		return -ENODEV;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-	if (irqs < 1) {
-		pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
-		return 0;
-	}
-
-	irq = platform_get_irq(pmu_device, 0);
-	if (irq >= 0 && irq_is_percpu(irq)) {
-		err = request_percpu_irq(irq, handler, "arm-pmu",
-					 &hw_events->percpu_pmu);
-		if (err) {
-			pr_err("unable to request IRQ%d for ARM PMU counters\n",
-				irq);
-			return err;
-		}
-		on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
-	} else {
-		for (i = 0; i < irqs; ++i) {
-			int cpu = i;
-
-			err = 0;
-			irq = platform_get_irq(pmu_device, i);
-			if (irq < 0)
-				continue;
-
-			if (cpu_pmu->irq_affinity)
-				cpu = cpu_pmu->irq_affinity[i];
-
-			/*
-			 * If we have a single PMU interrupt that we can't shift,
-			 * assume that we're running on a uniprocessor machine and
-			 * continue. Otherwise, continue without this interrupt.
-			 */
-			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
-				pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
-					irq, cpu);
-				continue;
-			}
-
-			err = request_irq(irq, handler,
-					  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
-					  per_cpu_ptr(&hw_events->percpu_pmu, cpu));
-			if (err) {
-				pr_err("unable to request IRQ%d for ARM PMU counters\n",
-					irq);
-				return err;
-			}
-
-			cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
-		}
-	}
-
-	return 0;
-}
-
-/*
- * PMU hardware loses all context when a CPU goes offline.
- * When a CPU is hotplugged back in, since some hardware registers are
- * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
- * junk values out of them.
- */
-static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
-			  void *hcpu)
-{
-	int cpu = (unsigned long)hcpu;
-	struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);
-
-	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
-		return NOTIFY_DONE;
-
-	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
-		return NOTIFY_DONE;
-
-	if (pmu->reset)
-		pmu->reset(pmu);
-	else
-		return NOTIFY_DONE;
-
-	return NOTIFY_OK;
-}
-
-static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
-{
-	int err;
-	int cpu;
-	struct pmu_hw_events __percpu *cpu_hw_events;
-
-	cpu_hw_events = alloc_percpu(struct pmu_hw_events);
-	if (!cpu_hw_events)
-		return -ENOMEM;
-
-	cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify;
-	err = register_cpu_notifier(&cpu_pmu->hotplug_nb);
-	if (err)
-		goto out_hw_events;
-
-	for_each_possible_cpu(cpu) {
-		struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
-		raw_spin_lock_init(&events->pmu_lock);
-		events->percpu_pmu = cpu_pmu;
-	}
-
-	cpu_pmu->hw_events	= cpu_hw_events;
-	cpu_pmu->request_irq	= cpu_pmu_request_irq;
-	cpu_pmu->free_irq	= cpu_pmu_free_irq;
-
-	/* Ensure the PMU has sane values out of reset. */
-	if (cpu_pmu->reset)
-		on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset,
-			 cpu_pmu, 1);
-
-	/* If no interrupts available, set the corresponding capability flag */
-	if (!platform_get_irq(cpu_pmu->plat_device, 0))
-		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
-
-	return 0;
-
-out_hw_events:
-	free_percpu(cpu_hw_events);
-	return err;
-}
-
-static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
-{
-	unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
-	free_percpu(cpu_pmu->hw_events);
-}
-
-/*
- * CPU PMU identification and probing.
- */
-static int probe_current_pmu(struct arm_pmu *pmu,
-			     const struct pmu_probe_info *info)
-{
-	int cpu = get_cpu();
-	unsigned int cpuid = read_cpuid_id();
-	int ret = -ENODEV;
-
-	pr_info("probing PMU on CPU %d\n", cpu);
-
-	for (; info->init != NULL; info++) {
-		if ((cpuid & info->mask) != info->cpuid)
-			continue;
-		ret = info->init(pmu);
-		break;
-	}
-
-	put_cpu();
-	return ret;
-}
-
-static int of_pmu_irq_cfg(struct arm_pmu *pmu)
-{
-	int i;
-	struct platform_device *pdev = pmu->plat_device;
-	int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
-
-	if (!irqs)
-		return -ENOMEM;
-
-	for (i = 0; i < pdev->num_resources; ++i) {
-		struct device_node *dn;
-		int cpu;
-
-		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
-				      i);
-		if (!dn) {
-			pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
-				of_node_full_name(dn), i);
-			break;
-		}
-
-		for_each_possible_cpu(cpu)
-			if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
-				break;
-
-		of_node_put(dn);
-		if (cpu >= nr_cpu_ids) {
-			pr_warn("Failed to find logical CPU for %s\n",
-				dn->name);
-			break;
-		}
-
-		irqs[i] = cpu;
-		cpumask_set_cpu(cpu, &pmu->supported_cpus);
-	}
-
-	if (i == pdev->num_resources) {
-		pmu->irq_affinity = irqs;
-	} else {
-		kfree(irqs);
-		cpumask_setall(&pmu->supported_cpus);
-	}
-
-	return 0;
-}
-
-int arm_pmu_device_probe(struct platform_device *pdev,
-			 const struct of_device_id *of_table,
-			 const struct pmu_probe_info *probe_table)
-{
-	const struct of_device_id *of_id;
-	const int (*init_fn)(struct arm_pmu *);
-	struct device_node *node = pdev->dev.of_node;
-	struct arm_pmu *pmu;
-	int ret = -ENODEV;
-
-	pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
-	if (!pmu) {
-		pr_info("failed to allocate PMU device!\n");
-		return -ENOMEM;
-	}
-
-	if (!__oprofile_cpu_pmu)
-		__oprofile_cpu_pmu = pmu;
-
-	pmu->plat_device = pdev;
-
-	if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
-		init_fn = of_id->data;
-
-		ret = of_pmu_irq_cfg(pmu);
-		if (!ret)
-			ret = init_fn(pmu);
-	} else {
-		ret = probe_current_pmu(pmu, probe_table);
-		cpumask_setall(&pmu->supported_cpus);
-	}
-
-	if (ret) {
-		pr_info("failed to probe PMU!\n");
-		goto out_free;
-	}
-
-	ret = cpu_pmu_init(pmu);
-	if (ret)
-		goto out_free;
-
-	ret = armpmu_register(pmu, -1);
-	if (ret)
-		goto out_destroy;
-
-	return 0;
-
-out_destroy:
-	cpu_pmu_destroy(pmu);
-out_free:
-	pr_info("failed to register PMU devices!\n");
-	kfree(pmu);
-	return ret;
-}
-- 
cgit v1.1


From 30b5f4d6128e12c17a0e54cac685a048c3d8d862 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 4 Apr 2015 11:30:04 +0100
Subject: ARM: keystone2: move platform notifier initialisation into platform
 init

We ideally want the init_meminfo function to do nothing but return the
delta to be applied to PHYS_OFFSET - it should do nothing else.  As we
can detect in platform init code whether we are running in the high
physical address space, move the platform notifier initialisation
entirely into the platform init code.

Acked-by: Santosh Shilimkar <ssantosh@kernel.org>
Tested-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-keystone/keystone.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c
index 0662087..3d58a8f 100644
--- a/arch/arm/mach-keystone/keystone.c
+++ b/arch/arm/mach-keystone/keystone.c
@@ -27,7 +27,6 @@
 
 #include "keystone.h"
 
-static struct notifier_block platform_nb;
 static unsigned long keystone_dma_pfn_offset __read_mostly;
 
 static int keystone_platform_notifier(struct notifier_block *nb,
@@ -49,11 +48,18 @@ static int keystone_platform_notifier(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+static struct notifier_block platform_nb = {
+	.notifier_call = keystone_platform_notifier,
+};
+
 static void __init keystone_init(void)
 {
-	keystone_pm_runtime_init();
-	if (platform_nb.notifier_call)
+	if (PHYS_OFFSET >= KEYSTONE_HIGH_PHYS_START) {
+		keystone_dma_pfn_offset = PFN_DOWN(KEYSTONE_HIGH_PHYS_START -
+						   KEYSTONE_LOW_PHYS_START);
 		bus_register_notifier(&platform_bus_type, &platform_nb);
+	}
+	keystone_pm_runtime_init();
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
@@ -96,9 +102,6 @@ static void __init keystone_init_meminfo(void)
 
 	/* Populate the arch idmap hook */
 	arch_virt_to_idmap = keystone_virt_to_idmap;
-	platform_nb.notifier_call = keystone_platform_notifier;
-	keystone_dma_pfn_offset = PFN_DOWN(KEYSTONE_HIGH_PHYS_START -
-						KEYSTONE_LOW_PHYS_START);
 
 	pr_info("Switching to high address space at 0x%llx\n", (u64)offset);
 }
-- 
cgit v1.1


From c8ca2b4b29282605698968d15667939b23e208e2 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 4 Apr 2015 09:53:38 +0100
Subject: ARM: keystone2: move update of the phys-to-virt constants into
 generic code

Make the init_meminfo function return the offset to be applied to the
phys-to-virt translation constants.  This allows us to move the update
into generic code, along with the requirements for this update.

This avoids platforms having to know the details of the phys-to-virt
translation support.

Acked-by: Santosh Shilimkar <ssantosh@kernel.org>
Tested-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/mach/arch.h  |  2 +-
 arch/arm/mach-keystone/keystone.c | 27 ++++++++++-----------------
 arch/arm/mm/mmu.c                 | 26 ++++++++++++++++++++++----
 3 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 0406cb3..e881913 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -51,7 +51,7 @@ struct machine_desc {
 	bool			(*smp_init)(void);
 	void			(*fixup)(struct tag *, char **);
 	void			(*dt_fixup)(void);
-	void			(*init_meminfo)(void);
+	long long		(*init_meminfo)(void);
 	void			(*reserve)(void);/* reserve mem blocks	*/
 	void			(*map_io)(void);/* IO mapping function	*/
 	void			(*init_early)(void);
diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c
index 3d58a8f..baa0fbc 100644
--- a/arch/arm/mach-keystone/keystone.c
+++ b/arch/arm/mach-keystone/keystone.c
@@ -68,11 +68,9 @@ static phys_addr_t keystone_virt_to_idmap(unsigned long x)
 	return (phys_addr_t)(x) - CONFIG_PAGE_OFFSET + KEYSTONE_LOW_PHYS_START;
 }
 
-static void __init keystone_init_meminfo(void)
+static long long __init keystone_init_meminfo(void)
 {
-	bool lpae = IS_ENABLED(CONFIG_ARM_LPAE);
-	bool pvpatch = IS_ENABLED(CONFIG_ARM_PATCH_PHYS_VIRT);
-	phys_addr_t offset = PHYS_OFFSET - KEYSTONE_LOW_PHYS_START;
+	long long offset;
 	phys_addr_t mem_start, mem_end;
 
 	mem_start = memblock_start_of_DRAM();
@@ -81,29 +79,24 @@ static void __init keystone_init_meminfo(void)
 	/* nothing to do if we are running out of the <32-bit space */
 	if (mem_start >= KEYSTONE_LOW_PHYS_START &&
 	    mem_end   <= KEYSTONE_LOW_PHYS_END)
-		return;
-
-	if (!lpae || !pvpatch) {
-		pr_crit("Enable %s%s%s to run outside 32-bit space\n",
-		      !lpae ? __stringify(CONFIG_ARM_LPAE) : "",
-		      (!lpae && !pvpatch) ? " and " : "",
-		      !pvpatch ? __stringify(CONFIG_ARM_PATCH_PHYS_VIRT) : "");
-	}
+		return 0;
 
 	if (mem_start < KEYSTONE_HIGH_PHYS_START ||
 	    mem_end   > KEYSTONE_HIGH_PHYS_END) {
 		pr_crit("Invalid address space for memory (%08llx-%08llx)\n",
-		      (u64)mem_start, (u64)mem_end);
+		        (u64)mem_start, (u64)mem_end);
+		return 0;
 	}
 
-	offset += KEYSTONE_HIGH_PHYS_START;
-	__pv_phys_pfn_offset = PFN_DOWN(offset);
-	__pv_offset = (offset - PAGE_OFFSET);
+	offset = KEYSTONE_HIGH_PHYS_START - KEYSTONE_LOW_PHYS_START;
 
 	/* Populate the arch idmap hook */
 	arch_virt_to_idmap = keystone_virt_to_idmap;
 
-	pr_info("Switching to high address space at 0x%llx\n", (u64)offset);
+	pr_info("Switching to high address space at 0x%llx\n",
+	        (u64)PHYS_OFFSET + (u64)offset);
+
+	return offset;
 }
 
 static const char *const keystone_match[] __initconst = {
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4e6ef89..38ccbdf 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1387,7 +1387,7 @@ static void __init map_lowmem(void)
 	}
 }
 
-#ifdef CONFIG_ARM_LPAE
+#if defined(CONFIG_ARM_LPAE) && defined(CONFIG_ARM_PATCH_PHYS_VIRT)
 /*
  * early_paging_init() recreates boot time page table setup, allowing machines
  * to switch over to a high (>4G) address space on LPAE systems
@@ -1397,6 +1397,7 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 {
 	pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags;
 	unsigned long map_start, map_end;
+	long long offset;
 	pgd_t *pgd0, *pgdk;
 	pud_t *pud0, *pudk, *pud_start;
 	pmd_t *pmd0, *pmdk;
@@ -1419,7 +1420,13 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 	pudk = pud_offset(pgdk, map_start);
 	pmdk = pmd_offset(pudk, map_start);
 
-	mdesc->init_meminfo();
+	offset = mdesc->init_meminfo();
+	if (offset == 0)
+		return;
+
+	/* Re-set the phys pfn offset, and the pv offset */
+	__pv_offset += offset;
+	__pv_phys_pfn_offset += PFN_DOWN(offset);
 
 	/* Run the patch stub to update the constants */
 	fixup_pv_table(&__pv_table_begin,
@@ -1502,8 +1509,19 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 void __init early_paging_init(const struct machine_desc *mdesc,
 			      struct proc_info_list *procinfo)
 {
-	if (mdesc->init_meminfo)
-		mdesc->init_meminfo();
+	long long offset;
+
+	if (!mdesc->init_meminfo)
+		return;
+
+	offset = mdesc->init_meminfo();
+	if (offset == 0)
+		return;
+
+	pr_crit("Physical address space modification is only to support Keystone2.\n");
+	pr_crit("Please enable ARM_LPAE and ARM_PATCH_PHYS_VIRT support to use this\n");
+	pr_crit("feature. Your kernel may crash now, have a good day.\n");
+	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
 }
 
 #endif
-- 
cgit v1.1


From 39b74fe82f734ac0bec726733805bb7d46c95933 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 4 Apr 2015 10:25:28 +0100
Subject: ARM: keystone2: move address space switch printk into generic code

There is no point platform code doing this, let's move it into the
generic code so it doesn't get duplicated.

Acked-by: Santosh Shilimkar <ssantosh@kernel.org>
Tested-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-keystone/keystone.c | 3 ---
 arch/arm/mm/mmu.c                 | 3 +++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c
index baa0fbc..af8c92b 100644
--- a/arch/arm/mach-keystone/keystone.c
+++ b/arch/arm/mach-keystone/keystone.c
@@ -93,9 +93,6 @@ static long long __init keystone_init_meminfo(void)
 	/* Populate the arch idmap hook */
 	arch_virt_to_idmap = keystone_virt_to_idmap;
 
-	pr_info("Switching to high address space at 0x%llx\n",
-	        (u64)PHYS_OFFSET + (u64)offset);
-
 	return offset;
 }
 
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 38ccbdf..91262d2 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1424,6 +1424,9 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 	if (offset == 0)
 		return;
 
+	pr_info("Switching physical address space to 0x%08llx\n",
+		(u64)PHYS_OFFSET + offset);
+
 	/* Re-set the phys pfn offset, and the pv offset */
 	__pv_offset += offset;
 	__pv_phys_pfn_offset += PFN_DOWN(offset);
-- 
cgit v1.1


From c0b759d87eab301af0380f5459057656178e78cf Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 4 Apr 2015 10:01:10 +0100
Subject: ARM: keystone2: rename init_meminfo to pv_fixup

The init_meminfo() method is not about initialising meminfo - it's about
fixing up the physical to virtual translation so that we use a different
physical address space, possibly above the 4GB physical address space.
Therefore, the name "init_meminfo()" is confusing.

Rename it to pv_fixup() instead.

Acked-by: Santosh Shilimkar <ssantosh@kernel.org>
Tested-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/mach/arch.h  | 2 +-
 arch/arm/mach-keystone/keystone.c | 4 ++--
 arch/arm/mm/mmu.c                 | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index e881913..cb3a407 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -51,7 +51,7 @@ struct machine_desc {
 	bool			(*smp_init)(void);
 	void			(*fixup)(struct tag *, char **);
 	void			(*dt_fixup)(void);
-	long long		(*init_meminfo)(void);
+	long long		(*pv_fixup)(void);
 	void			(*reserve)(void);/* reserve mem blocks	*/
 	void			(*map_io)(void);/* IO mapping function	*/
 	void			(*init_early)(void);
diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c
index af8c92b..e288010 100644
--- a/arch/arm/mach-keystone/keystone.c
+++ b/arch/arm/mach-keystone/keystone.c
@@ -68,7 +68,7 @@ static phys_addr_t keystone_virt_to_idmap(unsigned long x)
 	return (phys_addr_t)(x) - CONFIG_PAGE_OFFSET + KEYSTONE_LOW_PHYS_START;
 }
 
-static long long __init keystone_init_meminfo(void)
+static long long __init keystone_pv_fixup(void)
 {
 	long long offset;
 	phys_addr_t mem_start, mem_end;
@@ -108,5 +108,5 @@ DT_MACHINE_START(KEYSTONE, "Keystone")
 	.smp		= smp_ops(keystone_smp_ops),
 	.init_machine	= keystone_init,
 	.dt_compat	= keystone_match,
-	.init_meminfo   = keystone_init_meminfo,
+	.pv_fixup	= keystone_pv_fixup,
 MACHINE_END
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 91262d2..0e5ed87 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1404,7 +1404,7 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 	phys_addr_t phys;
 	int i;
 
-	if (!(mdesc->init_meminfo))
+	if (!mdesc->pv_fixup)
 		return;
 
 	/* remap kernel code and data */
@@ -1420,7 +1420,7 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 	pudk = pud_offset(pgdk, map_start);
 	pmdk = pmd_offset(pudk, map_start);
 
-	offset = mdesc->init_meminfo();
+	offset = mdesc->pv_fixup();
 	if (offset == 0)
 		return;
 
@@ -1514,10 +1514,10 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 {
 	long long offset;
 
-	if (!mdesc->init_meminfo)
+	if (!mdesc->pv_fixup)
 		return;
 
-	offset = mdesc->init_meminfo();
+	offset = mdesc->pv_fixup();
 	if (offset == 0)
 		return;
 
-- 
cgit v1.1


From d8dc7fbd53eeb329a1dda5a19df7058b9c1c413e Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 4 Apr 2015 16:58:38 +0100
Subject: ARM: re-implement physical address space switching

Re-implement the physical address space switching to be architecturally
compliant.  This involves flushing the caches, disabling the MMU, and
only then updating the page tables.  Once that is complete, the system
can be brought back up again.

Since we disable the MMU, we need to do the update in assembly code.
Luckily, the entries which need updating are fairly trivial, and are
all setup by the early assembly code.  We can merely adjust each entry
by the delta required.

Not only does this fix the code to be architecturally compliant, but it
fixes a couple of bugs too:

1. The original code would only ever update the first L2 entry covering
   a fraction of the kernel; the remainder were left untouched.
2. The L2 entries covering the DTB blob were likewise untouched.

This solution fixes up all entries.

Tested-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/Kconfig        |   4 ++
 arch/arm/mm/Makefile       |   1 +
 arch/arm/mm/mmu.c          | 124 +++++++++++++++------------------------------
 arch/arm/mm/pv-fixup-asm.S |  88 ++++++++++++++++++++++++++++++++
 4 files changed, 133 insertions(+), 84 deletions(-)
 create mode 100644 arch/arm/mm/pv-fixup-asm.S

diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index b4f92b9..4dc661e 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -624,6 +624,10 @@ config ARM_LPAE
 
 	  If unsure, say N.
 
+config ARM_PV_FIXUP
+	def_bool y
+	depends on ARM_LPAE && ARM_PATCH_PHYS_VIRT && ARCH_KEYSTONE
+
 config ARCH_PHYS_ADDR_T_64BIT
 	def_bool ARM_LPAE
 
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index d3afdf9..4cc1ec9 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_MODULES)		+= proc-syms.o
 obj-$(CONFIG_ALIGNMENT_TRAP)	+= alignment.o
 obj-$(CONFIG_HIGHMEM)		+= highmem.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+obj-$(CONFIG_ARM_PV_FIXUP)	+= pv-fixup-asm.o
 
 obj-$(CONFIG_CPU_ABRT_NOMMU)	+= abort-nommu.o
 obj-$(CONFIG_CPU_ABRT_EV4)	+= abort-ev4.o
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 0e5ed87..a5d3180 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1387,7 +1387,11 @@ static void __init map_lowmem(void)
 	}
 }
 
-#if defined(CONFIG_ARM_LPAE) && defined(CONFIG_ARM_PATCH_PHYS_VIRT)
+#ifdef CONFIG_ARM_PV_FIXUP
+extern unsigned long __atags_pointer;
+typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata);
+pgtables_remap lpae_pgtables_remap_asm;
+
 /*
  * early_paging_init() recreates boot time page table setup, allowing machines
  * to switch over to a high (>4G) address space on LPAE systems
@@ -1395,35 +1399,30 @@ static void __init map_lowmem(void)
 void __init early_paging_init(const struct machine_desc *mdesc,
 			      struct proc_info_list *procinfo)
 {
-	pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags;
-	unsigned long map_start, map_end;
+	pgtables_remap *lpae_pgtables_remap;
+	unsigned long pa_pgd;
+	unsigned int cr, ttbcr;
 	long long offset;
-	pgd_t *pgd0, *pgdk;
-	pud_t *pud0, *pudk, *pud_start;
-	pmd_t *pmd0, *pmdk;
-	phys_addr_t phys;
-	int i;
+	void *boot_data;
 
 	if (!mdesc->pv_fixup)
 		return;
 
-	/* remap kernel code and data */
-	map_start = init_mm.start_code & PMD_MASK;
-	map_end   = ALIGN(init_mm.brk, PMD_SIZE);
-
-	/* get a handle on things... */
-	pgd0 = pgd_offset_k(0);
-	pud_start = pud0 = pud_offset(pgd0, 0);
-	pmd0 = pmd_offset(pud0, 0);
-
-	pgdk = pgd_offset_k(map_start);
-	pudk = pud_offset(pgdk, map_start);
-	pmdk = pmd_offset(pudk, map_start);
-
 	offset = mdesc->pv_fixup();
 	if (offset == 0)
 		return;
 
+	/*
+	 * Get the address of the remap function in the 1:1 identity
+	 * mapping setup by the early page table assembly code.  We
+	 * must get this prior to the pv update.  The following barrier
+	 * ensures that this is complete before we fixup any P:V offsets.
+	 */
+	lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm);
+	pa_pgd = __pa(swapper_pg_dir);
+	boot_data = __va(__atags_pointer);
+	barrier();
+
 	pr_info("Switching physical address space to 0x%08llx\n",
 		(u64)PHYS_OFFSET + offset);
 
@@ -1436,75 +1435,32 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 		(&__pv_table_end - &__pv_table_begin) << 2);
 
 	/*
-	 * Cache cleaning operations for self-modifying code
-	 * We should clean the entries by MVA but running a
-	 * for loop over every pv_table entry pointer would
-	 * just complicate the code.
-	 */
-	flush_cache_louis();
-	dsb(ishst);
-	isb();
-
-	/*
-	 * FIXME: This code is not architecturally compliant: we modify
-	 * the mappings in-place, indeed while they are in use by this
-	 * very same code.  This may lead to unpredictable behaviour of
-	 * the CPU.
-	 *
-	 * Even modifying the mappings in a separate page table does
-	 * not resolve this.
-	 *
-	 * The architecture strongly recommends that when a mapping is
-	 * changed, that it is changed by first going via an invalid
-	 * mapping and back to the new mapping.  This is to ensure that
-	 * no TLB conflicts (caused by the TLB having more than one TLB
-	 * entry match a translation) can occur.  However, doing that
-	 * here will result in unmapping the code we are running.
-	 */
-	pr_warn("WARNING: unsafe modification of in-place page tables - tainting kernel\n");
-	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
-
-	/*
-	 * Remap level 1 table.  This changes the physical addresses
-	 * used to refer to the level 2 page tables to the high
-	 * physical address alias, leaving everything else the same.
-	 */
-	for (i = 0; i < PTRS_PER_PGD; pud0++, i++) {
-		set_pud(pud0,
-			__pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER));
-		pmd0 += PTRS_PER_PMD;
-	}
-
-	/*
-	 * Remap the level 2 table, pointing the mappings at the high
-	 * physical address alias of these pages.
-	 */
-	phys = __pa(map_start);
-	do {
-		*pmdk++ = __pmd(phys | pmdprot);
-		phys += PMD_SIZE;
-	} while (phys < map_end);
-
-	/*
-	 * Ensure that the above updates are flushed out of the cache.
-	 * This is not strictly correct; on a system where the caches
-	 * are coherent with each other, but the MMU page table walks
-	 * may not be coherent, flush_cache_all() may be a no-op, and
-	 * this will fail.
+	 * We changing not only the virtual to physical mapping, but also
+	 * the physical addresses used to access memory.  We need to flush
+	 * all levels of cache in the system with caching disabled to
+	 * ensure that all data is written back, and nothing is prefetched
+	 * into the caches.  We also need to prevent the TLB walkers
+	 * allocating into the caches too.  Note that this is ARMv7 LPAE
+	 * specific.
 	 */
+	cr = get_cr();
+	set_cr(cr & ~(CR_I | CR_C));
+	asm("mrc p15, 0, %0, c2, c0, 2" : "=r" (ttbcr));
+	asm volatile("mcr p15, 0, %0, c2, c0, 2"
+		: : "r" (ttbcr & ~(3 << 8 | 3 << 10)));
 	flush_cache_all();
 
 	/*
-	 * Re-write the TTBR values to point them at the high physical
-	 * alias of the page tables.  We expect __va() will work on
-	 * cpu_get_pgd(), which returns the value of TTBR0.
+	 * Fixup the page tables - this must be in the idmap region as
+	 * we need to disable the MMU to do this safely, and hence it
+	 * needs to be assembly.  It's fairly simple, as we're using the
+	 * temporary tables setup by the initial assembly code.
 	 */
-	cpu_switch_mm(pgd0, &init_mm);
-	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
+	lpae_pgtables_remap(offset, pa_pgd, boot_data);
 
-	/* Finally flush any stale TLB values. */
-	local_flush_bp_all();
-	local_flush_tlb_all();
+	/* Re-enable the caches and cacheable TLB walks */
+	asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr));
+	set_cr(cr);
 }
 
 #else
diff --git a/arch/arm/mm/pv-fixup-asm.S b/arch/arm/mm/pv-fixup-asm.S
new file mode 100644
index 0000000..1867f3e4
--- /dev/null
+++ b/arch/arm/mm/pv-fixup-asm.S
@@ -0,0 +1,88 @@
+/*
+ *  Copyright (C) 2015 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This assembly is required to safely remap the physical address space
+ * for Keystone 2
+ */
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/cp15.h>
+#include <asm/memory.h>
+#include <asm/pgtable.h>
+
+	.section ".idmap.text", "ax"
+
+#define L1_ORDER 3
+#define L2_ORDER 3
+
+ENTRY(lpae_pgtables_remap_asm)
+	stmfd	sp!, {r4-r8, lr}
+
+	mrc	p15, 0, r8, c1, c0, 0		@ read control reg
+	bic	ip, r8, #CR_M			@ disable caches and MMU
+	mcr	p15, 0, ip, c1, c0, 0
+	dsb
+	isb
+
+	/* Update level 2 entries covering the kernel */
+	ldr	r6, =(_end - 1)
+	add	r7, r2, #0x1000
+	add	r6, r7, r6, lsr #SECTION_SHIFT - L2_ORDER
+	add	r7, r7, #PAGE_OFFSET >> (SECTION_SHIFT - L2_ORDER)
+1:	ldrd	r4, [r7]
+	adds	r4, r4, r0
+	adc	r5, r5, r1
+	strd	r4, [r7], #1 << L2_ORDER
+	cmp	r7, r6
+	bls	1b
+
+	/* Update level 2 entries for the boot data */
+	add	r7, r2, #0x1000
+	add	r7, r7, r3, lsr #SECTION_SHIFT - L2_ORDER
+	bic	r7, r7, #(1 << L2_ORDER) - 1
+	ldrd	r4, [r7]
+	adds	r4, r4, r0
+	adc	r5, r5, r1
+	strd	r4, [r7], #1 << L2_ORDER
+	ldrd	r4, [r7]
+	adds	r4, r4, r0
+	adc	r5, r5, r1
+	strd	r4, [r7]
+
+	/* Update level 1 entries */
+	mov	r6, #4
+	mov	r7, r2
+2:	ldrd	r4, [r7]
+	adds	r4, r4, r0
+	adc	r5, r5, r1
+	strd	r4, [r7], #1 << L1_ORDER
+	subs	r6, r6, #1
+	bne	2b
+
+	mrrc	p15, 0, r4, r5, c2		@ read TTBR0
+	adds	r4, r4, r0			@ update physical address
+	adc	r5, r5, r1
+	mcrr	p15, 0, r4, r5, c2		@ write back TTBR0
+	mrrc	p15, 1, r4, r5, c2		@ read TTBR1
+	adds	r4, r4, r0			@ update physical address
+	adc	r5, r5, r1
+	mcrr	p15, 1, r4, r5, c2		@ write back TTBR1
+
+	dsb
+
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c5, 0		@ I+BTB cache invalidate
+	mcr	p15, 0, ip, c8, c7, 0		@ local_flush_tlb_all()
+	dsb
+	isb
+
+	mcr	p15, 0, r8, c1, c0, 0		@ re-enable MMU
+	dsb
+	isb
+
+	ldmfd	sp!, {r4-r8, pc}
+ENDPROC(lpae_pgtables_remap_asm)
-- 
cgit v1.1


From 1221ed10f2a56ecdd8ff75f436f52aca5ba0f1d3 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 4 Apr 2015 17:25:20 +0100
Subject: ARM: cleanup early_paging_init() calling

Eliminate the needless nommu version of this function, and get rid of
the proc_info_list structure argument - we no longer need this in order
to fix up the page table entries.

Acked-by: Santosh Shilimkar <ssantosh@kernel.org>
Tested-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/setup.c | 7 ++++---
 arch/arm/mm/mmu.c       | 6 ++----
 arch/arm/mm/nommu.c     | 9 ---------
 3 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 6c777e9..979c1c5 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -75,8 +75,7 @@ __setup("fpe=", fpe_setup);
 
 extern void init_default_cache_policy(unsigned long);
 extern void paging_init(const struct machine_desc *desc);
-extern void early_paging_init(const struct machine_desc *,
-			      struct proc_info_list *);
+extern void early_paging_init(const struct machine_desc *);
 extern void sanity_check_meminfo(void);
 extern enum reboot_mode reboot_mode;
 extern void setup_dma_zone(const struct machine_desc *desc);
@@ -936,7 +935,9 @@ void __init setup_arch(char **cmdline_p)
 
 	parse_early_param();
 
-	early_paging_init(mdesc, lookup_processor_type(read_cpuid_id()));
+#ifdef CONFIG_MMU
+	early_paging_init(mdesc);
+#endif
 	setup_dma_zone(mdesc);
 	sanity_check_meminfo();
 	arm_memblock_init(mdesc);
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index a5d3180..5aefa95 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1396,8 +1396,7 @@ pgtables_remap lpae_pgtables_remap_asm;
  * early_paging_init() recreates boot time page table setup, allowing machines
  * to switch over to a high (>4G) address space on LPAE systems
  */
-void __init early_paging_init(const struct machine_desc *mdesc,
-			      struct proc_info_list *procinfo)
+void __init early_paging_init(const struct machine_desc *mdesc)
 {
 	pgtables_remap *lpae_pgtables_remap;
 	unsigned long pa_pgd;
@@ -1465,8 +1464,7 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 
 #else
 
-void __init early_paging_init(const struct machine_desc *mdesc,
-			      struct proc_info_list *procinfo)
+void __init early_paging_init(const struct machine_desc *mdesc)
 {
 	long long offset;
 
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index a014dfa..afd7e05 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -304,15 +304,6 @@ void __init sanity_check_meminfo(void)
 }
 
 /*
- * early_paging_init() recreates boot time page table setup, allowing machines
- * to switch over to a high (>4G) address space on LPAE systems
- */
-void __init early_paging_init(const struct machine_desc *mdesc,
-			      struct proc_info_list *procinfo)
-{
-}
-
-/*
  * paging_init() sets up the page tables, initialises the zone memory
  * maps, and sets up the zero page, bad page and bad page tables.
  */
-- 
cgit v1.1


From b2c3e38a54714e917c9e8675ff5812dca1c0f39d Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 4 Apr 2015 20:09:46 +0100
Subject: ARM: redo TTBR setup code for LPAE

Re-engineer the LPAE TTBR setup code.  Rather than passing some shifted
address in order to fit in a CPU register, pass either a full physical
address (in the case of r4, r5 for TTBR0) or a PFN (for TTBR1).

This removes the ARCH_PGD_SHIFT hack, and the last dangerous user of
cpu_set_ttbr() in the secondary CPU startup code path (which was there
to re-set TTBR1 to the appropriate high physical address space on
Keystone2.)

Tested-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/memory.h    | 16 ---------------
 arch/arm/include/asm/proc-fns.h  |  7 -------
 arch/arm/include/asm/smp.h       |  2 +-
 arch/arm/kernel/head-nommu.S     |  2 +-
 arch/arm/kernel/head.S           | 42 +++++++++++++++++++++++++++++-----------
 arch/arm/kernel/smp.c            | 10 ++++++----
 arch/arm/mach-keystone/platsmp.c | 13 -------------
 arch/arm/mm/proc-v7-2level.S     |  6 +++---
 arch/arm/mm/proc-v7-3level.S     | 14 +++++---------
 arch/arm/mm/proc-v7.S            | 26 ++++++++++++-------------
 10 files changed, 60 insertions(+), 78 deletions(-)

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 184def0..3a72d69 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -18,8 +18,6 @@
 #include <linux/types.h>
 #include <linux/sizes.h>
 
-#include <asm/cache.h>
-
 #ifdef CONFIG_NEED_MACH_MEMORY_H
 #include <mach/memory.h>
 #endif
@@ -133,20 +131,6 @@
 #define phys_to_page(phys)	(pfn_to_page(__phys_to_pfn(phys)))
 
 /*
- * Minimum guaranted alignment in pgd_alloc().  The page table pointers passed
- * around in head.S and proc-*.S are shifted by this amount, in order to
- * leave spare high bits for systems with physical address extension.  This
- * does not fully accomodate the 40-bit addressing capability of ARM LPAE, but
- * gives us about 38-bits or so.
- */
-#ifdef CONFIG_ARM_LPAE
-#define ARCH_PGD_SHIFT		L1_CACHE_SHIFT
-#else
-#define ARCH_PGD_SHIFT		0
-#endif
-#define ARCH_PGD_MASK		((1 << ARCH_PGD_SHIFT) - 1)
-
-/*
  * PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical
  * memory.  This is used for XIP and NoMMU kernels, and on platforms that don't
  * have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index 5324c11..8877ad5 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -125,13 +125,6 @@ extern void cpu_resume(void);
 		ttbr;						\
 	})
 
-#define cpu_set_ttbr(nr, val)					\
-	do {							\
-		u64 ttbr = val;					\
-		__asm__("mcrr	p15, " #nr ", %Q0, %R0, c2"	\
-			: : "r" (ttbr));			\
-	} while (0)
-
 #define cpu_get_pgd()	\
 	({						\
 		u64 pg = cpu_get_ttbr(0);		\
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 18f5a55..487aa08 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -61,7 +61,7 @@ asmlinkage void secondary_start_kernel(void);
 struct secondary_data {
 	union {
 		unsigned long mpu_rgn_szr;
-		unsigned long pgdir;
+		u64 pgdir;
 	};
 	unsigned long swapper_pg_dir;
 	void *stack;
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index aebfbf7..84da14b 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -123,7 +123,7 @@ ENTRY(secondary_startup)
 ENDPROC(secondary_startup)
 
 ENTRY(__secondary_switched)
-	ldr	sp, [r7, #8]			@ set up the stack pointer
+	ldr	sp, [r7, #12]			@ set up the stack pointer
 	mov	fp, #0
 	b	secondary_start_kernel
 ENDPROC(__secondary_switched)
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 3637973..7304b4c 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -131,13 +131,30 @@ ENTRY(stext)
 	 * The following calls CPU specific code in a position independent
 	 * manner.  See arch/arm/mm/proc-*.S for details.  r10 = base of
 	 * xxx_proc_info structure selected by __lookup_processor_type
-	 * above.  On return, the CPU will be ready for the MMU to be
-	 * turned on, and r0 will hold the CPU control register value.
+	 * above.
+	 *
+	 * The processor init function will be called with:
+	 *  r1 - machine type
+	 *  r2 - boot data (atags/dt) pointer
+	 *  r4 - translation table base (low word)
+	 *  r5 - translation table base (high word, if LPAE)
+	 *  r8 - translation table base 1 (pfn if LPAE)
+	 *  r9 - cpuid
+	 *  r13 - virtual address for __enable_mmu -> __turn_mmu_on
+	 *
+	 * On return, the CPU will be ready for the MMU to be turned on,
+	 * r0 will hold the CPU control register value, r1, r2, r4, and
+	 * r9 will be preserved.  r5 will also be preserved if LPAE.
 	 */
 	ldr	r13, =__mmap_switched		@ address to jump to after
 						@ mmu has been enabled
 	adr	lr, BSYM(1f)			@ return (PIC) address
+#ifdef CONFIG_ARM_LPAE
+	mov	r5, #0				@ high TTBR0
+	mov	r8, r4, lsr #12			@ TTBR1 is swapper_pg_dir pfn
+#else
 	mov	r8, r4				@ set TTBR1 to swapper_pg_dir
+#endif
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
 	add	r12, r12, r10
 	ret	r12
@@ -158,7 +175,7 @@ ENDPROC(stext)
  *
  * Returns:
  *  r0, r3, r5-r7 corrupted
- *  r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)
+ *  r4 = physical page table address
  */
 __create_page_tables:
 	pgtbl	r4, r8				@ page table address
@@ -333,7 +350,6 @@ __create_page_tables:
 #endif
 #ifdef CONFIG_ARM_LPAE
 	sub	r4, r4, #0x1000		@ point to the PGD table
-	mov	r4, r4, lsr #ARCH_PGD_SHIFT
 #endif
 	ret	lr
 ENDPROC(__create_page_tables)
@@ -381,9 +397,9 @@ ENTRY(secondary_startup)
 	adr	r4, __secondary_data
 	ldmia	r4, {r5, r7, r12}		@ address to jump to after
 	sub	lr, r4, r5			@ mmu has been enabled
-	ldr	r4, [r7, lr]			@ get secondary_data.pgdir
-	add	r7, r7, #4
-	ldr	r8, [r7, lr]			@ get secondary_data.swapper_pg_dir
+	add	r3, r7, lr
+	ldrd	r4, [r3, #0]			@ get secondary_data.pgdir
+	ldr	r8, [r3, #8]			@ get secondary_data.swapper_pg_dir
 	adr	lr, BSYM(__enable_mmu)		@ return address
 	mov	r13, r12			@ __secondary_switched address
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
@@ -397,7 +413,7 @@ ENDPROC(secondary_startup_arm)
 	 * r6  = &secondary_data
 	 */
 ENTRY(__secondary_switched)
-	ldr	sp, [r7, #4]			@ get secondary_data.stack
+	ldr	sp, [r7, #12]			@ get secondary_data.stack
 	mov	fp, #0
 	b	secondary_start_kernel
 ENDPROC(__secondary_switched)
@@ -416,12 +432,14 @@ __secondary_data:
 /*
  * Setup common bits before finally enabling the MMU.  Essentially
  * this is just loading the page table pointer and domain access
- * registers.
+ * registers.  All these registers need to be preserved by the
+ * processor setup function (or set in the case of r0)
  *
  *  r0  = cp#15 control register
  *  r1  = machine ID
  *  r2  = atags or dtb pointer
- *  r4  = page table (see ARCH_PGD_SHIFT in asm/memory.h)
+ *  r4  = TTBR pointer (low word)
+ *  r5  = TTBR pointer (high word if LPAE)
  *  r9  = processor ID
  *  r13 = *virtual* address to jump to upon completion
  */
@@ -440,7 +458,9 @@ __enable_mmu:
 #ifdef CONFIG_CPU_ICACHE_DISABLE
 	bic	r0, r0, #CR_I
 #endif
-#ifndef CONFIG_ARM_LPAE
+#ifdef CONFIG_ARM_LPAE
+	mcrr	p15, 0, r4, r5, c2		@ load TTBR0
+#else
 	mov	r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
 		      domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
 		      domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index cca5b87..90dfbed 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -86,9 +86,11 @@ void __init smp_set_ops(struct smp_operations *ops)
 
 static unsigned long get_arch_pgd(pgd_t *pgd)
 {
-	phys_addr_t pgdir = virt_to_idmap(pgd);
-	BUG_ON(pgdir & ARCH_PGD_MASK);
-	return pgdir >> ARCH_PGD_SHIFT;
+#ifdef CONFIG_ARM_LPAE
+	return __phys_to_pfn(virt_to_phys(pgd));
+#else
+	return virt_to_phys(pgd);
+#endif
 }
 
 int __cpu_up(unsigned int cpu, struct task_struct *idle)
@@ -108,7 +110,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 #endif
 
 #ifdef CONFIG_MMU
-	secondary_data.pgdir = get_arch_pgd(idmap_pgd);
+	secondary_data.pgdir = virt_to_phys(idmap_pgd);
 	secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir);
 #endif
 	sync_cache_w(&secondary_data);
diff --git a/arch/arm/mach-keystone/platsmp.c b/arch/arm/mach-keystone/platsmp.c
index 5f46a7c..4bbb184 100644
--- a/arch/arm/mach-keystone/platsmp.c
+++ b/arch/arm/mach-keystone/platsmp.c
@@ -39,19 +39,6 @@ static int keystone_smp_boot_secondary(unsigned int cpu,
 	return error;
 }
 
-#ifdef CONFIG_ARM_LPAE
-static void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
-{
-	pgd_t *pgd0 = pgd_offset_k(0);
-	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
-	local_flush_tlb_all();
-}
-#else
-static inline void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
-{}
-#endif
-
 struct smp_operations keystone_smp_ops __initdata = {
 	.smp_boot_secondary	= keystone_smp_boot_secondary,
-	.smp_secondary_init     = keystone_smp_secondary_initmem,
 };
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index 10405b8..fa38514 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -148,10 +148,10 @@ ENDPROC(cpu_v7_set_pte_ext)
 	 * Macro for setting up the TTBRx and TTBCR registers.
 	 * - \ttb0 and \ttb1 updated with the corresponding flags.
 	 */
-	.macro	v7_ttb_setup, zero, ttbr0, ttbr1, tmp
+	.macro	v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp
 	mcr	p15, 0, \zero, c2, c0, 2	@ TTB control register
-	ALT_SMP(orr	\ttbr0, \ttbr0, #TTB_FLAGS_SMP)
-	ALT_UP(orr	\ttbr0, \ttbr0, #TTB_FLAGS_UP)
+	ALT_SMP(orr	\ttbr0l, \ttbr0l, #TTB_FLAGS_SMP)
+	ALT_UP(orr	\ttbr0l, \ttbr0l, #TTB_FLAGS_UP)
 	ALT_SMP(orr	\ttbr1, \ttbr1, #TTB_FLAGS_SMP)
 	ALT_UP(orr	\ttbr1, \ttbr1, #TTB_FLAGS_UP)
 	mcr	p15, 0, \ttbr1, c2, c0, 1	@ load TTB1
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index d3daed0..5e5720e 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -126,11 +126,10 @@ ENDPROC(cpu_v7_set_pte_ext)
 	 * Macro for setting up the TTBRx and TTBCR registers.
 	 * - \ttbr1 updated.
 	 */
-	.macro	v7_ttb_setup, zero, ttbr0, ttbr1, tmp
+	.macro	v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp
 	ldr	\tmp, =swapper_pg_dir		@ swapper_pg_dir virtual address
-	mov	\tmp, \tmp, lsr #ARCH_PGD_SHIFT
-	cmp	\ttbr1, \tmp			@ PHYS_OFFSET > PAGE_OFFSET?
-	mrc	p15, 0, \tmp, c2, c0, 2		@ TTB control register
+	cmp	\ttbr1, \tmp, lsr #12		@ PHYS_OFFSET > PAGE_OFFSET?
+	mrc	p15, 0, \tmp, c2, c0, 2		@ TTB control egister
 	orr	\tmp, \tmp, #TTB_EAE
 	ALT_SMP(orr	\tmp, \tmp, #TTB_FLAGS_SMP)
 	ALT_UP(orr	\tmp, \tmp, #TTB_FLAGS_UP)
@@ -143,13 +142,10 @@ ENDPROC(cpu_v7_set_pte_ext)
 	 */
 	orrls	\tmp, \tmp, #TTBR1_SIZE				@ TTBCR.T1SZ
 	mcr	p15, 0, \tmp, c2, c0, 2				@ TTBCR
-	mov	\tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT)	@ upper bits
-	mov	\ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT		@ lower bits
+	mov	\tmp, \ttbr1, lsr #20
+	mov	\ttbr1, \ttbr1, lsl #12
 	addls	\ttbr1, \ttbr1, #TTBR1_OFFSET
 	mcrr	p15, 1, \ttbr1, \tmp, c2			@ load TTBR1
-	mov	\tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT)	@ upper bits
-	mov	\ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT		@ lower bits
-	mcrr	p15, 0, \ttbr0, \tmp, c2			@ load TTBR0
 	.endm
 
 	/*
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 3d1054f..8732309 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -343,9 +343,9 @@ __v7_setup:
 	and	r10, r0, #0xff000000		@ ARM?
 	teq	r10, #0x41000000
 	bne	3f
-	and	r5, r0, #0x00f00000		@ variant
+	and	r3, r0, #0x00f00000		@ variant
 	and	r6, r0, #0x0000000f		@ revision
-	orr	r6, r6, r5, lsr #20-4		@ combine variant and revision
+	orr	r6, r6, r3, lsr #20-4		@ combine variant and revision
 	ubfx	r0, r0, #4, #12			@ primary part number
 
 	/* Cortex-A8 Errata */
@@ -354,7 +354,7 @@ __v7_setup:
 	bne	2f
 #if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM)
 
-	teq	r5, #0x00100000			@ only present in r1p*
+	teq	r3, #0x00100000			@ only present in r1p*
 	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
 	orreq	r10, r10, #(1 << 6)		@ set IBE to 1
 	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
@@ -395,7 +395,7 @@ __v7_setup:
 	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
 #endif
 #ifdef CONFIG_ARM_ERRATA_743622
-	teq	r5, #0x00200000			@ only present in r2p*
+	teq	r3, #0x00200000			@ only present in r2p*
 	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
 	orreq	r10, r10, #1 << 6		@ set bit #6
 	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
@@ -425,10 +425,10 @@ __v7_setup:
 	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate
 #ifdef CONFIG_MMU
 	mcr	p15, 0, r10, c8, c7, 0		@ invalidate I + D TLBs
-	v7_ttb_setup r10, r4, r8, r5		@ TTBCR, TTBRx setup
-	ldr	r5, =PRRR			@ PRRR
+	v7_ttb_setup r10, r4, r5, r8, r3	@ TTBCR, TTBRx setup
+	ldr	r3, =PRRR			@ PRRR
 	ldr	r6, =NMRR			@ NMRR
-	mcr	p15, 0, r5, c10, c2, 0		@ write PRRR
+	mcr	p15, 0, r3, c10, c2, 0		@ write PRRR
 	mcr	p15, 0, r6, c10, c2, 1		@ write NMRR
 #endif
 	dsb					@ Complete invalidations
@@ -437,22 +437,22 @@ __v7_setup:
 	and	r0, r0, #(0xf << 12)		@ ThumbEE enabled field
 	teq	r0, #(1 << 12)			@ check if ThumbEE is present
 	bne	1f
-	mov	r5, #0
-	mcr	p14, 6, r5, c1, c0, 0		@ Initialize TEEHBR to 0
+	mov	r3, #0
+	mcr	p14, 6, r3, c1, c0, 0		@ Initialize TEEHBR to 0
 	mrc	p14, 6, r0, c0, c0, 0		@ load TEECR
 	orr	r0, r0, #1			@ set the 1st bit in order to
 	mcr	p14, 6, r0, c0, c0, 0		@ stop userspace TEEHBR access
 1:
 #endif
-	adr	r5, v7_crval
-	ldmia	r5, {r5, r6}
+	adr	r3, v7_crval
+	ldmia	r3, {r3, r6}
  ARM_BE8(orr	r6, r6, #1 << 25)		@ big-endian page tables
 #ifdef CONFIG_SWP_EMULATE
-	orr     r5, r5, #(1 << 10)              @ set SW bit in "clear"
+	orr     r3, r3, #(1 << 10)              @ set SW bit in "clear"
 	bic     r6, r6, #(1 << 10)              @ clear it in "mmuset"
 #endif
    	mrc	p15, 0, r0, c1, c0, 0		@ read control register
-	bic	r0, r0, r5			@ clear bits them
+	bic	r0, r0, r3			@ clear bits them
 	orr	r0, r0, r6			@ set them
  THUMB(	orr	r0, r0, #1 << 30	)	@ Thumb exceptions
 	ret	lr				@ return to head.S:__ret
-- 
cgit v1.1


From 17e7bf86690eaad4906d2295f0bd171cc194633b Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 4 Apr 2015 21:34:33 +0100
Subject: ARM: proc-v7: move CPU errata out of line

Rather than having a long sprawling __v7_setup function, which is hard
to maintain properly, move the CPU errata out of line.

While doing this, it was discovered that the Cortex-A15 errata had been
incorrectly added:

	ldr	r10, =0x00000c08	@ Cortex-A8 primary part number
	teq	r0, r10
	bne	2f
/* Cortex-A8 errata */
	b	3f
2:	ldr	r10, =0x00000c09	@ Cortex-A9 primary part number
	teq	r0, r10
	bne	3f
/* Cortex-A9 errata */
3:	ldr	r10, =0x00000c0f	@ Cortex-A15 primary part number
	teq	r0, r10
	bne	4f
/* Cortex-A15 errata */
4:

This results in the Cortex-A15 test always being executed after the
Cortex-A8 and Cortex-A9 errata, which is obviously not what is intended.
The 'b 3f' labels should have been updated to 'b 4f'.  The new structure
of:

	/* Cortex-A8 Errata */
	ldr	r10, =0x00000c08	@ Cortex-A8 primary part number
	teq	r0, r10
	beq	__ca8_errata

	/* Cortex-A9 Errata */
	ldr	r10, =0x00000c09	@ Cortex-A9 primary part number
	teq	r0, r10
	beq	__ca9_errata

	/* Cortex-A15 Errata */
	ldr	r10, =0x00000c0f	@ Cortex-A15 primary part number
	teq	r0, r10
	beq	__ca15_errata

__errata_finish:

is much cleaner and easier to see that this kind of thing doesn't
happen.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/proc-v7.S | 143 +++++++++++++++++++++++++++-----------------------
 1 file changed, 78 insertions(+), 65 deletions(-)

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 8732309..48031ee 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -252,6 +252,12 @@ ENDPROC(cpu_pj4b_do_resume)
  *	Initialise TLB, Caches, and MMU state ready to switch the MMU
  *	on.  Return in r0 the new CP15 C1 control register setting.
  *
+ *	r1, r2, r4, r5, r9 must be preserved.
+ *	r4: TTBR0 (low word)
+ *	r5: TTBR0 (high word if LPAE)
+ *	r8: TTBR1
+ *	r9: Main ID register
+ *
  *	This should be able to cover all ARMv7 cores.
  *
  *	It is assumed that:
@@ -279,6 +285,70 @@ __v7_ca17mp_setup:
 #endif
 	b	__v7_setup
 
+__ca8_errata:
+#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM)
+	teq	r3, #0x00100000			@ only present in r1p*
+	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
+	orreq	r10, r10, #(1 << 6)		@ set IBE to 1
+	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
+#endif
+#ifdef CONFIG_ARM_ERRATA_458693
+	teq	r6, #0x20			@ only present in r2p0
+	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
+	orreq	r10, r10, #(1 << 5)		@ set L1NEON to 1
+	orreq	r10, r10, #(1 << 9)		@ set PLDNOP to 1
+	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
+#endif
+#ifdef CONFIG_ARM_ERRATA_460075
+	teq	r6, #0x20			@ only present in r2p0
+	mrceq	p15, 1, r10, c9, c0, 2		@ read L2 cache aux ctrl register
+	tsteq	r10, #1 << 22
+	orreq	r10, r10, #(1 << 22)		@ set the Write Allocate disable bit
+	mcreq	p15, 1, r10, c9, c0, 2		@ write the L2 cache aux ctrl register
+#endif
+	b	__errata_finish
+
+__ca9_errata:
+#ifdef CONFIG_ARM_ERRATA_742230
+	cmp	r6, #0x22			@ only present up to r2p2
+	mrcle	p15, 0, r10, c15, c0, 1		@ read diagnostic register
+	orrle	r10, r10, #1 << 4		@ set bit #4
+	mcrle	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+#endif
+#ifdef CONFIG_ARM_ERRATA_742231
+	teq	r6, #0x20			@ present in r2p0
+	teqne	r6, #0x21			@ present in r2p1
+	teqne	r6, #0x22			@ present in r2p2
+	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
+	orreq	r10, r10, #1 << 12		@ set bit #12
+	orreq	r10, r10, #1 << 22		@ set bit #22
+	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+#endif
+#ifdef CONFIG_ARM_ERRATA_743622
+	teq	r3, #0x00200000			@ only present in r2p*
+	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
+	orreq	r10, r10, #1 << 6		@ set bit #6
+	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+#endif
+#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP)
+	ALT_SMP(cmp r6, #0x30)			@ present prior to r3p0
+	ALT_UP_B(1f)
+	mrclt	p15, 0, r10, c15, c0, 1		@ read diagnostic register
+	orrlt	r10, r10, #1 << 11		@ set bit #11
+	mcrlt	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+1:
+#endif
+	b	__errata_finish
+
+__ca15_errata:
+#ifdef CONFIG_ARM_ERRATA_773022
+	cmp	r6, #0x4			@ only present up to r0p4
+	mrcle	p15, 0, r10, c1, c0, 1		@ read aux control register
+	orrle	r10, r10, #1 << 1		@ disable loop buffer
+	mcrle	p15, 0, r10, c1, c0, 1		@ write aux control register
+#endif
+	b	__errata_finish
+
 __v7_pj4b_setup:
 #ifdef CONFIG_CPU_PJ4B
 
@@ -342,7 +412,7 @@ __v7_setup:
 	mrc	p15, 0, r0, c0, c0, 0		@ read main ID register
 	and	r10, r0, #0xff000000		@ ARM?
 	teq	r10, #0x41000000
-	bne	3f
+	bne	__errata_finish
 	and	r3, r0, #0x00f00000		@ variant
 	and	r6, r0, #0x0000000f		@ revision
 	orr	r6, r6, r3, lsr #20-4		@ combine variant and revision
@@ -351,77 +421,20 @@ __v7_setup:
 	/* Cortex-A8 Errata */
 	ldr	r10, =0x00000c08		@ Cortex-A8 primary part number
 	teq	r0, r10
-	bne	2f
-#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM)
-
-	teq	r3, #0x00100000			@ only present in r1p*
-	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
-	orreq	r10, r10, #(1 << 6)		@ set IBE to 1
-	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
-#endif
-#ifdef CONFIG_ARM_ERRATA_458693
-	teq	r6, #0x20			@ only present in r2p0
-	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
-	orreq	r10, r10, #(1 << 5)		@ set L1NEON to 1
-	orreq	r10, r10, #(1 << 9)		@ set PLDNOP to 1
-	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
-#endif
-#ifdef CONFIG_ARM_ERRATA_460075
-	teq	r6, #0x20			@ only present in r2p0
-	mrceq	p15, 1, r10, c9, c0, 2		@ read L2 cache aux ctrl register
-	tsteq	r10, #1 << 22
-	orreq	r10, r10, #(1 << 22)		@ set the Write Allocate disable bit
-	mcreq	p15, 1, r10, c9, c0, 2		@ write the L2 cache aux ctrl register
-#endif
-	b	3f
+	beq	__ca8_errata
 
 	/* Cortex-A9 Errata */
-2:	ldr	r10, =0x00000c09		@ Cortex-A9 primary part number
+	ldr	r10, =0x00000c09		@ Cortex-A9 primary part number
 	teq	r0, r10
-	bne	3f
-#ifdef CONFIG_ARM_ERRATA_742230
-	cmp	r6, #0x22			@ only present up to r2p2
-	mrcle	p15, 0, r10, c15, c0, 1		@ read diagnostic register
-	orrle	r10, r10, #1 << 4		@ set bit #4
-	mcrle	p15, 0, r10, c15, c0, 1		@ write diagnostic register
-#endif
-#ifdef CONFIG_ARM_ERRATA_742231
-	teq	r6, #0x20			@ present in r2p0
-	teqne	r6, #0x21			@ present in r2p1
-	teqne	r6, #0x22			@ present in r2p2
-	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
-	orreq	r10, r10, #1 << 12		@ set bit #12
-	orreq	r10, r10, #1 << 22		@ set bit #22
-	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
-#endif
-#ifdef CONFIG_ARM_ERRATA_743622
-	teq	r3, #0x00200000			@ only present in r2p*
-	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
-	orreq	r10, r10, #1 << 6		@ set bit #6
-	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
-#endif
-#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP)
-	ALT_SMP(cmp r6, #0x30)			@ present prior to r3p0
-	ALT_UP_B(1f)
-	mrclt	p15, 0, r10, c15, c0, 1		@ read diagnostic register
-	orrlt	r10, r10, #1 << 11		@ set bit #11
-	mcrlt	p15, 0, r10, c15, c0, 1		@ write diagnostic register
-1:
-#endif
+	beq	__ca9_errata
 
 	/* Cortex-A15 Errata */
-3:	ldr	r10, =0x00000c0f		@ Cortex-A15 primary part number
+	ldr	r10, =0x00000c0f		@ Cortex-A15 primary part number
 	teq	r0, r10
-	bne	4f
-
-#ifdef CONFIG_ARM_ERRATA_773022
-	cmp	r6, #0x4			@ only present up to r0p4
-	mrcle	p15, 0, r10, c1, c0, 1		@ read aux control register
-	orrle	r10, r10, #1 << 1		@ disable loop buffer
-	mcrle	p15, 0, r10, c1, c0, 1		@ write aux control register
-#endif
+	beq	__ca15_errata
 
-4:	mov	r10, #0
+__errata_finish:
+	mov	r10, #0
 	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate
 #ifdef CONFIG_MMU
 	mcr	p15, 0, r10, c8, c7, 0		@ invalidate I + D TLBs
-- 
cgit v1.1


From 4419496884ed16fdeb122782d41f5ad38f8923cc Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 4 Apr 2015 21:36:35 +0100
Subject: ARM: proc-v7: clean up MIDR access

We already have the main ID register available in r9, there's no need
to refetch it.  Use the saved value.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/proc-v7.S | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 48031ee..19c1c8b 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -409,14 +409,13 @@ __v7_setup:
 	bl      v7_flush_dcache_louis
 	ldmia	r12, {r0-r5, r7, r9, r11, lr}
 
-	mrc	p15, 0, r0, c0, c0, 0		@ read main ID register
-	and	r10, r0, #0xff000000		@ ARM?
+	and	r10, r9, #0xff000000		@ ARM?
 	teq	r10, #0x41000000
 	bne	__errata_finish
-	and	r3, r0, #0x00f00000		@ variant
-	and	r6, r0, #0x0000000f		@ revision
+	and	r3, r9, #0x00f00000		@ variant
+	and	r6, r9, #0x0000000f		@ revision
 	orr	r6, r6, r3, lsr #20-4		@ combine variant and revision
-	ubfx	r0, r0, #4, #12			@ primary part number
+	ubfx	r0, r9, #4, #12			@ primary part number
 
 	/* Cortex-A8 Errata */
 	ldr	r10, =0x00000c08		@ Cortex-A8 primary part number
-- 
cgit v1.1


From c76f238e261b8d088f086c2e8551fa8402d86f42 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 4 Apr 2015 21:46:35 +0100
Subject: ARM: proc-v7: sanitise and document registers around errata

Document that r13 is not a stack in the initialisation function, in
case anyone gets other ideas.

Document the registers available for the errata workarounds, and
specifically which registers contain parts of the MIDR register, as
well as which registers must be preserved.

Lastly, use the lowest numbered available register (r0) rather than
r10 for temporary storage.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/proc-v7.S | 68 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 38 insertions(+), 30 deletions(-)

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 19c1c8b..003190a 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -252,7 +252,7 @@ ENDPROC(cpu_pj4b_do_resume)
  *	Initialise TLB, Caches, and MMU state ready to switch the MMU
  *	on.  Return in r0 the new CP15 C1 control register setting.
  *
- *	r1, r2, r4, r5, r9 must be preserved.
+ *	r1, r2, r4, r5, r9, r13 must be preserved - r13 is not a stack
  *	r4: TTBR0 (low word)
  *	r5: TTBR0 (high word if LPAE)
  *	r8: TTBR1
@@ -285,57 +285,65 @@ __v7_ca17mp_setup:
 #endif
 	b	__v7_setup
 
+/*
+ * Errata:
+ *  r0, r10 available for use
+ *  r1, r2, r4, r5, r9, r13: must be preserved
+ *  r3: contains MIDR rX number in bits 23-20
+ *  r6: contains MIDR rXpY as 8-bit XY number
+ *  r9: MIDR
+ */
 __ca8_errata:
 #if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM)
 	teq	r3, #0x00100000			@ only present in r1p*
-	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
-	orreq	r10, r10, #(1 << 6)		@ set IBE to 1
-	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
+	mrceq	p15, 0, r0, c1, c0, 1		@ read aux control register
+	orreq	r0, r0, #(1 << 6)		@ set IBE to 1
+	mcreq	p15, 0, r0, c1, c0, 1		@ write aux control register
 #endif
 #ifdef CONFIG_ARM_ERRATA_458693
 	teq	r6, #0x20			@ only present in r2p0
-	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
-	orreq	r10, r10, #(1 << 5)		@ set L1NEON to 1
-	orreq	r10, r10, #(1 << 9)		@ set PLDNOP to 1
-	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
+	mrceq	p15, 0, r0, c1, c0, 1		@ read aux control register
+	orreq	r0, r0, #(1 << 5)		@ set L1NEON to 1
+	orreq	r0, r0, #(1 << 9)		@ set PLDNOP to 1
+	mcreq	p15, 0, r0, c1, c0, 1		@ write aux control register
 #endif
 #ifdef CONFIG_ARM_ERRATA_460075
 	teq	r6, #0x20			@ only present in r2p0
-	mrceq	p15, 1, r10, c9, c0, 2		@ read L2 cache aux ctrl register
-	tsteq	r10, #1 << 22
-	orreq	r10, r10, #(1 << 22)		@ set the Write Allocate disable bit
-	mcreq	p15, 1, r10, c9, c0, 2		@ write the L2 cache aux ctrl register
+	mrceq	p15, 1, r0, c9, c0, 2		@ read L2 cache aux ctrl register
+	tsteq	r0, #1 << 22
+	orreq	r0, r0, #(1 << 22)		@ set the Write Allocate disable bit
+	mcreq	p15, 1, r0, c9, c0, 2		@ write the L2 cache aux ctrl register
 #endif
 	b	__errata_finish
 
 __ca9_errata:
 #ifdef CONFIG_ARM_ERRATA_742230
 	cmp	r6, #0x22			@ only present up to r2p2
-	mrcle	p15, 0, r10, c15, c0, 1		@ read diagnostic register
-	orrle	r10, r10, #1 << 4		@ set bit #4
-	mcrle	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+	mrcle	p15, 0, r0, c15, c0, 1		@ read diagnostic register
+	orrle	r0, r0, #1 << 4			@ set bit #4
+	mcrle	p15, 0, r0, c15, c0, 1		@ write diagnostic register
 #endif
 #ifdef CONFIG_ARM_ERRATA_742231
 	teq	r6, #0x20			@ present in r2p0
 	teqne	r6, #0x21			@ present in r2p1
 	teqne	r6, #0x22			@ present in r2p2
-	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
-	orreq	r10, r10, #1 << 12		@ set bit #12
-	orreq	r10, r10, #1 << 22		@ set bit #22
-	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+	mrceq	p15, 0, r0, c15, c0, 1		@ read diagnostic register
+	orreq	r0, r0, #1 << 12		@ set bit #12
+	orreq	r0, r0, #1 << 22		@ set bit #22
+	mcreq	p15, 0, r0, c15, c0, 1		@ write diagnostic register
 #endif
 #ifdef CONFIG_ARM_ERRATA_743622
 	teq	r3, #0x00200000			@ only present in r2p*
-	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
-	orreq	r10, r10, #1 << 6		@ set bit #6
-	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+	mrceq	p15, 0, r0, c15, c0, 1		@ read diagnostic register
+	orreq	r0, r0, #1 << 6			@ set bit #6
+	mcreq	p15, 0, r0, c15, c0, 1		@ write diagnostic register
 #endif
 #if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP)
 	ALT_SMP(cmp r6, #0x30)			@ present prior to r3p0
 	ALT_UP_B(1f)
-	mrclt	p15, 0, r10, c15, c0, 1		@ read diagnostic register
-	orrlt	r10, r10, #1 << 11		@ set bit #11
-	mcrlt	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+	mrclt	p15, 0, r0, c15, c0, 1		@ read diagnostic register
+	orrlt	r0, r0, #1 << 11		@ set bit #11
+	mcrlt	p15, 0, r0, c15, c0, 1		@ write diagnostic register
 1:
 #endif
 	b	__errata_finish
@@ -343,9 +351,9 @@ __ca9_errata:
 __ca15_errata:
 #ifdef CONFIG_ARM_ERRATA_773022
 	cmp	r6, #0x4			@ only present up to r0p4
-	mrcle	p15, 0, r10, c1, c0, 1		@ read aux control register
-	orrle	r10, r10, #1 << 1		@ disable loop buffer
-	mcrle	p15, 0, r10, c1, c0, 1		@ write aux control register
+	mrcle	p15, 0, r0, c1, c0, 1		@ read aux control register
+	orrle	r0, r0, #1 << 1			@ disable loop buffer
+	mcrle	p15, 0, r0, c1, c0, 1		@ write aux control register
 #endif
 	b	__errata_finish
 
@@ -409,8 +417,8 @@ __v7_setup:
 	bl      v7_flush_dcache_louis
 	ldmia	r12, {r0-r5, r7, r9, r11, lr}
 
-	and	r10, r9, #0xff000000		@ ARM?
-	teq	r10, #0x41000000
+	and	r0, r9, #0xff000000		@ ARM?
+	teq	r0, #0x41000000
 	bne	__errata_finish
 	and	r3, r9, #0x00f00000		@ variant
 	and	r6, r9, #0x0000000f		@ revision
-- 
cgit v1.1


From c07b5fd0268fc2bbaa584548d437f763d20bde77 Mon Sep 17 00:00:00 2001
From: Yingjoe Chen <yingjoe.chen@mediatek.com>
Date: Mon, 18 May 2015 09:04:31 +0100
Subject: ARM: 8359/1: correct secondary_startup_arm mode

secondary_startup_arm is used as ARM mode secondary start up function
when ther kernel is compiled in THUMB mode, however the label itself
is still in .thumb mode. readelf shows:

160979: c020a581   120 FUNC    GLOBAL DEFAULT    2 secondary_startup_arm

Make sure the label is in ARM mode as well.

Signed-off-by: Yingjoe Chen <yingjoe.chen@mediatek.com>
Tested-by: Matthias Brugger <matthias.bgg@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/head.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 3637973..58ee8a2 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -346,8 +346,8 @@ __turn_mmu_on_loc:
 
 #if defined(CONFIG_SMP)
 	.text
-ENTRY(secondary_startup_arm)
 	.arm
+ENTRY(secondary_startup_arm)
  THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is entered in ARM.
  THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
  THUMB(	.thumb			)	@ switch to Thumb now.
-- 
cgit v1.1


From 29d2e5631ca25fc45d68373817cd3f2b5ee7d8d0 Mon Sep 17 00:00:00 2001
From: Yingjoe Chen <yingjoe.chen@mediatek.com>
Date: Mon, 18 May 2015 09:06:13 +0100
Subject: ARM: 8360/1: add secondary_startup_arm prototype in header file

Put secondary_startup_arm() prototype in arch/arm/include/asm/smp.h
so users doesn't have to add extern prototype in their code.

Signed-off-by: Yingjoe Chen <yingjoe.chen@mediatek.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/smp.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 18f5a55..f5cffb7 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -69,6 +69,7 @@ struct secondary_data {
 extern struct secondary_data secondary_data;
 extern volatile int pen_release;
 extern void secondary_startup(void);
+extern void secondary_startup_arm(void);
 
 extern int __cpu_disable(void);
 
-- 
cgit v1.1


From 73c430bf9ac6cd3a41ccc3c9904e66cc0a5f9420 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 18 May 2015 16:03:13 +0100
Subject: ARM: 8364/1: fix BE32 module loading

The new veneer support for loadable modules on ARM uses the
__opcode_to_mem_thumb32() function to count R_ARM_THM_CALL
and R_ARM_THM_JUMP24 relocations.

However, this function is not defined for big-endian kernels
on ARMv5 or before, causing a compile-time error:

arch/arm/kernel/module-plts.c: In function 'count_plts':
arch/arm/kernel/module-plts.c:124:9: error: implicit declaration of function '__opcode_to_mem_thumb32' [-Werror=implicit-function-declaration]
         __opcode_to_mem_thumb32(0x07ff2fff)))
         ^

As we know that this part of the function is only needed for
Thumb2 kernels, and that those can never happen with BE32,
we can avoid the error by enclosing the code in an #ifdef.

Fixes: 7d485f647c1 ("ARM: 8220/1: allow modules outside of bl range")

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/module-plts.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
index 71a65c4..097e2e2 100644
--- a/arch/arm/kernel/module-plts.c
+++ b/arch/arm/kernel/module-plts.c
@@ -118,11 +118,13 @@ static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num)
 					   __opcode_to_mem_arm(0x00ffffff)))
 				ret++;
 			break;
+#ifdef CONFIG_THUMB2_KERNEL
 		case R_ARM_THM_CALL:
 		case R_ARM_THM_JUMP24:
 			if (!duplicate_rel(base, rel, i,
 					   __opcode_to_mem_thumb32(0x07ff2fff)))
 				ret++;
+#endif
 		}
 	return ret;
 }
-- 
cgit v1.1


From 1e5f0519f4cbf8b8830b88039e16222f186a4ab4 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Mon, 18 May 2015 16:29:04 +0100
Subject: ARM: 8365/1: introduce sp804_timer_disable and remove arm_timer.h
 inclusion

The header asm/hardware/arm_timer.h is included in various machine
specific files to access TIMER_CTRL and initialise to a known state.

This patch introduces a new function sp804_timer_disable to disable
the SP804 timers and uses the same for initialising the timers to
known(off) state, thereby removing the dependency on the header
asm/hardware/arm_timer.h

This change is in prepartion to move sp804 timer support out of arch/arm
so that it can be used on ARM64 platforms.

Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Olof Johansson <olof@lixom.net>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/common/timer-sp.c               | 5 +++++
 arch/arm/include/asm/hardware/timer-sp.h | 1 +
 arch/arm/mach-integrator/integrator_ap.c | 1 -
 arch/arm/mach-realview/core.c            | 9 ++++-----
 arch/arm/mach-versatile/core.c           | 9 ++++-----
 5 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c
index 1921132..000aea3 100644
--- a/arch/arm/common/timer-sp.c
+++ b/arch/arm/common/timer-sp.c
@@ -71,6 +71,11 @@ static u64 notrace sp804_read(void)
 	return ~readl_relaxed(sched_clock_base + TIMER_VALUE);
 }
 
+void __init sp804_timer_disable(void __iomem *base)
+{
+	writel(0, base + TIMER_CTRL);
+}
+
 void __init __sp804_clocksource_and_sched_clock_init(void __iomem *base,
 						     const char *name,
 						     struct clk *clk,
diff --git a/arch/arm/include/asm/hardware/timer-sp.h b/arch/arm/include/asm/hardware/timer-sp.h
index bb28af7..05eaefa 100644
--- a/arch/arm/include/asm/hardware/timer-sp.h
+++ b/arch/arm/include/asm/hardware/timer-sp.h
@@ -4,6 +4,7 @@ void __sp804_clocksource_and_sched_clock_init(void __iomem *,
 					      const char *, struct clk *, int);
 void __sp804_clockevents_init(void __iomem *, unsigned int,
 			      struct clk *, const char *);
+void sp804_timer_disable(void __iomem *);
 
 static inline void sp804_clocksource_init(void __iomem *base, const char *name)
 {
diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index 30003ba..5b0e363 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -37,7 +37,6 @@
 #include <linux/stat.h>
 #include <linux/termios.h>
 
-#include <asm/hardware/arm_timer.h>
 #include <asm/setup.h>
 #include <asm/param.h>		/* HZ */
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index c309593..c611f48 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -38,7 +38,6 @@
 #include <mach/hardware.h>
 #include <asm/irq.h>
 #include <asm/mach-types.h>
-#include <asm/hardware/arm_timer.h>
 #include <asm/hardware/icst.h>
 
 #include <asm/mach/arch.h>
@@ -381,10 +380,10 @@ void __init realview_timer_init(unsigned int timer_irq)
 	/*
 	 * Initialise to a known state (all timers off)
 	 */
-	writel(0, timer0_va_base + TIMER_CTRL);
-	writel(0, timer1_va_base + TIMER_CTRL);
-	writel(0, timer2_va_base + TIMER_CTRL);
-	writel(0, timer3_va_base + TIMER_CTRL);
+	sp804_timer_disable(timer0_va_base);
+	sp804_timer_disable(timer1_va_base);
+	sp804_timer_disable(timer2_va_base);
+	sp804_timer_disable(timer3_va_base);
 
 	sp804_clocksource_init(timer3_va_base, "timer3");
 	sp804_clockevents_init(timer0_va_base, timer_irq, "timer0");
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index 6ea09fe..f98c196 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -42,7 +42,6 @@
 #include <linux/reboot.h>
 
 #include <asm/irq.h>
-#include <asm/hardware/arm_timer.h>
 #include <asm/hardware/icst.h>
 #include <asm/mach-types.h>
 
@@ -798,10 +797,10 @@ void __init versatile_timer_init(void)
 	/*
 	 * Initialise to a known state (all timers off)
 	 */
-	writel(0, TIMER0_VA_BASE + TIMER_CTRL);
-	writel(0, TIMER1_VA_BASE + TIMER_CTRL);
-	writel(0, TIMER2_VA_BASE + TIMER_CTRL);
-	writel(0, TIMER3_VA_BASE + TIMER_CTRL);
+	sp804_timer_disable(TIMER0_VA_BASE);
+	sp804_timer_disable(TIMER1_VA_BASE);
+	sp804_timer_disable(TIMER2_VA_BASE);
+	sp804_timer_disable(TIMER3_VA_BASE);
 
 	sp804_clocksource_init(TIMER3_VA_BASE, "timer3");
 	sp804_clockevents_init(TIMER0_VA_BASE, IRQ_TIMERINT0_1, "timer0");
-- 
cgit v1.1


From 0b7402dce445ba0d11401c2cb806e8fc260c9e49 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Mon, 18 May 2015 16:29:40 +0100
Subject: ARM: 8366/1: move Dual-Timer SP804 driver to drivers/clocksource

The ARM Dual-Timer SP804 module is peripheral found not only on ARM32
platforms but also on ARM64 platforms.

This patch moves the driver out of arch/arm to driver/clocksource
so that it can be used on ARM64 platforms also.

Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Rob Herring <robh@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Olof Johansson <olof@lixom.net>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig                          |   5 -
 arch/arm/common/Makefile                  |   1 -
 arch/arm/common/timer-sp.c                | 309 -----------------------------
 arch/arm/include/asm/hardware/arm_timer.h |  35 ----
 arch/arm/include/asm/hardware/timer-sp.h  |  24 ---
 arch/arm/mach-nspire/nspire.c             |   2 -
 arch/arm/mach-realview/core.c             |   4 +-
 arch/arm/mach-versatile/core.c            |   3 +-
 drivers/clocksource/Kconfig               |   5 +
 drivers/clocksource/Makefile              |   1 +
 drivers/clocksource/timer-integrator-ap.c |   3 +-
 drivers/clocksource/timer-sp.h            |  30 +++
 drivers/clocksource/timer-sp804.c         | 310 ++++++++++++++++++++++++++++++
 include/clocksource/timer-sp804.h         |  28 +++
 14 files changed, 380 insertions(+), 380 deletions(-)
 delete mode 100644 arch/arm/common/timer-sp.c
 delete mode 100644 arch/arm/include/asm/hardware/arm_timer.h
 delete mode 100644 arch/arm/include/asm/hardware/timer-sp.h
 create mode 100644 drivers/clocksource/timer-sp.h
 create mode 100644 drivers/clocksource/timer-sp804.c
 create mode 100644 include/clocksource/timer-sp804.h

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index d0950ce..34b7285 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -975,11 +975,6 @@ config PLAT_PXA
 config PLAT_VERSATILE
 	bool
 
-config ARM_TIMER_SP804
-	bool
-	select CLKSRC_MMIO
-	select CLKSRC_OF if OF
-
 source "arch/arm/firmware/Kconfig"
 
 source arch/arm/mm/Kconfig
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 70b1eff..6ee5959 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_SHARP_LOCOMO)	+= locomo.o
 obj-$(CONFIG_SHARP_PARAM)	+= sharpsl_param.o
 obj-$(CONFIG_SHARP_SCOOP)	+= scoop.o
 obj-$(CONFIG_PCI_HOST_ITE8152)  += it8152.o
-obj-$(CONFIG_ARM_TIMER_SP804)	+= timer-sp.o
 obj-$(CONFIG_MCPM)		+= mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
 CFLAGS_REMOVE_mcpm_entry.o	= -pg
 AFLAGS_mcpm_head.o		:= -march=armv7-a
diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c
deleted file mode 100644
index 000aea3..0000000
--- a/arch/arm/common/timer-sp.c
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- *  linux/arch/arm/common/timer-sp.c
- *
- *  Copyright (C) 1999 - 2003 ARM Limited
- *  Copyright (C) 2000 Deep Blue Solutions Ltd
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#include <linux/clk.h>
-#include <linux/clocksource.h>
-#include <linux/clockchips.h>
-#include <linux/err.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/sched_clock.h>
-
-#include <asm/hardware/arm_timer.h>
-#include <asm/hardware/timer-sp.h>
-
-static long __init sp804_get_clock_rate(struct clk *clk)
-{
-	long rate;
-	int err;
-
-	err = clk_prepare(clk);
-	if (err) {
-		pr_err("sp804: clock failed to prepare: %d\n", err);
-		clk_put(clk);
-		return err;
-	}
-
-	err = clk_enable(clk);
-	if (err) {
-		pr_err("sp804: clock failed to enable: %d\n", err);
-		clk_unprepare(clk);
-		clk_put(clk);
-		return err;
-	}
-
-	rate = clk_get_rate(clk);
-	if (rate < 0) {
-		pr_err("sp804: clock failed to get rate: %ld\n", rate);
-		clk_disable(clk);
-		clk_unprepare(clk);
-		clk_put(clk);
-	}
-
-	return rate;
-}
-
-static void __iomem *sched_clock_base;
-
-static u64 notrace sp804_read(void)
-{
-	return ~readl_relaxed(sched_clock_base + TIMER_VALUE);
-}
-
-void __init sp804_timer_disable(void __iomem *base)
-{
-	writel(0, base + TIMER_CTRL);
-}
-
-void __init __sp804_clocksource_and_sched_clock_init(void __iomem *base,
-						     const char *name,
-						     struct clk *clk,
-						     int use_sched_clock)
-{
-	long rate;
-
-	if (!clk) {
-		clk = clk_get_sys("sp804", name);
-		if (IS_ERR(clk)) {
-			pr_err("sp804: clock not found: %d\n",
-			       (int)PTR_ERR(clk));
-			return;
-		}
-	}
-
-	rate = sp804_get_clock_rate(clk);
-
-	if (rate < 0)
-		return;
-
-	/* setup timer 0 as free-running clocksource */
-	writel(0, base + TIMER_CTRL);
-	writel(0xffffffff, base + TIMER_LOAD);
-	writel(0xffffffff, base + TIMER_VALUE);
-	writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
-		base + TIMER_CTRL);
-
-	clocksource_mmio_init(base + TIMER_VALUE, name,
-		rate, 200, 32, clocksource_mmio_readl_down);
-
-	if (use_sched_clock) {
-		sched_clock_base = base;
-		sched_clock_register(sp804_read, 32, rate);
-	}
-}
-
-
-static void __iomem *clkevt_base;
-static unsigned long clkevt_reload;
-
-/*
- * IRQ handler for the timer
- */
-static irqreturn_t sp804_timer_interrupt(int irq, void *dev_id)
-{
-	struct clock_event_device *evt = dev_id;
-
-	/* clear the interrupt */
-	writel(1, clkevt_base + TIMER_INTCLR);
-
-	evt->event_handler(evt);
-
-	return IRQ_HANDLED;
-}
-
-static void sp804_set_mode(enum clock_event_mode mode,
-	struct clock_event_device *evt)
-{
-	unsigned long ctrl = TIMER_CTRL_32BIT | TIMER_CTRL_IE;
-
-	writel(ctrl, clkevt_base + TIMER_CTRL);
-
-	switch (mode) {
-	case CLOCK_EVT_MODE_PERIODIC:
-		writel(clkevt_reload, clkevt_base + TIMER_LOAD);
-		ctrl |= TIMER_CTRL_PERIODIC | TIMER_CTRL_ENABLE;
-		break;
-
-	case CLOCK_EVT_MODE_ONESHOT:
-		/* period set, and timer enabled in 'next_event' hook */
-		ctrl |= TIMER_CTRL_ONESHOT;
-		break;
-
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-	default:
-		break;
-	}
-
-	writel(ctrl, clkevt_base + TIMER_CTRL);
-}
-
-static int sp804_set_next_event(unsigned long next,
-	struct clock_event_device *evt)
-{
-	unsigned long ctrl = readl(clkevt_base + TIMER_CTRL);
-
-	writel(next, clkevt_base + TIMER_LOAD);
-	writel(ctrl | TIMER_CTRL_ENABLE, clkevt_base + TIMER_CTRL);
-
-	return 0;
-}
-
-static struct clock_event_device sp804_clockevent = {
-	.features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT |
-		CLOCK_EVT_FEAT_DYNIRQ,
-	.set_mode	= sp804_set_mode,
-	.set_next_event	= sp804_set_next_event,
-	.rating		= 300,
-};
-
-static struct irqaction sp804_timer_irq = {
-	.name		= "timer",
-	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
-	.handler	= sp804_timer_interrupt,
-	.dev_id		= &sp804_clockevent,
-};
-
-void __init __sp804_clockevents_init(void __iomem *base, unsigned int irq, struct clk *clk, const char *name)
-{
-	struct clock_event_device *evt = &sp804_clockevent;
-	long rate;
-
-	if (!clk)
-		clk = clk_get_sys("sp804", name);
-	if (IS_ERR(clk)) {
-		pr_err("sp804: %s clock not found: %d\n", name,
-			(int)PTR_ERR(clk));
-		return;
-	}
-
-	rate = sp804_get_clock_rate(clk);
-	if (rate < 0)
-		return;
-
-	clkevt_base = base;
-	clkevt_reload = DIV_ROUND_CLOSEST(rate, HZ);
-	evt->name = name;
-	evt->irq = irq;
-	evt->cpumask = cpu_possible_mask;
-
-	writel(0, base + TIMER_CTRL);
-
-	setup_irq(irq, &sp804_timer_irq);
-	clockevents_config_and_register(evt, rate, 0xf, 0xffffffff);
-}
-
-static void __init sp804_of_init(struct device_node *np)
-{
-	static bool initialized = false;
-	void __iomem *base;
-	int irq;
-	u32 irq_num = 0;
-	struct clk *clk1, *clk2;
-	const char *name = of_get_property(np, "compatible", NULL);
-
-	base = of_iomap(np, 0);
-	if (WARN_ON(!base))
-		return;
-
-	/* Ensure timers are disabled */
-	writel(0, base + TIMER_CTRL);
-	writel(0, base + TIMER_2_BASE + TIMER_CTRL);
-
-	if (initialized || !of_device_is_available(np))
-		goto err;
-
-	clk1 = of_clk_get(np, 0);
-	if (IS_ERR(clk1))
-		clk1 = NULL;
-
-	/* Get the 2nd clock if the timer has 3 timer clocks */
-	if (of_count_phandle_with_args(np, "clocks", "#clock-cells") == 3) {
-		clk2 = of_clk_get(np, 1);
-		if (IS_ERR(clk2)) {
-			pr_err("sp804: %s clock not found: %d\n", np->name,
-				(int)PTR_ERR(clk2));
-			clk2 = NULL;
-		}
-	} else
-		clk2 = clk1;
-
-	irq = irq_of_parse_and_map(np, 0);
-	if (irq <= 0)
-		goto err;
-
-	of_property_read_u32(np, "arm,sp804-has-irq", &irq_num);
-	if (irq_num == 2) {
-		__sp804_clockevents_init(base + TIMER_2_BASE, irq, clk2, name);
-		__sp804_clocksource_and_sched_clock_init(base, name, clk1, 1);
-	} else {
-		__sp804_clockevents_init(base, irq, clk1 , name);
-		__sp804_clocksource_and_sched_clock_init(base + TIMER_2_BASE,
-							 name, clk2, 1);
-	}
-	initialized = true;
-
-	return;
-err:
-	iounmap(base);
-}
-CLOCKSOURCE_OF_DECLARE(sp804, "arm,sp804", sp804_of_init);
-
-static void __init integrator_cp_of_init(struct device_node *np)
-{
-	static int init_count = 0;
-	void __iomem *base;
-	int irq;
-	const char *name = of_get_property(np, "compatible", NULL);
-	struct clk *clk;
-
-	base = of_iomap(np, 0);
-	if (WARN_ON(!base))
-		return;
-	clk = of_clk_get(np, 0);
-	if (WARN_ON(IS_ERR(clk)))
-		return;
-
-	/* Ensure timer is disabled */
-	writel(0, base + TIMER_CTRL);
-
-	if (init_count == 2 || !of_device_is_available(np))
-		goto err;
-
-	if (!init_count)
-		__sp804_clocksource_and_sched_clock_init(base, name, clk, 0);
-	else {
-		irq = irq_of_parse_and_map(np, 0);
-		if (irq <= 0)
-			goto err;
-
-		__sp804_clockevents_init(base, irq, clk, name);
-	}
-
-	init_count++;
-	return;
-err:
-	iounmap(base);
-}
-CLOCKSOURCE_OF_DECLARE(intcp, "arm,integrator-cp-timer", integrator_cp_of_init);
diff --git a/arch/arm/include/asm/hardware/arm_timer.h b/arch/arm/include/asm/hardware/arm_timer.h
deleted file mode 100644
index d6030ff..0000000
--- a/arch/arm/include/asm/hardware/arm_timer.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef __ASM_ARM_HARDWARE_ARM_TIMER_H
-#define __ASM_ARM_HARDWARE_ARM_TIMER_H
-
-/*
- * ARM timer implementation, found in Integrator, Versatile and Realview
- * platforms.  Not all platforms support all registers and bits in these
- * registers, so we mark them with A for Integrator AP, C for Integrator
- * CP, V for Versatile and R for Realview.
- *
- * Integrator AP has 16-bit timers, Integrator CP, Versatile and Realview
- * can have 16-bit or 32-bit selectable via a bit in the control register.
- *
- * Every SP804 contains two identical timers.
- */
-#define TIMER_1_BASE	0x00
-#define TIMER_2_BASE	0x20
-
-#define TIMER_LOAD	0x00			/* ACVR rw */
-#define TIMER_VALUE	0x04			/* ACVR ro */
-#define TIMER_CTRL	0x08			/* ACVR rw */
-#define TIMER_CTRL_ONESHOT	(1 << 0)	/*  CVR */
-#define TIMER_CTRL_32BIT	(1 << 1)	/*  CVR */
-#define TIMER_CTRL_DIV1		(0 << 2)	/* ACVR */
-#define TIMER_CTRL_DIV16	(1 << 2)	/* ACVR */
-#define TIMER_CTRL_DIV256	(2 << 2)	/* ACVR */
-#define TIMER_CTRL_IE		(1 << 5)	/*   VR */
-#define TIMER_CTRL_PERIODIC	(1 << 6)	/* ACVR */
-#define TIMER_CTRL_ENABLE	(1 << 7)	/* ACVR */
-
-#define TIMER_INTCLR	0x0c			/* ACVR wo */
-#define TIMER_RIS	0x10			/*  CVR ro */
-#define TIMER_MIS	0x14			/*  CVR ro */
-#define TIMER_BGLOAD	0x18			/*  CVR rw */
-
-#endif
diff --git a/arch/arm/include/asm/hardware/timer-sp.h b/arch/arm/include/asm/hardware/timer-sp.h
deleted file mode 100644
index 05eaefa..0000000
--- a/arch/arm/include/asm/hardware/timer-sp.h
+++ /dev/null
@@ -1,24 +0,0 @@
-struct clk;
-
-void __sp804_clocksource_and_sched_clock_init(void __iomem *,
-					      const char *, struct clk *, int);
-void __sp804_clockevents_init(void __iomem *, unsigned int,
-			      struct clk *, const char *);
-void sp804_timer_disable(void __iomem *);
-
-static inline void sp804_clocksource_init(void __iomem *base, const char *name)
-{
-	__sp804_clocksource_and_sched_clock_init(base, name, NULL, 0);
-}
-
-static inline void sp804_clocksource_and_sched_clock_init(void __iomem *base,
-							  const char *name)
-{
-	__sp804_clocksource_and_sched_clock_init(base, name, NULL, 1);
-}
-
-static inline void sp804_clockevents_init(void __iomem *base, unsigned int irq, const char *name)
-{
-	__sp804_clockevents_init(base, irq, NULL, name);
-
-}
diff --git a/arch/arm/mach-nspire/nspire.c b/arch/arm/mach-nspire/nspire.c
index 3445a56..34c2a1b3 100644
--- a/arch/arm/mach-nspire/nspire.c
+++ b/arch/arm/mach-nspire/nspire.c
@@ -22,8 +22,6 @@
 #include <asm/mach-types.h>
 #include <asm/mach/map.h>
 
-#include <asm/hardware/timer-sp.h>
-
 #include "mmio.h"
 #include "clcd.h"
 
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index c611f48..44575ed 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -35,6 +35,8 @@
 #include <linux/mtd/physmap.h>
 #include <linux/memblock.h>
 
+#include <clocksource/timer-sp804.h>
+
 #include <mach/hardware.h>
 #include <asm/irq.h>
 #include <asm/mach-types.h>
@@ -44,10 +46,8 @@
 #include <asm/mach/irq.h>
 #include <asm/mach/map.h>
 
-
 #include <mach/platform.h>
 #include <mach/irqs.h>
-#include <asm/hardware/timer-sp.h>
 
 #include <plat/sched_clock.h>
 
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index f98c196..23a04fe 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -41,6 +41,8 @@
 #include <linux/bitops.h>
 #include <linux/reboot.h>
 
+#include <clocksource/timer-sp804.h>
+
 #include <asm/irq.h>
 #include <asm/hardware/icst.h>
 #include <asm/mach-types.h>
@@ -51,7 +53,6 @@
 #include <asm/mach/map.h>
 #include <mach/hardware.h>
 #include <mach/platform.h>
-#include <asm/hardware/timer-sp.h>
 
 #include <plat/sched_clock.h>
 
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 51d7865f..0f1c0e7 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -132,6 +132,11 @@ config ARM_GLOBAL_TIMER
 	help
 	  This options enables support for the ARM global timer unit
 
+config ARM_TIMER_SP804
+	bool "Support for Dual Timer SP804 module"
+	select CLKSRC_MMIO
+	select CLKSRC_OF if OF
+
 config CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
 	bool
 	depends on ARM_GLOBAL_TIMER
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 5b85f6a..5ca59f9 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_MTK_TIMER)		+= mtk_timer.o
 
 obj-$(CONFIG_ARM_ARCH_TIMER)		+= arm_arch_timer.o
 obj-$(CONFIG_ARM_GLOBAL_TIMER)		+= arm_global_timer.o
+obj-$(CONFIG_ARM_TIMER_SP804)		+= timer-sp804.o
 obj-$(CONFIG_CLKSRC_METAG_GENERIC)	+= metag_generic.o
 obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST)	+= dummy_timer.o
 obj-$(CONFIG_ARCH_KEYSTONE)		+= timer-keystone.o
diff --git a/drivers/clocksource/timer-integrator-ap.c b/drivers/clocksource/timer-integrator-ap.c
index b9efd30..d7d21e4 100644
--- a/drivers/clocksource/timer-integrator-ap.c
+++ b/drivers/clocksource/timer-integrator-ap.c
@@ -26,7 +26,8 @@
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/sched_clock.h>
-#include <asm/hardware/arm_timer.h>
+
+#include "timer-sp.h"
 
 static void __iomem * sched_clk_base;
 
diff --git a/drivers/clocksource/timer-sp.h b/drivers/clocksource/timer-sp.h
new file mode 100644
index 0000000..050d885
--- /dev/null
+++ b/drivers/clocksource/timer-sp.h
@@ -0,0 +1,30 @@
+/*
+ * ARM timer implementation, found in Integrator, Versatile and Realview
+ * platforms.  Not all platforms support all registers and bits in these
+ * registers, so we mark them with A for Integrator AP, C for Integrator
+ * CP, V for Versatile and R for Realview.
+ *
+ * Integrator AP has 16-bit timers, Integrator CP, Versatile and Realview
+ * can have 16-bit or 32-bit selectable via a bit in the control register.
+ *
+ * Every SP804 contains two identical timers.
+ */
+#define TIMER_1_BASE	0x00
+#define TIMER_2_BASE	0x20
+
+#define TIMER_LOAD	0x00			/* ACVR rw */
+#define TIMER_VALUE	0x04			/* ACVR ro */
+#define TIMER_CTRL	0x08			/* ACVR rw */
+#define TIMER_CTRL_ONESHOT	(1 << 0)	/*  CVR */
+#define TIMER_CTRL_32BIT	(1 << 1)	/*  CVR */
+#define TIMER_CTRL_DIV1		(0 << 2)	/* ACVR */
+#define TIMER_CTRL_DIV16	(1 << 2)	/* ACVR */
+#define TIMER_CTRL_DIV256	(2 << 2)	/* ACVR */
+#define TIMER_CTRL_IE		(1 << 5)	/*   VR */
+#define TIMER_CTRL_PERIODIC	(1 << 6)	/* ACVR */
+#define TIMER_CTRL_ENABLE	(1 << 7)	/* ACVR */
+
+#define TIMER_INTCLR	0x0c			/* ACVR wo */
+#define TIMER_RIS	0x10			/*  CVR ro */
+#define TIMER_MIS	0x14			/*  CVR ro */
+#define TIMER_BGLOAD	0x18			/*  CVR rw */
diff --git a/drivers/clocksource/timer-sp804.c b/drivers/clocksource/timer-sp804.c
new file mode 100644
index 0000000..ca02503
--- /dev/null
+++ b/drivers/clocksource/timer-sp804.c
@@ -0,0 +1,310 @@
+/*
+ *  linux/drivers/clocksource/timer-sp.c
+ *
+ *  Copyright (C) 1999 - 2003 ARM Limited
+ *  Copyright (C) 2000 Deep Blue Solutions Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/clk.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/sched_clock.h>
+
+#include <clocksource/timer-sp804.h>
+
+#include "timer-sp.h"
+
+static long __init sp804_get_clock_rate(struct clk *clk)
+{
+	long rate;
+	int err;
+
+	err = clk_prepare(clk);
+	if (err) {
+		pr_err("sp804: clock failed to prepare: %d\n", err);
+		clk_put(clk);
+		return err;
+	}
+
+	err = clk_enable(clk);
+	if (err) {
+		pr_err("sp804: clock failed to enable: %d\n", err);
+		clk_unprepare(clk);
+		clk_put(clk);
+		return err;
+	}
+
+	rate = clk_get_rate(clk);
+	if (rate < 0) {
+		pr_err("sp804: clock failed to get rate: %ld\n", rate);
+		clk_disable(clk);
+		clk_unprepare(clk);
+		clk_put(clk);
+	}
+
+	return rate;
+}
+
+static void __iomem *sched_clock_base;
+
+static u64 notrace sp804_read(void)
+{
+	return ~readl_relaxed(sched_clock_base + TIMER_VALUE);
+}
+
+void __init sp804_timer_disable(void __iomem *base)
+{
+	writel(0, base + TIMER_CTRL);
+}
+
+void __init __sp804_clocksource_and_sched_clock_init(void __iomem *base,
+						     const char *name,
+						     struct clk *clk,
+						     int use_sched_clock)
+{
+	long rate;
+
+	if (!clk) {
+		clk = clk_get_sys("sp804", name);
+		if (IS_ERR(clk)) {
+			pr_err("sp804: clock not found: %d\n",
+			       (int)PTR_ERR(clk));
+			return;
+		}
+	}
+
+	rate = sp804_get_clock_rate(clk);
+
+	if (rate < 0)
+		return;
+
+	/* setup timer 0 as free-running clocksource */
+	writel(0, base + TIMER_CTRL);
+	writel(0xffffffff, base + TIMER_LOAD);
+	writel(0xffffffff, base + TIMER_VALUE);
+	writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
+		base + TIMER_CTRL);
+
+	clocksource_mmio_init(base + TIMER_VALUE, name,
+		rate, 200, 32, clocksource_mmio_readl_down);
+
+	if (use_sched_clock) {
+		sched_clock_base = base;
+		sched_clock_register(sp804_read, 32, rate);
+	}
+}
+
+
+static void __iomem *clkevt_base;
+static unsigned long clkevt_reload;
+
+/*
+ * IRQ handler for the timer
+ */
+static irqreturn_t sp804_timer_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = dev_id;
+
+	/* clear the interrupt */
+	writel(1, clkevt_base + TIMER_INTCLR);
+
+	evt->event_handler(evt);
+
+	return IRQ_HANDLED;
+}
+
+static void sp804_set_mode(enum clock_event_mode mode,
+	struct clock_event_device *evt)
+{
+	unsigned long ctrl = TIMER_CTRL_32BIT | TIMER_CTRL_IE;
+
+	writel(ctrl, clkevt_base + TIMER_CTRL);
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		writel(clkevt_reload, clkevt_base + TIMER_LOAD);
+		ctrl |= TIMER_CTRL_PERIODIC | TIMER_CTRL_ENABLE;
+		break;
+
+	case CLOCK_EVT_MODE_ONESHOT:
+		/* period set, and timer enabled in 'next_event' hook */
+		ctrl |= TIMER_CTRL_ONESHOT;
+		break;
+
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		break;
+	}
+
+	writel(ctrl, clkevt_base + TIMER_CTRL);
+}
+
+static int sp804_set_next_event(unsigned long next,
+	struct clock_event_device *evt)
+{
+	unsigned long ctrl = readl(clkevt_base + TIMER_CTRL);
+
+	writel(next, clkevt_base + TIMER_LOAD);
+	writel(ctrl | TIMER_CTRL_ENABLE, clkevt_base + TIMER_CTRL);
+
+	return 0;
+}
+
+static struct clock_event_device sp804_clockevent = {
+	.features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT |
+		CLOCK_EVT_FEAT_DYNIRQ,
+	.set_mode	= sp804_set_mode,
+	.set_next_event	= sp804_set_next_event,
+	.rating		= 300,
+};
+
+static struct irqaction sp804_timer_irq = {
+	.name		= "timer",
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
+	.handler	= sp804_timer_interrupt,
+	.dev_id		= &sp804_clockevent,
+};
+
+void __init __sp804_clockevents_init(void __iomem *base, unsigned int irq, struct clk *clk, const char *name)
+{
+	struct clock_event_device *evt = &sp804_clockevent;
+	long rate;
+
+	if (!clk)
+		clk = clk_get_sys("sp804", name);
+	if (IS_ERR(clk)) {
+		pr_err("sp804: %s clock not found: %d\n", name,
+			(int)PTR_ERR(clk));
+		return;
+	}
+
+	rate = sp804_get_clock_rate(clk);
+	if (rate < 0)
+		return;
+
+	clkevt_base = base;
+	clkevt_reload = DIV_ROUND_CLOSEST(rate, HZ);
+	evt->name = name;
+	evt->irq = irq;
+	evt->cpumask = cpu_possible_mask;
+
+	writel(0, base + TIMER_CTRL);
+
+	setup_irq(irq, &sp804_timer_irq);
+	clockevents_config_and_register(evt, rate, 0xf, 0xffffffff);
+}
+
+static void __init sp804_of_init(struct device_node *np)
+{
+	static bool initialized = false;
+	void __iomem *base;
+	int irq;
+	u32 irq_num = 0;
+	struct clk *clk1, *clk2;
+	const char *name = of_get_property(np, "compatible", NULL);
+
+	base = of_iomap(np, 0);
+	if (WARN_ON(!base))
+		return;
+
+	/* Ensure timers are disabled */
+	writel(0, base + TIMER_CTRL);
+	writel(0, base + TIMER_2_BASE + TIMER_CTRL);
+
+	if (initialized || !of_device_is_available(np))
+		goto err;
+
+	clk1 = of_clk_get(np, 0);
+	if (IS_ERR(clk1))
+		clk1 = NULL;
+
+	/* Get the 2nd clock if the timer has 3 timer clocks */
+	if (of_count_phandle_with_args(np, "clocks", "#clock-cells") == 3) {
+		clk2 = of_clk_get(np, 1);
+		if (IS_ERR(clk2)) {
+			pr_err("sp804: %s clock not found: %d\n", np->name,
+				(int)PTR_ERR(clk2));
+			clk2 = NULL;
+		}
+	} else
+		clk2 = clk1;
+
+	irq = irq_of_parse_and_map(np, 0);
+	if (irq <= 0)
+		goto err;
+
+	of_property_read_u32(np, "arm,sp804-has-irq", &irq_num);
+	if (irq_num == 2) {
+		__sp804_clockevents_init(base + TIMER_2_BASE, irq, clk2, name);
+		__sp804_clocksource_and_sched_clock_init(base, name, clk1, 1);
+	} else {
+		__sp804_clockevents_init(base, irq, clk1 , name);
+		__sp804_clocksource_and_sched_clock_init(base + TIMER_2_BASE,
+							 name, clk2, 1);
+	}
+	initialized = true;
+
+	return;
+err:
+	iounmap(base);
+}
+CLOCKSOURCE_OF_DECLARE(sp804, "arm,sp804", sp804_of_init);
+
+static void __init integrator_cp_of_init(struct device_node *np)
+{
+	static int init_count = 0;
+	void __iomem *base;
+	int irq;
+	const char *name = of_get_property(np, "compatible", NULL);
+	struct clk *clk;
+
+	base = of_iomap(np, 0);
+	if (WARN_ON(!base))
+		return;
+	clk = of_clk_get(np, 0);
+	if (WARN_ON(IS_ERR(clk)))
+		return;
+
+	/* Ensure timer is disabled */
+	writel(0, base + TIMER_CTRL);
+
+	if (init_count == 2 || !of_device_is_available(np))
+		goto err;
+
+	if (!init_count)
+		__sp804_clocksource_and_sched_clock_init(base, name, clk, 0);
+	else {
+		irq = irq_of_parse_and_map(np, 0);
+		if (irq <= 0)
+			goto err;
+
+		__sp804_clockevents_init(base, irq, clk, name);
+	}
+
+	init_count++;
+	return;
+err:
+	iounmap(base);
+}
+CLOCKSOURCE_OF_DECLARE(intcp, "arm,integrator-cp-timer", integrator_cp_of_init);
diff --git a/include/clocksource/timer-sp804.h b/include/clocksource/timer-sp804.h
new file mode 100644
index 0000000..1f8a1ca
--- /dev/null
+++ b/include/clocksource/timer-sp804.h
@@ -0,0 +1,28 @@
+#ifndef __CLKSOURCE_TIMER_SP804_H
+#define __CLKSOURCE_TIMER_SP804_H
+
+struct clk;
+
+void __sp804_clocksource_and_sched_clock_init(void __iomem *,
+					      const char *, struct clk *, int);
+void __sp804_clockevents_init(void __iomem *, unsigned int,
+			      struct clk *, const char *);
+void sp804_timer_disable(void __iomem *);
+
+static inline void sp804_clocksource_init(void __iomem *base, const char *name)
+{
+	__sp804_clocksource_and_sched_clock_init(base, name, NULL, 0);
+}
+
+static inline void sp804_clocksource_and_sched_clock_init(void __iomem *base,
+							  const char *name)
+{
+	__sp804_clocksource_and_sched_clock_init(base, name, NULL, 1);
+}
+
+static inline void sp804_clockevents_init(void __iomem *base, unsigned int irq, const char *name)
+{
+	__sp804_clockevents_init(base, irq, NULL, name);
+
+}
+#endif
-- 
cgit v1.1


From e44d89f14227ffa5e96cd158fb2d35fbb40fa97a Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Fri, 29 May 2015 12:27:46 +0100
Subject: ARM: 8382/1: clocksource: make ARM_TIMER_SP804 depend on
 GENERIC_SCHED_CLOCK

Commit 5261ef2ea836 ("ARM: 8366/1: move Dual-Timer SP804 driver to
drivers/clocksource") moved SP804 to drivers/clocksource resulting in
it being selectable on platforms/architectures without the config
GENERIC_SCHED_CLOCK enabled. Due to that, it results in the following
build failure(e.g. x86_64 allmodconfig)

drivers/built-in.o: In function `__sp804_clocksource_and_sched_clock_init':
(.init.text+0x1a0e7): undefined reference to `sched_clock_register'

This patch fixes the build by making ARM_TIMER_SP804 depend on
GENERIC_SCHED_CLOCK

Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/clocksource/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 0f1c0e7..5053470 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -134,6 +134,7 @@ config ARM_GLOBAL_TIMER
 
 config ARM_TIMER_SP804
 	bool "Support for Dual Timer SP804 module"
+	depends on GENERIC_SCHED_CLOCK
 	select CLKSRC_MMIO
 	select CLKSRC_OF if OF
 
-- 
cgit v1.1


From 45b0fa09c6e47c02ac0a08bd2b3c6a73480222e0 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Wed, 20 May 2015 00:16:46 +0100
Subject: ARM: 8369/1: ARMv7M: define size of vector table for Vybrid

Vybrids has 112 peripheral interrupts which can be routed to the
Cortex-M4's NVIC interrupt controller.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 6173aa3..54cc211f 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -609,6 +609,7 @@ config CPU_V7M_NUM_IRQ
 	depends on CPU_V7M
 	default 90 if ARCH_STM32
 	default 38 if ARCH_EFM32
+	default 112 if SOC_VF610
 	default 240
 	help
 	  This option indicates the number of interrupts connected to the NVIC.
-- 
cgit v1.1


From 31cd08c3a1db4b3164567a2a424b5e5dba6ce7a3 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 19 May 2015 13:39:05 +0100
Subject: ARM: remove __bad_xchg definition

We want link errors if xchg() is called for a variable size we do not
support.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/cmpxchg.h | 1 +
 arch/arm/kernel/traps.c        | 8 --------
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
index abb2c37..f4d74ab 100644
--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -94,6 +94,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 		break;
 #endif
 	default:
+		/* Cause a link-time error, the xchg() size is not supported */
 		__bad_xchg(ptr, size), ret = 0;
 		break;
 	}
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 3dce1a3..d358226 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -749,14 +749,6 @@ late_initcall(arm_mrc_hook_init);
 
 #endif
 
-void __bad_xchg(volatile void *ptr, int size)
-{
-	pr_err("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n",
-	       __builtin_return_address(0), ptr, size);
-	BUG();
-}
-EXPORT_SYMBOL(__bad_xchg);
-
 /*
  * A data abort trap was taken, but we did not handle the instruction.
  * Try to abort the user program, or panic if it was the kernel.
-- 
cgit v1.1


From e001bbae7147b111fe1aa42beaf835635f3c016e Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 26 May 2015 15:41:41 +0100
Subject: ARM: cmpxchg: avoid warnings from macro-ized cmpxchg()
 implementations

A recent change in kernel/acct.c added a new warning for many
configurations on ARM:

kernel/acct.c: In function 'acct_pin_kill':
arch/arm/include/asm/cmpxchg.h:122:3: warning: value computed is not used [-Wunused-value]

The code is in fact correct, it's just a cmpxchg() call that
intentionally ignores the result, and no other code does that.  The
warning does not show up on x86 because of the way that its cmpxchg()
macro is written. This changes the ARM implementation to use a similar
construct with a compound expression instead of a typecast, which causes
the compiler to not complain about an unused result.

Fix the other macros in this file in a similar way, and place them
just below their function implementations.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/cmpxchg.h | 66 +++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
index f4d74ab..1692a05 100644
--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -103,8 +103,10 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 	return ret;
 }
 
-#define xchg(ptr,x) \
-	((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+#define xchg(ptr, x) ({							\
+	(__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr),		\
+				   sizeof(*(ptr)));			\
+})
 
 #include <asm-generic/cmpxchg-local.h>
 
@@ -119,14 +121,16 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
  * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
  * them available.
  */
-#define cmpxchg_local(ptr, o, n)				  	       \
-	((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
-			(unsigned long)(n), sizeof(*(ptr))))
+#define cmpxchg_local(ptr, o, n) ({					\
+	(__typeof(*ptr))__cmpxchg_local_generic((ptr),			\
+					        (unsigned long)(o),	\
+					        (unsigned long)(n),	\
+					        sizeof(*(ptr)));	\
+})
+
 #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
 
-#ifndef CONFIG_SMP
 #include <asm-generic/cmpxchg.h>
-#endif
 
 #else	/* min ARCH >= ARMv6 */
 
@@ -202,11 +206,12 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
 	return ret;
 }
 
-#define cmpxchg(ptr,o,n)						\
-	((__typeof__(*(ptr)))__cmpxchg_mb((ptr),			\
-					  (unsigned long)(o),		\
-					  (unsigned long)(n),		\
-					  sizeof(*(ptr))))
+#define cmpxchg(ptr,o,n) ({						\
+	(__typeof__(*(ptr)))__cmpxchg_mb((ptr),				\
+					 (unsigned long)(o),		\
+					 (unsigned long)(n),		\
+					 sizeof(*(ptr)));		\
+})
 
 static inline unsigned long __cmpxchg_local(volatile void *ptr,
 					    unsigned long old,
@@ -228,6 +233,13 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
 	return ret;
 }
 
+#define cmpxchg_local(ptr, o, n) ({					\
+	(__typeof(*ptr))__cmpxchg_local((ptr),				\
+				        (unsigned long)(o),		\
+				        (unsigned long)(n),		\
+				        sizeof(*(ptr)));		\
+})
+
 static inline unsigned long long __cmpxchg64(unsigned long long *ptr,
 					     unsigned long long old,
 					     unsigned long long new)
@@ -253,6 +265,14 @@ static inline unsigned long long __cmpxchg64(unsigned long long *ptr,
 	return oldval;
 }
 
+#define cmpxchg64_relaxed(ptr, o, n) ({					\
+	(__typeof__(*(ptr)))__cmpxchg64((ptr),				\
+					(unsigned long long)(o),	\
+					(unsigned long long)(n));	\
+})
+
+#define cmpxchg64_local(ptr, o, n) cmpxchg64_relaxed((ptr), (o), (n))
+
 static inline unsigned long long __cmpxchg64_mb(unsigned long long *ptr,
 						unsigned long long old,
 						unsigned long long new)
@@ -266,23 +286,11 @@ static inline unsigned long long __cmpxchg64_mb(unsigned long long *ptr,
 	return ret;
 }
 
-#define cmpxchg_local(ptr,o,n)						\
-	((__typeof__(*(ptr)))__cmpxchg_local((ptr),			\
-				       (unsigned long)(o),		\
-				       (unsigned long)(n),		\
-				       sizeof(*(ptr))))
-
-#define cmpxchg64(ptr, o, n)						\
-	((__typeof__(*(ptr)))__cmpxchg64_mb((ptr),			\
-					(unsigned long long)(o),	\
-					(unsigned long long)(n)))
-
-#define cmpxchg64_relaxed(ptr, o, n)					\
-	((__typeof__(*(ptr)))__cmpxchg64((ptr),				\
-					(unsigned long long)(o),	\
-					(unsigned long long)(n)))
-
-#define cmpxchg64_local(ptr, o, n)	cmpxchg64_relaxed((ptr), (o), (n))
+#define cmpxchg64(ptr, o, n) ({						\
+	(__typeof__(*(ptr)))__cmpxchg64_mb((ptr),			\
+					   (unsigned long long)(o),	\
+					   (unsigned long long)(n));	\
+})
 
 #endif	/* __LINUX_ARM_ARCH__ >= 6 */
 
-- 
cgit v1.1


From d33ce23b2160d26b27a47092da5d556b5b11a12a Mon Sep 17 00:00:00 2001
From: Nathan Lynch <nathan_lynch@mentor.com>
Date: Wed, 3 Jun 2015 00:46:04 +0100
Subject: ARM: 8385/1: VDSO: group link options

Currently the VDSO's link options are kind of a mess spread between

ccflags-y and cmd_vdsold.  Collect linker directives into one
variable, VDSO_LDFLAGS, and use that in cmd_vdsold.

Signed-off-by: Nathan Lynch <nathan_lynch@mentor.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/vdso/Makefile | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index 8aa79105..89182ad 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -6,9 +6,14 @@ obj-vdso := vgettimeofday.o datapage.o
 targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds
 obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
 
-ccflags-y := -shared -fPIC -fno-common -fno-builtin -fno-stack-protector
-ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 -DDISABLE_BRANCH_PROFILING
-ccflags-y += -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+ccflags-y := -fPIC -fno-common -fno-builtin -fno-stack-protector
+ccflags-y += -DDISABLE_BRANCH_PROFILING
+
+VDSO_LDFLAGS := -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1
+VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
+VDSO_LDFLAGS += -nostdlib -shared
+VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--build-id)
 
 obj-$(CONFIG_VDSO) += vdso.o
 extra-$(CONFIG_VDSO) += vdso.lds
@@ -40,10 +45,8 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
 
 # Actual build commands
 quiet_cmd_vdsold = VDSO    $@
-      cmd_vdsold = $(CC) $(c_flags) -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) \
-                   $(call cc-ldoption, -Wl$(comma)--build-id) \
-                   -Wl,-Bsymbolic -Wl,-z,max-page-size=4096 \
-                   -Wl,-z,common-page-size=4096 -o $@
+      cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \
+                   -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
 
 quiet_cmd_vdsomunge = MUNGE   $@
       cmd_vdsomunge = $(objtree)/$(obj)/vdsomunge $< $@
-- 
cgit v1.1


From d2b30cd4b7223a96e606dfc8120626f66d81e091 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <nathan_lynch@mentor.com>
Date: Wed, 3 Jun 2015 00:41:15 +0100
Subject: ARM: 8384/1: VDSO: force use of BFD linker

When using a toolchain with gold as the default linker, the VDSO build
fails:

  VDSO    arch/arm/vdso/vdso.so.raw
  HOSTCC  arch/arm/vdso/vdsomunge
  MUNGE   arch/arm/vdso/vdso.so.dbg
  OBJCOPY arch/arm/vdso/vdso.so
BFD: arch/arm/vdso/vdso.so: Not enough room for program headers, try
linking with -N

For whatever reason, ld.gold is omitting an exidx program header that
ld.bfd emits, and even when I work around that, I don't get a working
VDSO.

For now, instead of supporting gold (which will fail to link the
kernel anyway since it does not implement --pic-veneer), direct the
compiler to use the traditional bfd linker.  This is accomplished by
using -fuse-ld, which is implemented in GCC 4.8 and later.

Note: one limitation of this is that if the toolchain is configured
to use gold by default, and the bfd linker is not in $PATH, the VDSO
build will fail:

  VDSO    arch/arm/vdso/vdso.so.raw
collect2: fatal error: cannot find 'ld'

This will happen if CROSS_COMPILE begins with a path such as
/opt/bin/arm-linux-gnu- but /opt/bin is not in $PATH.  This is
considered an acceptable corner-case limitation and is easily worked
around.

Additonal note: we use cc-option instead of cc-ldoption so that
-fuse-ld=bfd is placed in the command line if the compiler recognizes
the option.  Using cc-ldoption results in an attempt to link, which
fails in the situation just described, causing -fuse-ld=bfd to be
omitted and gold to be used for the VDSO link, which is what we're
trying to prevent.

Reported-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Nathan Lynch <nathan_lynch@mentor.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/vdso/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index 89182ad..9d259d9 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -14,6 +14,7 @@ VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
 VDSO_LDFLAGS += -nostdlib -shared
 VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
 VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--build-id)
+VDSO_LDFLAGS += $(call cc-option, -fuse-ld=bfd)
 
 obj-$(CONFIG_VDSO) += vdso.o
 extra-$(CONFIG_VDSO) += vdso.lds
-- 
cgit v1.1


From 0bbe6b5a73c00f8c8e7eb38fb86993f03cd64b70 Mon Sep 17 00:00:00 2001
From: Michael van der Westhuizen <michael@smart-africa.com>
Date: Thu, 4 Jun 2015 15:14:51 +0100
Subject: ARM: 8388/1: tcm: Don't crash when TCM banks are protected by
 TrustZone

Fixes the TCM initialisation code to handle TCM banks that are
present but inaccessible due to TrustZone configuration.  This is
the default case when enabling the non-secure world.  It may also
be the case that that the user decided to use TCM for TrustZone.

This change has exposed a bug in handling of TCM where no TCM bank
was usable (the 0 size TCM case).  This change addresses the
resulting hang.

This code only handles the ARMv6 TCMTR register format, and will not
work correctly on boards that use the ARMv7 (or any other) format.
This is handled by performing an early exit from the initialisation
function when the TCMTR reports any format other than v6.

Signed-off-by: Michael van der Westhuizen <michael@smart-africa.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/tcm.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 101 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c
index 7a3be1d..b10e136 100644
--- a/arch/arm/kernel/tcm.c
+++ b/arch/arm/kernel/tcm.c
@@ -17,6 +17,9 @@
 #include <asm/mach/map.h>
 #include <asm/memory.h>
 #include <asm/system_info.h>
+#include <asm/traps.h>
+
+#define TCMTR_FORMAT_MASK	0xe0000000U
 
 static struct gen_pool *tcm_pool;
 static bool dtcm_present;
@@ -176,6 +179,77 @@ static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks,
 }
 
 /*
+ * When we are running in the non-secure world and the secure world
+ * has not explicitly given us access to the TCM we will get an
+ * undefined error when reading the TCM region register in the
+ * setup_tcm_bank function (above).
+ *
+ * There are two variants of this register read that we need to trap,
+ * the read for the data TCM and the read for the instruction TCM:
+ *  c0370628:       ee196f11        mrc     15, 0, r6, cr9, cr1, {0}
+ *  c0370674:       ee196f31        mrc     15, 0, r6, cr9, cr1, {1}
+ *
+ * Our undef hook mask explicitly matches all fields of the encoded
+ * instruction other than the destination register.  The mask also
+ * only allows operand 2 to have the values 0 or 1.
+ *
+ * The undefined hook is defined as __init and __initdata, and therefore
+ * must be removed before tcm_init returns.
+ *
+ * In this particular case (MRC with ARM condition code ALways) the
+ * Thumb-2 and ARM instruction encoding are identical, so this hook
+ * will work on a Thumb-2 kernel.
+ *
+ * See A8.8.107, DDI0406C_C ARM Architecture Reference Manual, Encoding
+ * T1/A1 for the bit-by-bit details.
+ *
+ *  mrc   p15, 0, XX, c9, c1, 0
+ *  mrc   p15, 0, XX, c9, c1, 1
+ *   |  |  |   |   |   |   |  +---- opc2           0|1 = 000|001
+ *   |  |  |   |   |   |   +------- CRm              0 = 0001
+ *   |  |  |   |   |   +----------- CRn              0 = 1001
+ *   |  |  |   |   +--------------- Rt               ? = ????
+ *   |  |  |   +------------------- opc1             0 =  000
+ *   |  |  +----------------------- coproc          15 = 1111
+ *   |  +-------------------------- condition   ALways = 1110
+ *   +----------------------------- instruction    MRC = 1110
+ *
+ * Encoding this as per A8.8.107 of DDI0406C, Encoding T1/A1, yields:
+ *  1111 1111 1111 1111 0000 1111 1101 1111 Required Mask
+ *  1110 1110 0001 1001 ???? 1111 0001 0001 mrc p15, 0, XX, c9, c1, 0
+ *  1110 1110 0001 1001 ???? 1111 0011 0001 mrc p15, 0, XX, c9, c1, 1
+ *  [  ] [  ] [ ]| [  ] [  ] [  ] [ ]| +--- CRm
+ *    |    |   | |   |    |    |   | +----- SBO
+ *    |    |   | |   |    |    |   +------- opc2
+ *    |    |   | |   |    |    +----------- coproc
+ *    |    |   | |   |    +---------------- Rt
+ *    |    |   | |   +--------------------- CRn
+ *    |    |   | +------------------------- SBO
+ *    |    |   +--------------------------- opc1
+ *    |    +------------------------------- instruction
+ *    +------------------------------------ condition
+ */
+#define TCM_REGION_READ_MASK		0xffff0fdf
+#define TCM_REGION_READ_INSTR		0xee190f11
+#define DEST_REG_SHIFT			12
+#define DEST_REG_MASK			0xf
+
+static int __init tcm_handler(struct pt_regs *regs, unsigned int instr)
+{
+	regs->uregs[(instr >> DEST_REG_SHIFT) & DEST_REG_MASK] = 0;
+	regs->ARM_pc += 4;
+	return 0;
+}
+
+static struct undef_hook tcm_hook __initdata = {
+	.instr_mask	= TCM_REGION_READ_MASK,
+	.instr_val	= TCM_REGION_READ_INSTR,
+	.cpsr_mask	= MODE_MASK,
+	.cpsr_val	= SVC_MODE,
+	.fn		= tcm_handler
+};
+
+/*
  * This initializes the TCM memory
  */
 void __init tcm_init(void)
@@ -204,9 +278,18 @@ void __init tcm_init(void)
 	}
 
 	tcm_status = read_cpuid_tcmstatus();
+
+	/*
+	 * This code only supports v6-compatible TCMTR implementations.
+	 */
+	if (tcm_status & TCMTR_FORMAT_MASK)
+		return;
+
 	dtcm_banks = (tcm_status >> 16) & 0x03;
 	itcm_banks = (tcm_status & 0x03);
 
+	register_undef_hook(&tcm_hook);
+
 	/* Values greater than 2 for D/ITCM banks are "reserved" */
 	if (dtcm_banks > 2)
 		dtcm_banks = 0;
@@ -218,7 +301,7 @@ void __init tcm_init(void)
 		for (i = 0; i < dtcm_banks; i++) {
 			ret = setup_tcm_bank(0, i, dtcm_banks, &dtcm_end);
 			if (ret)
-				return;
+				goto unregister;
 		}
 		/* This means you compiled more code than fits into DTCM */
 		if (dtcm_code_sz > (dtcm_end - DTCM_OFFSET)) {
@@ -227,6 +310,12 @@ void __init tcm_init(void)
 				dtcm_code_sz, (dtcm_end - DTCM_OFFSET));
 			goto no_dtcm;
 		}
+		/*
+		 * This means that the DTCM sizes were 0 or the DTCM banks
+		 * were inaccessible due to TrustZone configuration.
+		 */
+		if (!(dtcm_end - DTCM_OFFSET))
+			goto no_dtcm;
 		dtcm_res.end = dtcm_end - 1;
 		request_resource(&iomem_resource, &dtcm_res);
 		dtcm_iomap[0].length = dtcm_end - DTCM_OFFSET;
@@ -250,15 +339,21 @@ no_dtcm:
 		for (i = 0; i < itcm_banks; i++) {
 			ret = setup_tcm_bank(1, i, itcm_banks, &itcm_end);
 			if (ret)
-				return;
+				goto unregister;
 		}
 		/* This means you compiled more code than fits into ITCM */
 		if (itcm_code_sz > (itcm_end - ITCM_OFFSET)) {
 			pr_info("CPU ITCM: %u bytes of code compiled to "
 				"ITCM but only %lu bytes of ITCM present\n",
 				itcm_code_sz, (itcm_end - ITCM_OFFSET));
-			return;
+			goto unregister;
 		}
+		/*
+		 * This means that the ITCM sizes were 0 or the ITCM banks
+		 * were inaccessible due to TrustZone configuration.
+		 */
+		if (!(itcm_end - ITCM_OFFSET))
+			goto unregister;
 		itcm_res.end = itcm_end - 1;
 		request_resource(&iomem_resource, &itcm_res);
 		itcm_iomap[0].length = itcm_end - ITCM_OFFSET;
@@ -275,6 +370,9 @@ no_dtcm:
 		pr_info("CPU ITCM: %u bytes of code compiled to ITCM but no "
 			"ITCM banks present in CPU\n", itcm_code_sz);
 	}
+
+unregister:
+	unregister_undef_hook(&tcm_hook);
 }
 
 /*
-- 
cgit v1.1


From 55af8a91640d362b20f2491336fce128c48f4079 Mon Sep 17 00:00:00 2001
From: Mike Looijmans <mike.looijmans@topic.nl>
Date: Wed, 3 Jun 2015 11:25:31 +0100
Subject: ARM: 8387/1: arm/mm/dma-mapping.c: Add arm_coherent_dma_mmap

When dma-coherent transfers are enabled, the mmap call must
not change the pg_prot flags in the vma struct.

Split the arm_dma_mmap into a common and specific parts,
and add a "arm_coherent_dma_mmap" implementation that does
not alter the page protection flags.

Tested on a topic-miami board (Zynq) using the ACP port
to transfer data between FPGA and CPU using the Dyplo
framework. Without this patch, byte-wise access to mmapped
coherent DMA memory was about 20x slower because of the
memory being marked as non-cacheable, and transfer speeds
would not exceed 240MB/s.

After this patch, the mapped memory is cacheable and the
transfer speed is again 600MB/s (limited by the FPGA) when
the data is in the L2 cache, while data integrity is being
maintained.

The patch has no effect on non-coherent DMA.

Signed-off-by: Mike Looijmans <mike.looijmans@topic.nl>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/dma-mapping.c | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 09c5fe3..d6f5256 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -148,11 +148,14 @@ static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
 	dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs);
 static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
 				  dma_addr_t handle, struct dma_attrs *attrs);
+static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		 struct dma_attrs *attrs);
 
 struct dma_map_ops arm_coherent_dma_ops = {
 	.alloc			= arm_coherent_dma_alloc,
 	.free			= arm_coherent_dma_free,
-	.mmap			= arm_dma_mmap,
+	.mmap			= arm_coherent_dma_mmap,
 	.get_sgtable		= arm_dma_get_sgtable,
 	.map_page		= arm_coherent_dma_map_page,
 	.map_sg			= arm_dma_map_sg,
@@ -690,10 +693,7 @@ static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
 			   attrs, __builtin_return_address(0));
 }
 
-/*
- * Create userspace mapping for the DMA-coherent memory.
- */
-int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
 		 struct dma_attrs *attrs)
 {
@@ -704,8 +704,6 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 	unsigned long pfn = dma_to_pfn(dev, dma_addr);
 	unsigned long off = vma->vm_pgoff;
 
-	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
-
 	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
 
@@ -721,6 +719,26 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 }
 
 /*
+ * Create userspace mapping for the DMA-coherent memory.
+ */
+static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		 struct dma_attrs *attrs)
+{
+	return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+}
+
+int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		 struct dma_attrs *attrs)
+{
+#ifdef CONFIG_MMU
+	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
+#endif	/* CONFIG_MMU */
+	return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+}
+
+/*
  * Free a buffer as defined by the above mapping.
  */
 static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
-- 
cgit v1.1


From 6fb18ac9366b74fadb2c1d0da88f143bfe87d001 Mon Sep 17 00:00:00 2001
From: Daniel Thompson <daniel.thompson@linaro.org>
Date: Wed, 10 Jun 2015 12:25:15 +0100
Subject: ARM: 8390/1: irqflags: Get arch_irqs_disabled from asm-generic

Commit cb1293e2f594 ("ARM: 8375/1: disable some options on ARMv7-M")
causes the build to on ARMv7-M machines:

  CC      arch/arm/kernel/asm-offsets.s
In file included from include/linux/sem.h:5:0,
                 from include/linux/sched.h:35,
                 from arch/arm/kernel/asm-offsets.c:14:
include/linux/rcupdate.h: In function 'rcu_read_lock_sched_held':
include/linux/rcupdate.h:539:2: error: implicit declaration of function
'arch_irqs_disabled' [-Werror=implicit-function-declaration]
  return preempt_count() != 0 || irqs_disabled();

asm-generic/irqflags.h provides an implementation of arch_irqs_disabled().
Lets grab an implementation from there!

Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Acked-by: Maxime Coquelin <maxime.coquelin@st.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/irqflags.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/arm/include/asm/irqflags.h b/arch/arm/include/asm/irqflags.h
index 3b763d6..4390814 100644
--- a/arch/arm/include/asm/irqflags.h
+++ b/arch/arm/include/asm/irqflags.h
@@ -20,6 +20,7 @@
 
 #if __LINUX_ARM_ARCH__ >= 6
 
+#define arch_local_irq_save arch_local_irq_save
 static inline unsigned long arch_local_irq_save(void)
 {
 	unsigned long flags;
@@ -31,6 +32,7 @@ static inline unsigned long arch_local_irq_save(void)
 	return flags;
 }
 
+#define arch_local_irq_enable arch_local_irq_enable
 static inline void arch_local_irq_enable(void)
 {
 	asm volatile(
@@ -40,6 +42,7 @@ static inline void arch_local_irq_enable(void)
 		: "memory", "cc");
 }
 
+#define arch_local_irq_disable arch_local_irq_disable
 static inline void arch_local_irq_disable(void)
 {
 	asm volatile(
@@ -56,6 +59,7 @@ static inline void arch_local_irq_disable(void)
 /*
  * Save the current interrupt enable state & disable IRQs
  */
+#define arch_local_irq_save arch_local_irq_save
 static inline unsigned long arch_local_irq_save(void)
 {
 	unsigned long flags, temp;
@@ -73,6 +77,7 @@ static inline unsigned long arch_local_irq_save(void)
 /*
  * Enable IRQs
  */
+#define arch_local_irq_enable arch_local_irq_enable
 static inline void arch_local_irq_enable(void)
 {
 	unsigned long temp;
@@ -88,6 +93,7 @@ static inline void arch_local_irq_enable(void)
 /*
  * Disable IRQs
  */
+#define arch_local_irq_disable arch_local_irq_disable
 static inline void arch_local_irq_disable(void)
 {
 	unsigned long temp;
@@ -135,6 +141,7 @@ static inline void arch_local_irq_disable(void)
 /*
  * Save the current interrupt enable state.
  */
+#define arch_local_save_flags arch_local_save_flags
 static inline unsigned long arch_local_save_flags(void)
 {
 	unsigned long flags;
@@ -147,6 +154,7 @@ static inline unsigned long arch_local_save_flags(void)
 /*
  * restore saved IRQ & FIQ state
  */
+#define arch_local_irq_restore arch_local_irq_restore
 static inline void arch_local_irq_restore(unsigned long flags)
 {
 	asm volatile(
@@ -156,10 +164,13 @@ static inline void arch_local_irq_restore(unsigned long flags)
 		: "memory", "cc");
 }
 
+#define arch_irqs_disabled_flags arch_irqs_disabled_flags
 static inline int arch_irqs_disabled_flags(unsigned long flags)
 {
 	return flags & IRQMASK_I_BIT;
 }
 
+#include <asm-generic/irqflags.h>
+
 #endif /* ifdef __KERNEL__ */
 #endif /* ifndef __ASM_ARM_IRQFLAGS_H */
-- 
cgit v1.1


From ec3bd0e68a679a7af2c46af1ddc9af8b534a8b0e Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Wed, 10 Jun 2015 20:23:24 +0100
Subject: ARM: 8391/1: l2c: add options to overwrite prefetching behavior

These options make it possible to overwrites the data and instruction
prefetching behavior of the arm pl310 cache controller.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 Documentation/devicetree/bindings/arm/l2cc.txt |  5 +++++
 arch/arm/mm/cache-l2x0.c                       | 20 ++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/l2cc.txt b/Documentation/devicetree/bindings/arm/l2cc.txt
index 0dbabe9..2251dcc 100644
--- a/Documentation/devicetree/bindings/arm/l2cc.txt
+++ b/Documentation/devicetree/bindings/arm/l2cc.txt
@@ -67,6 +67,11 @@ Optional properties:
   disable if zero.
 - arm,prefetch-offset : Override prefetch offset value. Valid values are
   0-7, 15, 23, and 31.
+- prefetch-data : Data prefetch. Value: <0> (forcibly disable), <1>
+  (forcibly enable), property absent (retain settings set by firmware)
+- prefetch-instr : Instruction prefetch. Value: <0> (forcibly disable),
+  <1> (forcibly enable), property absent (retain settings set by
+  firmware)
 
 Example:
 
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 90599f6..71b3d33 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1221,6 +1221,26 @@ static void __init l2c310_of_parse(const struct device_node *np,
 		pr_err("L2C-310 OF arm,prefetch-offset property value is missing\n");
 	}
 
+	ret = of_property_read_u32(np, "prefetch-data", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_DATA_PREFETCH;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DATA_PREFETCH;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF prefetch-data property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "prefetch-instr", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_INSTR_PREFETCH;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF prefetch-instr property value is missing\n");
+	}
+
 	l2x0_saved_regs.prefetch_ctrl = prefetch;
 }
 
-- 
cgit v1.1


From 970d96f9a81b0dd83ddd8bce0e5e1ba31881c5f5 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Tue, 2 Jun 2015 20:43:24 +0100
Subject: ARM: 8383/1: nommu: avoid deprecated source register on mov

In Thumb2 mode, the stack register r13 is deprecated if the
destination register is the program counter (r15). Similar to
head.S, head-nommu.S uses r13 to store the return address used
after configuring the CPU's CP15 register. However, since we do
not enable a MMU, there will be no address switch and it is
possible to use branch with link instruction to call
__after_proc_init.

Avoid using r13 completely by using bl to call __after_proc_init
and get rid of __secondary_switched.

Beside removing unnecessary complexity, this also fixes a
compiler warning when compiling a !MMU kernel:
Warning: Use of r13 as a source register is deprecated when r15
is the destination register.

Tested-?by: Maxime Coquelin <mcoquelin.stm32@gmail.com>

Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/head-nommu.S | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index c966016..9b8c5a1 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -77,13 +77,13 @@ ENTRY(stext)
 	orr	r6, r6, #(1 << MPU_RSR_EN)	@ Set region enabled bit
 	bl	__setup_mpu
 #endif
-	ldr	r13, =__mmap_switched		@ address to jump to after
-						@ initialising sctlr
+
 	badr	lr, 1f				@ return (PIC) address
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
 	add	r12, r12, r10
 	ret	r12
- 1:	b	__after_proc_init
+1:	bl	__after_proc_init
+	b	__mmap_switched
 ENDPROC(stext)
 
 #ifdef CONFIG_SMP
@@ -106,8 +106,7 @@ ENTRY(secondary_startup)
 	movs	r10, r5				@ invalid processor?
 	beq	__error_p			@ yes, error 'p'
 
-	adr	r4, __secondary_data
-	ldmia	r4, {r7, r12}
+	ldr	r7, __secondary_data
 
 #ifdef CONFIG_ARM_MPU
 	/* Use MPU region info supplied by __cpu_up */
@@ -115,23 +114,19 @@ ENTRY(secondary_startup)
 	bl      __setup_mpu			@ Initialize the MPU
 #endif
 
-	badr	lr, __after_proc_init		@ return address
-	mov	r13, r12			@ __secondary_switched address
+	badr	lr, 1f				@ return (PIC) address
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
 	add	r12, r12, r10
 	ret	r12
-ENDPROC(secondary_startup)
-
-ENTRY(__secondary_switched)
+1:	bl	__after_proc_init
 	ldr	sp, [r7, #12]			@ set up the stack pointer
 	mov	fp, #0
 	b	secondary_start_kernel
-ENDPROC(__secondary_switched)
+ENDPROC(secondary_startup)
 
 	.type	__secondary_data, %object
 __secondary_data:
 	.long	secondary_data
-	.long	__secondary_switched
 #endif /* CONFIG_SMP */
 
 /*
@@ -164,7 +159,7 @@ __after_proc_init:
 #endif
 	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
 #endif /* CONFIG_CPU_CP15 */
-	ret	r13
+	ret	lr
 ENDPROC(__after_proc_init)
 	.ltorg
 
-- 
cgit v1.1


From 9ce93bdda7b71fd154986d36c0c1ccf0e7338e26 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 12 Jun 2015 21:19:35 +0100
Subject: ARM: fix new BSYM() usage introduced via for-arm-soc branch

Commit 32e55a777f83 ("ARM: 8389/1: Add cpu_resume_arm() for firmwares
that resume in ARM state") needed to introduce a new usage of BSYM()
to fix a problem with a previous patch.  This in turn causes a conflict
with the "bsym" branch which removes this symbol, replacing it with a
'badr' assembly macro.  Fix this up.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/sleep.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index c5e1e21..0f6c100 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -122,7 +122,7 @@ ENDPROC(cpu_resume_after_mmu)
 #ifdef CONFIG_MMU
 	.arm
 ENTRY(cpu_resume_arm)
- THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is entered in ARM.
+ THUMB(	badr	r9, 1f		)	@ Kernel is entered in ARM.
  THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
  THUMB(	.thumb			)	@ switch to Thumb now.
  THUMB(1:			)
-- 
cgit v1.1


From 002af195a8c720ca47c7884fd0390f3b327423b9 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 23 Jun 2015 20:49:05 -0700
Subject: ARM: Fix build if CLKDEV_LOOKUP is not configured

mips:allmodconfig fails to build with

drivers/clocksource/timer-sp804.c: In function '__sp804_clocksource_and_sched_clock_init':
drivers/clocksource/timer-sp804.c:88:3: error: implicit declaration of function 'clk_get_sys'

because CLKDEV_LOOKUP is not configured and the driver depends on it.

Fixes: 0b7402dce445 ("ARM: 8366/1: move Dual-Timer SP804 driver to drivers/clocksource")
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/clocksource/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 5053470..b2590d6 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -134,7 +134,7 @@ config ARM_GLOBAL_TIMER
 
 config ARM_TIMER_SP804
 	bool "Support for Dual Timer SP804 module"
-	depends on GENERIC_SCHED_CLOCK
+	depends on GENERIC_SCHED_CLOCK && CLKDEV_LOOKUP
 	select CLKSRC_MMIO
 	select CLKSRC_OF if OF
 
-- 
cgit v1.1