From 1e904e1bf6f1285cc2dd5696c44b7cf78cda643f Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Wed, 2 May 2012 20:56:52 -0400
Subject: ARM: vexpress: introduce DCSCB support

This adds basic CPU and cluster reset controls on RTSM for the
A15x4-A7x4 model configuration using the Dual Cluster System
Configuration Block (DCSCB).

The cache coherency interconnect (CCI) is not handled yet.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Pawel Moll <pawel.moll@arm.com>
---
 arch/arm/mach-vexpress/Kconfig  |   8 ++
 arch/arm/mach-vexpress/Makefile |   1 +
 arch/arm/mach-vexpress/dcscb.c  | 164 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 173 insertions(+)
 create mode 100644 arch/arm/mach-vexpress/dcscb.c

(limited to 'arch/arm/mach-vexpress')

diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index 5907e10..2f46385 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -57,4 +57,12 @@ config ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA
 config ARCH_VEXPRESS_CA9X4
 	bool "Versatile Express Cortex-A9x4 tile"
 
+config ARCH_VEXPRESS_DCSCB
+	bool "Dual Cluster System Control Block (DCSCB) support"
+	depends on MCPM
+	help
+	  Support for the Dual Cluster System Configuration Block (DCSCB).
+	  This is needed to provide CPU and cluster power management
+	  on RTSM implementing big.LITTLE.
+
 endmenu
diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile
index 42703e8..518519f 100644
--- a/arch/arm/mach-vexpress/Makefile
+++ b/arch/arm/mach-vexpress/Makefile
@@ -6,5 +6,6 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
 
 obj-y					:= v2m.o
 obj-$(CONFIG_ARCH_VEXPRESS_CA9X4)	+= ct-ca9x4.o
+obj-$(CONFIG_ARCH_VEXPRESS_DCSCB)	+= dcscb.o
 obj-$(CONFIG_SMP)			+= platsmp.o
 obj-$(CONFIG_HOTPLUG_CPU)		+= hotplug.o
diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c
new file mode 100644
index 0000000..2ca4bbc
--- /dev/null
+++ b/arch/arm/mach-vexpress/dcscb.c
@@ -0,0 +1,164 @@
+/*
+ * arch/arm/mach-vexpress/dcscb.c - Dual Cluster System Configuration Block
+ *
+ * Created by:	Nicolas Pitre, May 2012
+ * Copyright:	(C) 2012-2013  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/of_address.h>
+#include <linux/vexpress.h>
+
+#include <asm/mcpm.h>
+#include <asm/proc-fns.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+
+
+#define RST_HOLD0	0x0
+#define RST_HOLD1	0x4
+#define SYS_SWRESET	0x8
+#define RST_STAT0	0xc
+#define RST_STAT1	0x10
+#define EAG_CFG_R	0x20
+#define EAG_CFG_W	0x24
+#define KFC_CFG_R	0x28
+#define KFC_CFG_W	0x2c
+#define DCS_CFG_R	0x30
+
+/*
+ * We can't use regular spinlocks. In the switcher case, it is possible
+ * for an outbound CPU to call power_down() while its inbound counterpart
+ * is already live using the same logical CPU number which trips lockdep
+ * debugging.
+ */
+static arch_spinlock_t dcscb_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+static void __iomem *dcscb_base;
+
+static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
+{
+	unsigned int rst_hold, cpumask = (1 << cpu);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	if (cpu >= 4 || cluster >= 2)
+		return -EINVAL;
+
+	/*
+	 * Since this is called with IRQs enabled, and no arch_spin_lock_irq
+	 * variant exists, we need to disable IRQs manually here.
+	 */
+	local_irq_disable();
+	arch_spin_lock(&dcscb_lock);
+
+	rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+	if (rst_hold & (1 << 8)) {
+		/* remove cluster reset and add individual CPU's reset */
+		rst_hold &= ~(1 << 8);
+		rst_hold |= 0xf;
+	}
+	rst_hold &= ~(cpumask | (cpumask << 4));
+	writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+
+	arch_spin_unlock(&dcscb_lock);
+	local_irq_enable();
+
+	return 0;
+}
+
+static void dcscb_power_down(void)
+{
+	unsigned int mpidr, cpu, cluster, rst_hold, cpumask, last_man;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	cpumask = (1 << cpu);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cpu >= 4 || cluster >= 2);
+
+	arch_spin_lock(&dcscb_lock);
+	rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+	rst_hold |= cpumask;
+	if (((rst_hold | (rst_hold >> 4)) & 0xf) == 0xf)
+		rst_hold |= (1 << 8);
+	writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+	arch_spin_unlock(&dcscb_lock);
+	last_man = (rst_hold & (1 << 8));
+
+	/*
+	 * Now let's clean our L1 cache and shut ourself down.
+	 * If we're the last CPU in this cluster then clean L2 too.
+	 */
+
+	/*
+	 * A15/A7 can hit in the cache with SCTLR.C=0, so we don't need
+	 * a preliminary flush here for those CPUs.  At least, that's
+	 * the theory -- without the extra flush, Linux explodes on
+	 * RTSM (to be investigated)..
+	 */
+	flush_cache_louis();
+	set_cr(get_cr() & ~CR_C);
+
+	if (!last_man) {
+		flush_cache_louis();
+	} else {
+		flush_cache_all();
+		outer_flush_all();
+	}
+
+	/* Disable local coherency by clearing the ACTLR "SMP" bit: */
+	set_auxcr(get_auxcr() & ~(1 << 6));
+
+	/* Now we are prepared for power-down, do it: */
+	dsb();
+	wfi();
+
+	/* Not dead at this point?  Let our caller cope. */
+}
+
+static const struct mcpm_platform_ops dcscb_power_ops = {
+	.power_up	= dcscb_power_up,
+	.power_down	= dcscb_power_down,
+};
+
+static int __init dcscb_init(void)
+{
+	struct device_node *node;
+	int ret;
+
+	node = of_find_compatible_node(NULL, NULL, "arm,rtsm,dcscb");
+	if (!node)
+		return -ENODEV;
+	dcscb_base = of_iomap(node, 0);
+	if (!dcscb_base)
+		return -EADDRNOTAVAIL;
+
+	ret = mcpm_platform_register(&dcscb_power_ops);
+	if (ret) {
+		iounmap(dcscb_base);
+		return ret;
+	}
+
+	pr_info("VExpress DCSCB support installed\n");
+
+	/*
+	 * Future entries into the kernel can now go
+	 * through the cluster entry vectors.
+	 */
+	vexpress_flags_set(virt_to_phys(mcpm_entry_point));
+
+	return 0;
+}
+
+early_initcall(dcscb_init);
-- 
cgit v1.1


From 13eae144ec754ab64521891d2cffc3156f6f083f Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Mon, 16 Jul 2012 22:07:10 -0400
Subject: ARM: vexpress/dcscb: add CPU use counts to the power up/down API
 implementation

It is possible for a CPU to be told to power up before it managed
to power itself down.  Solve this race with a usage count to deal
with this possibility as mandated by the MCPM API definition.

Signed-off-by: nicolas Pitre <nico@linaro.org>
Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Pawel Moll <pawel.moll@arm.com>
---
 arch/arm/mach-vexpress/dcscb.c | 74 +++++++++++++++++++++++++++++++++---------
 1 file changed, 59 insertions(+), 15 deletions(-)

(limited to 'arch/arm/mach-vexpress')

diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c
index 2ca4bbc..65dee78 100644
--- a/arch/arm/mach-vexpress/dcscb.c
+++ b/arch/arm/mach-vexpress/dcscb.c
@@ -44,6 +44,7 @@
 static arch_spinlock_t dcscb_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 
 static void __iomem *dcscb_base;
+static int dcscb_use_count[4][2];
 
 static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
 {
@@ -60,14 +61,27 @@ static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
 	local_irq_disable();
 	arch_spin_lock(&dcscb_lock);
 
-	rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
-	if (rst_hold & (1 << 8)) {
-		/* remove cluster reset and add individual CPU's reset */
-		rst_hold &= ~(1 << 8);
-		rst_hold |= 0xf;
+	dcscb_use_count[cpu][cluster]++;
+	if (dcscb_use_count[cpu][cluster] == 1) {
+		rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+		if (rst_hold & (1 << 8)) {
+			/* remove cluster reset and add individual CPU's reset */
+			rst_hold &= ~(1 << 8);
+			rst_hold |= 0xf;
+		}
+		rst_hold &= ~(cpumask | (cpumask << 4));
+		writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+	} else if (dcscb_use_count[cpu][cluster] != 2) {
+		/*
+		 * The only possible values are:
+		 * 0 = CPU down
+		 * 1 = CPU (still) up
+		 * 2 = CPU requested to be up before it had a chance
+		 *     to actually make itself down.
+		 * Any other value is a bug.
+		 */
+		BUG();
 	}
-	rst_hold &= ~(cpumask | (cpumask << 4));
-	writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
 
 	arch_spin_unlock(&dcscb_lock);
 	local_irq_enable();
@@ -77,7 +91,8 @@ static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
 
 static void dcscb_power_down(void)
 {
-	unsigned int mpidr, cpu, cluster, rst_hold, cpumask, last_man;
+	unsigned int mpidr, cpu, cluster, rst_hold, cpumask;
+	bool last_man = false, skip_wfi = false;
 
 	mpidr = read_cpuid_mpidr();
 	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
@@ -88,13 +103,26 @@ static void dcscb_power_down(void)
 	BUG_ON(cpu >= 4 || cluster >= 2);
 
 	arch_spin_lock(&dcscb_lock);
-	rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
-	rst_hold |= cpumask;
-	if (((rst_hold | (rst_hold >> 4)) & 0xf) == 0xf)
-		rst_hold |= (1 << 8);
-	writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+	dcscb_use_count[cpu][cluster]--;
+	if (dcscb_use_count[cpu][cluster] == 0) {
+		rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+		rst_hold |= cpumask;
+		if (((rst_hold | (rst_hold >> 4)) & 0xf) == 0xf) {
+			rst_hold |= (1 << 8);
+			last_man = true;
+		}
+		writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+	} else if (dcscb_use_count[cpu][cluster] == 1) {
+		/*
+		 * A power_up request went ahead of us.
+		 * Even if we do not want to shut this CPU down,
+		 * the caller expects a certain state as if the WFI
+		 * was aborted.  So let's continue with cache cleaning.
+		 */
+		skip_wfi = true;
+	} else
+		BUG();
 	arch_spin_unlock(&dcscb_lock);
-	last_man = (rst_hold & (1 << 8));
 
 	/*
 	 * Now let's clean our L1 cache and shut ourself down.
@@ -122,7 +150,8 @@ static void dcscb_power_down(void)
 
 	/* Now we are prepared for power-down, do it: */
 	dsb();
-	wfi();
+	if (!skip_wfi)
+		wfi();
 
 	/* Not dead at this point?  Let our caller cope. */
 }
@@ -132,6 +161,19 @@ static const struct mcpm_platform_ops dcscb_power_ops = {
 	.power_down	= dcscb_power_down,
 };
 
+static void __init dcscb_usage_count_init(void)
+{
+	unsigned int mpidr, cpu, cluster;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cpu >= 4 || cluster >= 2);
+	dcscb_use_count[cpu][cluster] = 1;
+}
+
 static int __init dcscb_init(void)
 {
 	struct device_node *node;
@@ -144,6 +186,8 @@ static int __init dcscb_init(void)
 	if (!dcscb_base)
 		return -EADDRNOTAVAIL;
 
+	dcscb_usage_count_init();
+
 	ret = mcpm_platform_register(&dcscb_power_ops);
 	if (ret) {
 		iounmap(dcscb_base);
-- 
cgit v1.1


From 2f2df895ee3518fbef28dfa6736dfd1e1c04ee1c Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Wed, 18 Jul 2012 16:41:16 -0400
Subject: ARM: vexpress/dcscb: do not hardcode number of CPUs per cluster

If 4 CPUs are assumed, the A15x1-A7x1 model configuration would never
shut down the initial cluster as the 0xf reset bit mask will never be
observed.  Let's construct this mask based on the provided information
in the DCSCB config register for the number of CPUs per cluster.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Pawel Moll <pawel.moll@arm.com>
---
 arch/arm/mach-vexpress/dcscb.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'arch/arm/mach-vexpress')

diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c
index 65dee78..44aa7b0 100644
--- a/arch/arm/mach-vexpress/dcscb.c
+++ b/arch/arm/mach-vexpress/dcscb.c
@@ -45,10 +45,12 @@ static arch_spinlock_t dcscb_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 
 static void __iomem *dcscb_base;
 static int dcscb_use_count[4][2];
+static int dcscb_allcpus_mask[2];
 
 static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
 {
 	unsigned int rst_hold, cpumask = (1 << cpu);
+	unsigned int all_mask = dcscb_allcpus_mask[cluster];
 
 	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
 	if (cpu >= 4 || cluster >= 2)
@@ -67,7 +69,7 @@ static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
 		if (rst_hold & (1 << 8)) {
 			/* remove cluster reset and add individual CPU's reset */
 			rst_hold &= ~(1 << 8);
-			rst_hold |= 0xf;
+			rst_hold |= all_mask;
 		}
 		rst_hold &= ~(cpumask | (cpumask << 4));
 		writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
@@ -91,13 +93,14 @@ static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
 
 static void dcscb_power_down(void)
 {
-	unsigned int mpidr, cpu, cluster, rst_hold, cpumask;
+	unsigned int mpidr, cpu, cluster, rst_hold, cpumask, all_mask;
 	bool last_man = false, skip_wfi = false;
 
 	mpidr = read_cpuid_mpidr();
 	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
 	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
 	cpumask = (1 << cpu);
+	all_mask = dcscb_allcpus_mask[cluster];
 
 	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
 	BUG_ON(cpu >= 4 || cluster >= 2);
@@ -107,7 +110,7 @@ static void dcscb_power_down(void)
 	if (dcscb_use_count[cpu][cluster] == 0) {
 		rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
 		rst_hold |= cpumask;
-		if (((rst_hold | (rst_hold >> 4)) & 0xf) == 0xf) {
+		if (((rst_hold | (rst_hold >> 4)) & all_mask) == all_mask) {
 			rst_hold |= (1 << 8);
 			last_man = true;
 		}
@@ -177,6 +180,7 @@ static void __init dcscb_usage_count_init(void)
 static int __init dcscb_init(void)
 {
 	struct device_node *node;
+	unsigned int cfg;
 	int ret;
 
 	node = of_find_compatible_node(NULL, NULL, "arm,rtsm,dcscb");
@@ -185,7 +189,9 @@ static int __init dcscb_init(void)
 	dcscb_base = of_iomap(node, 0);
 	if (!dcscb_base)
 		return -EADDRNOTAVAIL;
-
+	cfg = readl_relaxed(dcscb_base + DCS_CFG_R);
+	dcscb_allcpus_mask[0] = (1 << (((cfg >> 16) >> (0 << 2)) & 0xf)) - 1;
+	dcscb_allcpus_mask[1] = (1 << (((cfg >> 16) >> (1 << 2)) & 0xf)) - 1;
 	dcscb_usage_count_init();
 
 	ret = mcpm_platform_register(&dcscb_power_ops);
-- 
cgit v1.1


From d41418c0c0c0dc8a367af96b8e547f31477d9aa0 Mon Sep 17 00:00:00 2001
From: Dave Martin <dave.martin@linaro.org>
Date: Tue, 17 Jul 2012 14:25:44 +0100
Subject: ARM: vexpress/dcscb: handle platform coherency exit/setup and CCI

Add the required code to properly handle race free platform coherency exit
to the DCSCB power down method.

The power_up_setup callback is used to enable the CCI interface for
the cluster being brought up.  This must be done in assembly before
the kernel environment is entered.

Thanks to Achin Gupta and Nicolas Pitre for their help and
contributions.

Signed-off-by: Dave Martin <dave.martin@linaro.org>
Signed-off-by: Nicolas Pitre <nico@linaro.org>
Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Pawel Moll <pawel.moll@arm.com>
---
 arch/arm/mach-vexpress/Kconfig       |  1 +
 arch/arm/mach-vexpress/Makefile      |  2 +-
 arch/arm/mach-vexpress/dcscb.c       | 77 +++++++++++++++++++++++++++---------
 arch/arm/mach-vexpress/dcscb_setup.S | 38 ++++++++++++++++++
 4 files changed, 98 insertions(+), 20 deletions(-)
 create mode 100644 arch/arm/mach-vexpress/dcscb_setup.S

(limited to 'arch/arm/mach-vexpress')

diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index 2f46385..b8bbabe 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -60,6 +60,7 @@ config ARCH_VEXPRESS_CA9X4
 config ARCH_VEXPRESS_DCSCB
 	bool "Dual Cluster System Control Block (DCSCB) support"
 	depends on MCPM
+	select ARM_CCI
 	help
 	  Support for the Dual Cluster System Configuration Block (DCSCB).
 	  This is needed to provide CPU and cluster power management
diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile
index 518519f..48ba89a 100644
--- a/arch/arm/mach-vexpress/Makefile
+++ b/arch/arm/mach-vexpress/Makefile
@@ -6,6 +6,6 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
 
 obj-y					:= v2m.o
 obj-$(CONFIG_ARCH_VEXPRESS_CA9X4)	+= ct-ca9x4.o
-obj-$(CONFIG_ARCH_VEXPRESS_DCSCB)	+= dcscb.o
+obj-$(CONFIG_ARCH_VEXPRESS_DCSCB)	+= dcscb.o	dcscb_setup.o
 obj-$(CONFIG_SMP)			+= platsmp.o
 obj-$(CONFIG_HOTPLUG_CPU)		+= hotplug.o
diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c
index 44aa7b0..16d57a8 100644
--- a/arch/arm/mach-vexpress/dcscb.c
+++ b/arch/arm/mach-vexpress/dcscb.c
@@ -16,6 +16,7 @@
 #include <linux/errno.h>
 #include <linux/of_address.h>
 #include <linux/vexpress.h>
+#include <linux/arm-cci.h>
 
 #include <asm/mcpm.h>
 #include <asm/proc-fns.h>
@@ -105,7 +106,10 @@ static void dcscb_power_down(void)
 	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
 	BUG_ON(cpu >= 4 || cluster >= 2);
 
+	__mcpm_cpu_going_down(cpu, cluster);
+
 	arch_spin_lock(&dcscb_lock);
+	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
 	dcscb_use_count[cpu][cluster]--;
 	if (dcscb_use_count[cpu][cluster] == 0) {
 		rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
@@ -125,31 +129,59 @@ static void dcscb_power_down(void)
 		skip_wfi = true;
 	} else
 		BUG();
-	arch_spin_unlock(&dcscb_lock);
-
-	/*
-	 * Now let's clean our L1 cache and shut ourself down.
-	 * If we're the last CPU in this cluster then clean L2 too.
-	 */
 
-	/*
-	 * A15/A7 can hit in the cache with SCTLR.C=0, so we don't need
-	 * a preliminary flush here for those CPUs.  At least, that's
-	 * the theory -- without the extra flush, Linux explodes on
-	 * RTSM (to be investigated)..
-	 */
-	flush_cache_louis();
-	set_cr(get_cr() & ~CR_C);
+	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
+		arch_spin_unlock(&dcscb_lock);
 
-	if (!last_man) {
-		flush_cache_louis();
-	} else {
+		/*
+		 * Flush all cache levels for this cluster.
+		 *
+		 * A15/A7 can hit in the cache with SCTLR.C=0, so we don't need
+		 * a preliminary flush here for those CPUs.  At least, that's
+		 * the theory -- without the extra flush, Linux explodes on
+		 * RTSM (to be investigated).
+		 */
+		flush_cache_all();
+		set_cr(get_cr() & ~CR_C);
 		flush_cache_all();
+
+		/*
+		 * This is a harmless no-op.  On platforms with a real
+		 * outer cache this might either be needed or not,
+		 * depending on where the outer cache sits.
+		 */
 		outer_flush_all();
+
+		/* Disable local coherency by clearing the ACTLR "SMP" bit: */
+		set_auxcr(get_auxcr() & ~(1 << 6));
+
+		/*
+		 * Disable cluster-level coherency by masking
+		 * incoming snoops and DVM messages:
+		 */
+		cci_disable_port_by_cpu(mpidr);
+
+		__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
+	} else {
+		arch_spin_unlock(&dcscb_lock);
+
+		/*
+		 * Flush the local CPU cache.
+		 *
+		 * A15/A7 can hit in the cache with SCTLR.C=0, so we don't need
+		 * a preliminary flush here for those CPUs.  At least, that's
+		 * the theory -- without the extra flush, Linux explodes on
+		 * RTSM (to be investigated).
+		 */
+		flush_cache_louis();
+		set_cr(get_cr() & ~CR_C);
+		flush_cache_louis();
+
+		/* Disable local coherency by clearing the ACTLR "SMP" bit: */
+		set_auxcr(get_auxcr() & ~(1 << 6));
 	}
 
-	/* Disable local coherency by clearing the ACTLR "SMP" bit: */
-	set_auxcr(get_auxcr() & ~(1 << 6));
+	__mcpm_cpu_down(cpu, cluster);
 
 	/* Now we are prepared for power-down, do it: */
 	dsb();
@@ -177,12 +209,17 @@ static void __init dcscb_usage_count_init(void)
 	dcscb_use_count[cpu][cluster] = 1;
 }
 
+extern void dcscb_power_up_setup(unsigned int affinity_level);
+
 static int __init dcscb_init(void)
 {
 	struct device_node *node;
 	unsigned int cfg;
 	int ret;
 
+	if (!cci_probed())
+		return -ENODEV;
+
 	node = of_find_compatible_node(NULL, NULL, "arm,rtsm,dcscb");
 	if (!node)
 		return -ENODEV;
@@ -195,6 +232,8 @@ static int __init dcscb_init(void)
 	dcscb_usage_count_init();
 
 	ret = mcpm_platform_register(&dcscb_power_ops);
+	if (!ret)
+		ret = mcpm_sync_init(dcscb_power_up_setup);
 	if (ret) {
 		iounmap(dcscb_base);
 		return ret;
diff --git a/arch/arm/mach-vexpress/dcscb_setup.S b/arch/arm/mach-vexpress/dcscb_setup.S
new file mode 100644
index 0000000..4bb7fbe
--- /dev/null
+++ b/arch/arm/mach-vexpress/dcscb_setup.S
@@ -0,0 +1,38 @@
+/*
+ * arch/arm/include/asm/dcscb_setup.S
+ *
+ * Created by:  Dave Martin, 2012-06-22
+ * Copyright:   (C) 2012-2013  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+
+ENTRY(dcscb_power_up_setup)
+
+	cmp	r0, #0			@ check affinity level
+	beq	2f
+
+/*
+ * Enable cluster-level coherency, in preparation for turning on the MMU.
+ * The ACTLR SMP bit does not need to be set here, because cpu_resume()
+ * already restores that.
+ *
+ * A15/A7 may not require explicit L2 invalidation on reset, dependent
+ * on hardware integration decisions.
+ * For now, this code assumes that L2 is either already invalidated,
+ * or invalidation is not required.
+ */
+
+	b	cci_enable_port_for_self
+
+2:	@ Implementation-specific local CPU setup operations should go here,
+	@ if any.  In this case, there is nothing to do.
+
+	bx	lr
+
+ENDPROC(dcscb_power_up_setup)
-- 
cgit v1.1


From 033a899c9b06e7e4f6733a637fee34c632ca2d47 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@linaro.org>
Date: Wed, 30 Jan 2013 09:12:55 +0000
Subject: ARM: vexpress: Select multi-cluster SMP operation if required

Signed-off-by: Jon Medhurst <tixy@linaro.org>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Pawel Moll <pawel.moll@arm.com>
---
 arch/arm/mach-vexpress/core.h    |  2 ++
 arch/arm/mach-vexpress/platsmp.c | 20 ++++++++++++++++++++
 arch/arm/mach-vexpress/v2m.c     |  1 +
 3 files changed, 23 insertions(+)

(limited to 'arch/arm/mach-vexpress')

diff --git a/arch/arm/mach-vexpress/core.h b/arch/arm/mach-vexpress/core.h
index f134cd4..bde4374 100644
--- a/arch/arm/mach-vexpress/core.h
+++ b/arch/arm/mach-vexpress/core.h
@@ -6,6 +6,8 @@
 
 void vexpress_dt_smp_map_io(void);
 
+bool vexpress_smp_init_ops(void);
+
 extern struct smp_operations	vexpress_smp_ops;
 
 extern void vexpress_cpu_die(unsigned int cpu);
diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c
index dc1ace5..993c9ae 100644
--- a/arch/arm/mach-vexpress/platsmp.c
+++ b/arch/arm/mach-vexpress/platsmp.c
@@ -12,9 +12,11 @@
 #include <linux/errno.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/vexpress.h>
 
+#include <asm/mcpm.h>
 #include <asm/smp_scu.h>
 #include <asm/mach/map.h>
 
@@ -203,3 +205,21 @@ struct smp_operations __initdata vexpress_smp_ops = {
 	.cpu_die		= vexpress_cpu_die,
 #endif
 };
+
+bool __init vexpress_smp_init_ops(void)
+{
+#ifdef CONFIG_MCPM
+	/*
+	 * The best way to detect a multi-cluster configuration at the moment
+	 * is to look for the presence of a CCI in the system.
+	 * Override the default vexpress_smp_ops if so.
+	 */
+	struct device_node *node;
+	node = of_find_compatible_node(NULL, NULL, "arm,cci-400");
+	if (node && of_device_is_available(node)) {
+		mcpm_smp_set_ops();
+		return true;
+	}
+#endif
+	return false;
+}
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 8802030..b0eccf7 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -456,6 +456,7 @@ static const char * const v2m_dt_match[] __initconst = {
 DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express")
 	.dt_compat	= v2m_dt_match,
 	.smp		= smp_ops(vexpress_smp_ops),
+	.smp_init	= smp_init_ops(vexpress_smp_init_ops),
 	.map_io		= v2m_dt_map_io,
 	.init_early	= v2m_dt_init_early,
 	.init_irq	= irqchip_init,
-- 
cgit v1.1