sparc64: Build cpu list and mondo block at top-level xcall_deliver().

Then modify all of the xcall dispatch implementations to get passed and use this information.

Now all of the xcall dispatch implementations do not need to be mindful of details such as "is current cpu in the list?" and "is cpu online?"

Signed-off-by: David S. Miller <davem@davemloft.net>
author: David S. Miller <davem@davemloft.net> 2008-08-04 16:42:58 -0700
committer: David S. Miller <davem@davemloft.net> 2008-08-04 16:42:58 -0700
commit: 90f7ae8a55190f5edfb9fda957e25c994ed39ec4 (patch)
tree: b815a08c25f4acf37b02a982c67c6d0efd2fe480 /arch/sparc64
parent: c02a5119e862dea9a1361182840d41ae1fe24227 (diff)
download: op-kernel-dev-90f7ae8a55190f5edfb9fda957e25c994ed39ec4.zip
op-kernel-dev-90f7ae8a55190f5edfb9fda957e25c994ed39ec4.tar.gz
1 files changed, 69 insertions, 44 deletions
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 6d458b3..2387a9b 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -459,30 +459,35 @@ again:
 	}
 }
 
-static inline void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
+static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
+	u64 *mondo, data0, data1, data2;
+	u16 *cpu_list;
 	u64 pstate;
 	int i;
 
 	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
-	for_each_cpu_mask_nr(i, *mask)
-		spitfire_xcall_helper(data0, data1, data2, pstate, i);
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
+	data0 = mondo[0];
+	data1 = mondo[1];
+	data2 = mondo[2];
+	for (i = 0; i < cnt; i++)
+		spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
 }
 
 /* Cheetah now allows to send the whole 64-bytes of data in the interrupt
  * packet, but we have no use for that.  However we do take advantage of
  * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
  */
-static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask_p)
+static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	u64 pstate, ver, busy_mask;
 	int nack_busy_id, is_jbus, need_more;
-	cpumask_t mask;
-
-	if (cpus_empty(*mask_p))
-		return;
+	u64 *mondo, pstate, ver, busy_mask;
+	u16 *cpu_list;
 
-	mask = *mask_p;
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
 
 	/* Unfortunately, someone at Sun had the brilliant idea to make the
 	 * busy/nack fields hard-coded by ITID number for this Ultra-III
@@ -505,7 +510,7 @@ retry:
 			     "stxa	%2, [%5] %6\n\t"
 			     "membar	#Sync\n\t"
 			     : /* no outputs */
-			     : "r" (data0), "r" (data1), "r" (data2),
+			     : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
 			       "r" (0x40), "r" (0x50), "r" (0x60),
 			       "i" (ASI_INTR_W));
 
@@ -514,11 +519,16 @@ retry:
 	{
 		int i;
 
-		for_each_cpu_mask_nr(i, mask) {
-			u64 target = (i << 14) | 0x70;
+		for (i = 0; i < cnt; i++) {
+			u64 target, nr;
+
+			nr = cpu_list[i];
+			if (nr == 0xffff)
+				continue;
 
+			target = (nr << 14) | 0x70;
 			if (is_jbus) {
-				busy_mask |= (0x1UL << (i * 2));
+				busy_mask |= (0x1UL << (nr * 2));
 			} else {
 				target |= (nack_busy_id << 24);
 				busy_mask |= (0x1UL <<
@@ -552,11 +562,13 @@ retry:
 				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
 						     : : "r" (pstate));
 				if (unlikely(need_more)) {
-					int i, cnt = 0;
-					for_each_cpu_mask_nr(i, mask) {
-						cpu_clear(i, mask);
-						cnt++;
-						if (cnt == 32)
+					int i, this_cnt = 0;
+					for (i = 0; i < cnt; i++) {
+						if (cpu_list[i] == 0xffff)
+							continue;
+						cpu_list[i] = 0xffff;
+						this_cnt++;
+						if (this_cnt == 32)
 							break;
 					}
 					goto retry;
@@ -587,16 +599,20 @@ retry:
 			/* Clear out the mask bits for cpus which did not
 			 * NACK us.
 			 */
-			for_each_cpu_mask_nr(i, mask) {
-				u64 check_mask;
+			for (i = 0; i < cnt; i++) {
+				u64 check_mask, nr;
+
+				nr = cpu_list[i];
+				if (nr == 0xffff)
+					continue;
 
 				if (is_jbus)
-					check_mask = (0x2UL << (2*i));
+					check_mask = (0x2UL << (2*nr));
 				else
 					check_mask = (0x2UL <<
 						      this_busy_nack);
 				if ((dispatch_stat & check_mask) == 0)
-					cpu_clear(i, mask);
+					cpu_list[i] = 0xffff;
 				this_busy_nack += 2;
 				if (this_busy_nack == 64)
 					break;
@@ -608,34 +624,17 @@ retry:
 }
 
 /* Multi-cpu list version.  */
-static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
+static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	int cnt, retries, this_cpu, prev_sent, i;
+	int retries, this_cpu, prev_sent, i;
 	unsigned long status;
 	cpumask_t error_mask;
-	struct trap_per_cpu *tb;
 	u16 *cpu_list;
-	u64 *mondo;
-
-	if (cpus_empty(*mask))
-		return;
 
 	this_cpu = smp_processor_id();
-	tb = &trap_block[this_cpu];
-
-	mondo = __va(tb->cpu_mondo_block_pa);
-	mondo[0] = data0;
-	mondo[1] = data1;
-	mondo[2] = data2;
-	wmb();
 
 	cpu_list = __va(tb->cpu_list_pa);
 
-	/* Setup the initial cpu list.  */
-	cnt = 0;
-	for_each_cpu_mask_nr(i, *mask)
-		cpu_list[cnt++] = i;
-
 	cpus_clear(error_mask);
 	retries = 0;
 	prev_sent = 0;
@@ -743,11 +742,15 @@ dump_cpu_list_and_out:
 	printk("]\n");
 }
 
-static void (*xcall_deliver_impl)(u64, u64, u64, const cpumask_t *);
+static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
 
 static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
 {
+	struct trap_per_cpu *tb;
+	int this_cpu, i, cnt;
 	unsigned long flags;
+	u16 *cpu_list;
+	u64 *mondo;
 
 	/* We have to do this whole thing with interrupts fully disabled.
 	 * Otherwise if we send an xcall from interrupt context it will
@@ -760,7 +763,29 @@ static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask
 	 * Fortunately, udelay() uses %stick/%tick so we can use that.
 	 */
 	local_irq_save(flags);
-	xcall_deliver_impl(data0, data1, data2, mask);
+
+	this_cpu = smp_processor_id();
+	tb = &trap_block[this_cpu];
+
+	mondo = __va(tb->cpu_mondo_block_pa);
+	mondo[0] = data0;
+	mondo[1] = data1;
+	mondo[2] = data2;
+	wmb();
+
+	cpu_list = __va(tb->cpu_list_pa);
+
+	/* Setup the initial cpu list.  */
+	cnt = 0;
+	for_each_cpu_mask_nr(i, *mask) {
+		if (i == this_cpu || !cpu_online(i))
+			continue;
+		cpu_list[cnt++] = i;
+	}
+
+	if (cnt)
+		xcall_deliver_impl(tb, cnt);
+
 	local_irq_restore(flags);
 }
author	David S. Miller <davem@davemloft.net>	2008-08-04 16:42:58 -0700
committer	David S. Miller <davem@davemloft.net>	2008-08-04 16:42:58 -0700
commit	90f7ae8a55190f5edfb9fda957e25c994ed39ec4 (patch)
tree	b815a08c25f4acf37b02a982c67c6d0efd2fe480 /arch/sparc64
parent	c02a5119e862dea9a1361182840d41ae1fe24227 (diff)
download	op-kernel-dev-90f7ae8a55190f5edfb9fda957e25c994ed39ec4.zip op-kernel-dev-90f7ae8a55190f5edfb9fda957e25c994ed39ec4.tar.gz