From d0e06d02bc16b86afdae303a0b8a82812a55e8ac Mon Sep 17 00:00:00 2001
From: attilio
Date: Sat, 30 Apr 2011 22:30:18 +0000
Subject: idle_cpus_mask is only used in the SMP case and within sched_4BSD;
 declare it appropriately.

---
 sys/kern/sched_4bsd.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'sys/kern/sched_4bsd.c')

diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 9424f73..668ecd1 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -155,6 +155,8 @@ static struct runq runq;
  */
 static struct runq runq_pcpu[MAXCPU];
 long runq_length[MAXCPU];
+
+static cpumask_t idle_cpus_mask;
 #endif
 
 struct pcpuidlestat {
--
cgit v1.1

From 7ac8b4739c3f301ecd125a4698c2731e3aef4366 Mon Sep 17 00:00:00 2001
From: attilio
Date: Sat, 30 Apr 2011 23:28:07 +0000
Subject: - Remove the following sysctls:
 kern.sched.ipiwakeup.onecpu
 kern.sched.ipiwakeup.htt2

  They are absolutely obsolete.  Probably the whole wakeup forwarding
  mechanism should be revisited for a better fit with modern hardware.
- As the map2 variable is no longer used, rename map3 to map2.
- Fix a printf() string by making the message more informative and
  removing the argument passing.

Approved by: julian
---
 sys/kern/sched_4bsd.c | 38 +++++++------------------------------
 1 file changed, 7 insertions(+), 31 deletions(-)

(limited to 'sys/kern/sched_4bsd.c')

diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 668ecd1..fef9e25 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -235,16 +235,6 @@ SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
 	   &forward_wakeup_use_loop, 0,
 	   "Use a loop to find idle cpus");
 
-static int forward_wakeup_use_single = 0;
-SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, onecpu, CTLFLAG_RW,
-	   &forward_wakeup_use_single, 0,
-	   "Only signal one idle cpu");
-
-static int forward_wakeup_use_htt = 0;
-SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, htt2, CTLFLAG_RW,
-	   &forward_wakeup_use_htt, 0,
-	   "account for htt");
-
 #endif
 #if 0
 static int sched_followon = 0;
@@ -1064,7 +1054,7 @@ static int
 forward_wakeup(int cpunum)
 {
 	struct pcpu *pc;
-	cpumask_t dontuse, id, map, map2, map3, me;
+	cpumask_t dontuse, id, map, map2, me;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 
@@ -1089,13 +1079,13 @@ forward_wakeup(int cpunum)
 		return (0);
 
 	dontuse = me | stopped_cpus | hlt_cpus_mask;
-	map3 = 0;
+	map2 = 0;
 	if (forward_wakeup_use_loop) {
 		SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
 			id = pc->pc_cpumask;
 			if ((id & dontuse) == 0 &&
 			    pc->pc_curthread == pc->pc_idlethread) {
-				map3 |= id;
+				map2 |= id;
 			}
 		}
 	}
@@ -1106,33 +1096,19 @@ forward_wakeup(int cpunum)
 
 		/* If they are both on, compare and use loop if different. */
 		if (forward_wakeup_use_loop) {
-			if (map != map3) {
-				printf("map (%02X) != map3 (%02X)\n", map,
-				    map3);
-				map = map3;
+			if (map != map2) {
+				printf("map != map2, loop method preferred\n");
+				map = map2;
 			}
 		}
 	} else {
-		map = map3;
+		map = map2;
 	}
 
 	/* If we only allow a specific CPU, then mask off all the others. */
 	if (cpunum != NOCPU) {
 		KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum."));
 		map &= (1 << cpunum);
-	} else {
-		/* Try choose an idle die. */
-		if (forward_wakeup_use_htt) {
-			map2 = (map & (map >> 1)) & 0x5555;
-			if (map2) {
-				map = map2;
-			}
-		}
-
-		/* Set only one bit. */
-		if (forward_wakeup_use_single) {
-			map = map & ((~map) + 1);
-		}
 	}
 
 	if (map) {
 		forward_wakeups_delivered++;
--
cgit v1.1
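
The two sysctl paths deleted above were the last users of a pair of classic
bit tricks over the 32-bit cpumask_t, so it is worth recording what they
computed.  Below is a minimal user-space sketch, not FreeBSD code: the mask
value and variable names are made up for illustration.

#include <stdio.h>

int
main(void)
{
	unsigned int map = 0xcc;	/* hypothetical idle-CPU mask: CPUs 2,3,6,7 */

	/*
	 * "onecpu" path: x & (~x + 1) is the two's-complement idiom that
	 * clears every bit except the lowest one set, so at most a single
	 * idle CPU would have been signalled.
	 */
	unsigned int one = map & ((~map) + 1);

	/*
	 * "htt2" path: with HTT siblings paired as bits 2n and 2n+1,
	 * (x & (x >> 1)) & 0x5555 keeps bit 2n only when both siblings
	 * are idle, i.e. it selects fully idle physical cores (and, with
	 * a 16-bit constant, only among the first 16 CPUs).
	 */
	unsigned int dies = (map & (map >> 1)) & 0x5555;

	printf("map=%#x lowest=%#x idle dies=%#x\n", map, one, dies);
	return (0);
}

With map = 0xcc this prints lowest=0x4 and dies=0x44.  Neither trick
generalizes to a mask wider than one machine word, which matters for the
cpuset_t conversion in the next commit.
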
From fe4de567b50f7ca317b16f69b7b3a7de693025af Mon Sep 17 00:00:00 2001
From: attilio
Date: Thu, 5 May 2011 14:39:14 +0000
Subject: Commit the support for removing cpumask_t and replacing it directly
 with cpuset_t objects.

This provides the underlying support for a simple bump of MAXCPU and,
later, for numbers of CPUs greater than 32 (the current limit).

Right now, cpumask_t is an int, 32 bits on all our supported
architectures.  cpuset_t, on the other hand, is implemented as an array
of longs and is easily extensible by definition.

The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN

while the others still lack support.  Userland is believed to be fully
converted with the changes contained here.

Some technical notes:
- This commit may be considered an ABI nop for all the architectures
  other than amd64 and ia64 (and sparc64 in the future).
- Per-CPU members which are now converted to cpuset_t need to be
  accessed while avoiding migration, because the size of cpuset_t
  should be considered unknown.
- The size of cpuset_t objects differs between kernel and userland
  (this is primarily done in order to leave some more space in userland
  to cope with KBI extensions).  If you need to access a kernel
  cpuset_t from userland, please refer to the examples in this patch on
  how to do that correctly (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon.
- Only MAXCPU for amd64 is bumped now.

The patch has been tested by sbruno and Nicholas Esborn on Opteron
machines with 4 x 12-core CPUs.  More testing on big SMP is expected to
come soon.  pluknet tested the patch on his 8-way machines, on both
amd64 and i386.

Tested by: pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by: jeff, jhb, sbruno
---
 sys/kern/sched_4bsd.c | 66 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 44 insertions(+), 22 deletions(-)

(limited to 'sys/kern/sched_4bsd.c')

diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index fef9e25..2fad27c 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -156,7 +156,7 @@ static struct runq runq;
 static struct runq runq_pcpu[MAXCPU];
 long runq_length[MAXCPU];
 
-static cpumask_t idle_cpus_mask;
+static cpuset_t idle_cpus_mask;
 #endif
 
 struct pcpuidlestat {
@@ -951,7 +951,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
 	if (td->td_flags & TDF_IDLETD) {
 		TD_SET_CAN_RUN(td);
 #ifdef SMP
-		idle_cpus_mask &= ~PCPU_GET(cpumask);
+		/* Spinlock held here, assume no migration. */
+		CPU_NAND(&idle_cpus_mask, PCPU_PTR(cpumask));
 #endif
 	} else {
 		if (TD_IS_RUNNING(td)) {
@@ -1025,7 +1026,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
 
 #ifdef SMP
 	if (td->td_flags & TDF_IDLETD)
-		idle_cpus_mask |= PCPU_GET(cpumask);
+		CPU_OR(&idle_cpus_mask, PCPU_PTR(cpumask));
 #endif
 	sched_lock.mtx_lock = (uintptr_t)td;
 	td->td_oncpu = PCPU_GET(cpuid);
@@ -1054,7 +1055,8 @@ static int
 forward_wakeup(int cpunum)
 {
 	struct pcpu *pc;
-	cpumask_t dontuse, id, map, map2, me;
+	cpuset_t dontuse, id, map, map2, me;
+	int iscpuset;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 
@@ -1071,32 +1073,38 @@ forward_wakeup(int cpunum)
 	/*
 	 * Check the idle mask we received against what we calculated
 	 * before in the old version.
+	 *
+	 * Also note that sched_lock is held now, thus no migration is
+	 * expected.
 	 */
 	me = PCPU_GET(cpumask);
 
 	/* Don't bother if we should be doing it ourself. */
-	if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum)))
+	if (CPU_OVERLAP(&me, &idle_cpus_mask) &&
+	    (cpunum == NOCPU || CPU_ISSET(cpunum, &me)))
 		return (0);
 
-	dontuse = me | stopped_cpus | hlt_cpus_mask;
-	map2 = 0;
+	dontuse = me;
+	CPU_OR(&dontuse, &stopped_cpus);
+	CPU_OR(&dontuse, &hlt_cpus_mask);
+	CPU_ZERO(&map2);
 	if (forward_wakeup_use_loop) {
 		SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
 			id = pc->pc_cpumask;
-			if ((id & dontuse) == 0 &&
+			if (!CPU_OVERLAP(&id, &dontuse) &&
 			    pc->pc_curthread == pc->pc_idlethread) {
-				map2 |= id;
+				CPU_OR(&map2, &id);
 			}
 		}
 	}
 
 	if (forward_wakeup_use_mask) {
-		map = 0;
-		map = idle_cpus_mask & ~dontuse;
+		map = idle_cpus_mask;
+		CPU_NAND(&map, &dontuse);
 
 		/* If they are both on, compare and use loop if different. */
 		if (forward_wakeup_use_loop) {
-			if (map != map2) {
+			if (CPU_CMP(&map, &map2)) {
 				printf("map != map2, loop method preferred\n");
 				map = map2;
 			}
@@ -1108,18 +1116,22 @@ forward_wakeup(int cpunum)
 	/* If we only allow a specific CPU, then mask off all the others. */
 	if (cpunum != NOCPU) {
 		KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum."));
-		map &= (1 << cpunum);
+		iscpuset = CPU_ISSET(cpunum, &map);
+		if (iscpuset == 0)
+			CPU_ZERO(&map);
+		else
+			CPU_SETOF(cpunum, &map);
 	}
-	if (map) {
+	if (!CPU_EMPTY(&map)) {
 		forward_wakeups_delivered++;
 		SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
 			id = pc->pc_cpumask;
-			if ((map & id) == 0)
+			if (!CPU_OVERLAP(&map, &id))
 				continue;
 			if (cpu_idle_wakeup(pc->pc_cpuid))
-				map &= ~id;
+				CPU_NAND(&map, &id);
 		}
-		if (map)
+		if (!CPU_EMPTY(&map))
 			ipi_selected(map, IPI_AST);
 		return (1);
 	}
@@ -1135,7 +1147,7 @@ kick_other_cpu(int pri, int cpuid)
 	int cpri;
 
 	pcpu = pcpu_find(cpuid);
-	if (idle_cpus_mask & pcpu->pc_cpumask) {
+	if (CPU_OVERLAP(&idle_cpus_mask, &pcpu->pc_cpumask)) {
 		forward_wakeups_delivered++;
 		if (!cpu_idle_wakeup(cpuid))
 			ipi_cpu(cpuid, IPI_AST);
@@ -1193,6 +1205,7 @@ void
 sched_add(struct thread *td, int flags)
 #ifdef SMP
 {
+	cpuset_t idle, me, tidlemsk;
 	struct td_sched *ts;
 	int forwarded = 0;
 	int cpu;
@@ -1262,11 +1275,20 @@ sched_add(struct thread *td, int flags)
 			kick_other_cpu(td->td_priority, cpu);
 	} else {
 		if (!single_cpu) {
-			cpumask_t me = PCPU_GET(cpumask);
-			cpumask_t idle = idle_cpus_mask & me;
 
-			if (!idle && ((flags & SRQ_INTR) == 0) &&
-			    (idle_cpus_mask & ~(hlt_cpus_mask | me)))
+			/*
+			 * Thread spinlock is held here, assume no
+			 * migration is possible.
+			 */
+			me = PCPU_GET(cpumask);
+			idle = idle_cpus_mask;
+			tidlemsk = idle;
+			CPU_AND(&idle, &me);
+			CPU_OR(&me, &hlt_cpus_mask);
+			CPU_NAND(&tidlemsk, &me);
+
+			if (CPU_EMPTY(&idle) && ((flags & SRQ_INTR) == 0) &&
+			    !CPU_EMPTY(&tidlemsk))
 				forwarded = forward_wakeup(cpu);
 		}
--
cgit v1.1
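
The conversion's pattern is mechanical: each bitwise idiom over an integer
mask becomes a call over an opaque set.  The sketch below is not the kernel
API; it re-implements just enough of the semantics of FreeBSD's CPU_ZERO,
CPU_OR, CPU_NAND and CPU_OVERLAP macros from <sys/cpuset.h> over a
fixed two-word set, so the mapping used in forward_wakeup() above can be
compiled and experimented with anywhere.  All names here are invented
stand-ins.

#include <stdio.h>
#include <string.h>

#define	TOYWORDS	2	/* bits live in an array of longs, not one int */

typedef struct {
	unsigned long	bits[TOYWORDS];
} toyset_t;

static void
toy_or(toyset_t *dst, const toyset_t *src)	/* like CPU_OR: dst |= src */
{
	int i;

	for (i = 0; i < TOYWORDS; i++)
		dst->bits[i] |= src->bits[i];
}

static void
toy_nand(toyset_t *dst, const toyset_t *src)	/* like CPU_NAND: dst &= ~src */
{
	int i;

	for (i = 0; i < TOYWORDS; i++)
		dst->bits[i] &= ~src->bits[i];
}

static int
toy_overlap(const toyset_t *a, const toyset_t *b)  /* like CPU_OVERLAP */
{
	int i;

	for (i = 0; i < TOYWORDS; i++)
		if (a->bits[i] & b->bits[i])
			return (1);
	return (0);
}

int
main(void)
{
	toyset_t dontuse, idle, map, me, stopped;

	memset(&me, 0, sizeof(me));		/* like CPU_ZERO */
	memset(&stopped, 0, sizeof(stopped));
	memset(&idle, 0, sizeof(idle));
	me.bits[0] = 0x1;			/* this CPU */
	stopped.bits[0] = 0x2;			/* a stopped CPU */
	idle.bits[0] = 0x6;			/* CPUs 1-2 idle */

	/* Old: dontuse = me | stopped_cpus;  New: copy, then OR in place. */
	dontuse = me;
	toy_or(&dontuse, &stopped);

	/* Old: map = idle_cpus_mask & ~dontuse;  New: copy, then NAND. */
	map = idle;
	toy_nand(&map, &dontuse);

	/* Old: (me & idle_cpus_mask) != 0;  New: an overlap test. */
	printf("me idle=%d map=%#lx\n", toy_overlap(&me, &idle),
	    map.bits[0]);			/* prints: me idle=0 map=0x4 */
	return (0);
}

Routing everything through such macros buys exactly what the commit message
claims: callers never learn sizeof(cpuset_t), so MAXCPU can later grow past
the width of a long without touching them again.
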
From bc4d32e80bd81ea96c4c2544ab36f7e8caa777b7 Mon Sep 17 00:00:00 2001
From: attilio
Date: Tue, 31 May 2011 21:22:44 +0000
Subject: MFC

---
 sys/kern/sched_4bsd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'sys/kern/sched_4bsd.c')

diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 2fad27c..592bb80 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -1089,7 +1089,7 @@ forward_wakeup(int cpunum)
 	CPU_OR(&dontuse, &hlt_cpus_mask);
 	CPU_ZERO(&map2);
 	if (forward_wakeup_use_loop) {
-		SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
+		STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 			id = pc->pc_cpumask;
 			if (!CPU_OVERLAP(&id, &dontuse) &&
 			    pc->pc_curthread == pc->pc_idlethread) {
@@ -1124,7 +1124,7 @@ forward_wakeup(int cpunum)
 	}
 	if (!CPU_EMPTY(&map)) {
 		forward_wakeups_delivered++;
-		SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
+		STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 			id = pc->pc_cpumask;
 			if (!CPU_OVERLAP(&map, &id))
 				continue;
--
cgit v1.1
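
The MFC picks up a list-head type change: the all-CPUs list cpuhead became a
singly-linked tail queue, so its iterators move from SLIST_FOREACH to
STAILQ_FOREACH.  Both macro families come from the 4.4BSD <sys/queue.h>.
A toy list showing the STAILQ pattern follows; the struct and field names
are invented for the example, loosely after struct pcpu and pc_allcpu.

#include <stdio.h>
#include <sys/queue.h>

struct toy_pcpu {
	int			cpuid;
	STAILQ_ENTRY(toy_pcpu)	allcpu;		/* like pc_allcpu */
};

static STAILQ_HEAD(, toy_pcpu) cpuhead = STAILQ_HEAD_INITIALIZER(cpuhead);

int
main(void)
{
	struct toy_pcpu cpus[4];
	struct toy_pcpu *pc;
	int i;

	for (i = 0; i < 4; i++) {
		cpus[i].cpuid = i;
		/* O(1) tail insert keeps the CPUs in id order. */
		STAILQ_INSERT_TAIL(&cpuhead, &cpus[i], allcpu);
	}

	/* Same shape as the loops in forward_wakeup() above. */
	STAILQ_FOREACH(pc, &cpuhead, allcpu)
		printf("cpu%d\n", pc->cpuid);
	return (0);
}

The practical difference is that an SLIST only supports O(1) insertion at
the head, while an STAILQ also supports O(1) insertion at the tail;
iteration keeps the same shape, which is why only the macro name changes in
the two loops above.
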