path: root/sys/kern/sched_4bsd.c
author     attilio <attilio@FreeBSD.org>    2011-05-05 14:39:14 +0000
committer  attilio <attilio@FreeBSD.org>    2011-05-05 14:39:14 +0000
commit     fe4de567b50f7ca317b16f69b7b3a7de693025af (patch)
tree       e5d54bcefbf1fe0c4c6804bdc5c4852b1b64518a /sys/kern/sched_4bsd.c
parent     d3d3db9bac709a7fa4319bf5e8c8fb4e05918772 (diff)
Commit the support for removing cpumask_t and replacing it directly with
cpuset_t objects.

That is going to offer the underlying support for a simple bump of MAXCPU
and then support for a number of CPUs > 32 (as it is today).

Right now, cpumask_t is an int, 32 bits on all our supported architectures.
cpuset_t, on the other side, is implemented as an array of longs and is
easily extendible by definition.

The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN

while the others are still missing.
Userland is believed to be fully converted with the changes contained here.

Some technical notes:
- This commit may be considered an ABI nop for all the architectures
  different from amd64 and ia64 (and sparc64 in the future).
- Per-CPU members, which are now converted to cpuset_t, need to be accessed
  avoiding migration, because the size of cpuset_t should be considered
  unknown.
- The size of cpuset_t objects differs between kernel and userland (this is
  primarily done in order to leave some more space in userland to cope with
  KBI extensions). If you need to access a kernel cpuset_t from userland,
  please refer to the examples in this patch on how to do that correctly
  (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon.
- Only MAXCPU for amd64 is bumped now.

The patch has been tested by sbruno and Nicholas Esborn on Opteron
4 x 12-core machines. More testing on big SMP is expected to come soon.
pluknet tested the patch with his 8-way machines on both amd64 and i386.

Tested by:	pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by:	jeff, jhb, sbruno
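As a rough illustration of the idiom change this commit applies (a minimal sketch, not code from the patch; the helper name and masks are made up), the old integer bit operations on cpumask_t translate to the CPU_*() accessors from <sys/cpuset.h> roughly as follows:

#include <sys/param.h>
#include <sys/cpuset.h>

/*
 * Old style (cpumask_t was a plain int):
 *	map &= ~dontuse;		clear unwanted CPUs
 *	if (map == 0)			test for an empty mask
 *	if (map & (1 << cpu))		test a single CPU
 * New style: cpuset_t is an array of longs, so plain integer
 * arithmetic no longer compiles and the macros below are used instead.
 */
static int
pick_first_usable(cpuset_t *avail, cpuset_t *dontuse)
{
	cpuset_t map;
	int cpu;

	map = *avail;			/* struct copy is still legal */
	CPU_NAND(&map, dontuse);	/* map &= ~dontuse */
	if (CPU_EMPTY(&map))		/* map == 0 */
		return (-1);
	for (cpu = 0; cpu < CPU_SETSIZE; cpu++)
		if (CPU_ISSET(cpu, &map))	/* map & (1 << cpu) */
			return (cpu);
	return (-1);
}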
Diffstat (limited to 'sys/kern/sched_4bsd.c')
-rw-r--r--  sys/kern/sched_4bsd.c  66
1 file changed, 44 insertions, 22 deletions
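The commit message also stresses that per-CPU members converted to cpuset_t must be read without migrating, since a multi-word cpuset_t can no longer be fetched atomically as a single int. In the hunks below this is guaranteed by a spinlock that is already held (see the added comments); elsewhere, pinning the thread is one way to get the same guarantee. A minimal sketch under that assumption (the helper itself is not part of the patch):

#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/pcpu.h>
#include <sys/sched.h>

/*
 * Take a consistent snapshot of this CPU's per-CPU mask.  Without
 * sched_pin() (or an already-held spinlock, as in the diff below) the
 * thread could migrate between selecting the per-CPU data and copying
 * the structure, and the copy itself is not atomic.
 */
static void
snapshot_my_cpumask(cpuset_t *out)
{
	sched_pin();			/* prevent migration */
	*out = PCPU_GET(cpumask);	/* struct copy of the per-CPU member */
	sched_unpin();
}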
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index fef9e25..2fad27c 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -156,7 +156,7 @@ static struct runq runq;
static struct runq runq_pcpu[MAXCPU];
long runq_length[MAXCPU];
-static cpumask_t idle_cpus_mask;
+static cpuset_t idle_cpus_mask;
#endif
struct pcpuidlestat {
@@ -951,7 +951,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
if (td->td_flags & TDF_IDLETD) {
TD_SET_CAN_RUN(td);
#ifdef SMP
- idle_cpus_mask &= ~PCPU_GET(cpumask);
+ /* Spinlock held here, assume no migration. */
+ CPU_NAND(&idle_cpus_mask, PCPU_PTR(cpumask));
#endif
} else {
if (TD_IS_RUNNING(td)) {
@@ -1025,7 +1026,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
#ifdef SMP
if (td->td_flags & TDF_IDLETD)
- idle_cpus_mask |= PCPU_GET(cpumask);
+ CPU_OR(&idle_cpus_mask, PCPU_PTR(cpumask));
#endif
sched_lock.mtx_lock = (uintptr_t)td;
td->td_oncpu = PCPU_GET(cpuid);
@@ -1054,7 +1055,8 @@ static int
forward_wakeup(int cpunum)
{
struct pcpu *pc;
- cpumask_t dontuse, id, map, map2, me;
+ cpuset_t dontuse, id, map, map2, me;
+ int iscpuset;
mtx_assert(&sched_lock, MA_OWNED);
@@ -1071,32 +1073,38 @@ forward_wakeup(int cpunum)
/*
* Check the idle mask we received against what we calculated
* before in the old version.
+ *
+ * Also note that sched_lock is held now, thus no migration is
+ * expected.
*/
me = PCPU_GET(cpumask);
/* Don't bother if we should be doing it ourself. */
- if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum)))
+ if (CPU_OVERLAP(&me, &idle_cpus_mask) &&
+ (cpunum == NOCPU || CPU_ISSET(cpunum, &me)))
return (0);
- dontuse = me | stopped_cpus | hlt_cpus_mask;
- map2 = 0;
+ dontuse = me;
+ CPU_OR(&dontuse, &stopped_cpus);
+ CPU_OR(&dontuse, &hlt_cpus_mask);
+ CPU_ZERO(&map2);
if (forward_wakeup_use_loop) {
SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
id = pc->pc_cpumask;
- if ((id & dontuse) == 0 &&
+ if (!CPU_OVERLAP(&id, &dontuse) &&
pc->pc_curthread == pc->pc_idlethread) {
- map2 |= id;
+ CPU_OR(&map2, &id);
}
}
}
if (forward_wakeup_use_mask) {
- map = 0;
- map = idle_cpus_mask & ~dontuse;
+ map = idle_cpus_mask;
+ CPU_NAND(&map, &dontuse);
/* If they are both on, compare and use loop if different. */
if (forward_wakeup_use_loop) {
- if (map != map2) {
+ if (CPU_CMP(&map, &map2)) {
printf("map != map2, loop method preferred\n");
map = map2;
}
@@ -1108,18 +1116,22 @@ forward_wakeup(int cpunum)
/* If we only allow a specific CPU, then mask off all the others. */
if (cpunum != NOCPU) {
KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum."));
- map &= (1 << cpunum);
+ iscpuset = CPU_ISSET(cpunum, &map);
+ if (iscpuset == 0)
+ CPU_ZERO(&map);
+ else
+ CPU_SETOF(cpunum, &map);
}
- if (map) {
+ if (!CPU_EMPTY(&map)) {
forward_wakeups_delivered++;
SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
id = pc->pc_cpumask;
- if ((map & id) == 0)
+ if (!CPU_OVERLAP(&map, &id))
continue;
if (cpu_idle_wakeup(pc->pc_cpuid))
- map &= ~id;
+ CPU_NAND(&map, &id);
}
- if (map)
+ if (!CPU_EMPTY(&map))
ipi_selected(map, IPI_AST);
return (1);
}
@@ -1135,7 +1147,7 @@ kick_other_cpu(int pri, int cpuid)
int cpri;
pcpu = pcpu_find(cpuid);
- if (idle_cpus_mask & pcpu->pc_cpumask) {
+ if (CPU_OVERLAP(&idle_cpus_mask, &pcpu->pc_cpumask)) {
forward_wakeups_delivered++;
if (!cpu_idle_wakeup(cpuid))
ipi_cpu(cpuid, IPI_AST);
@@ -1193,6 +1205,7 @@ void
sched_add(struct thread *td, int flags)
#ifdef SMP
{
+ cpuset_t idle, me, tidlemsk;
struct td_sched *ts;
int forwarded = 0;
int cpu;
@@ -1262,11 +1275,20 @@ sched_add(struct thread *td, int flags)
kick_other_cpu(td->td_priority, cpu);
} else {
if (!single_cpu) {
- cpumask_t me = PCPU_GET(cpumask);
- cpumask_t idle = idle_cpus_mask & me;
- if (!idle && ((flags & SRQ_INTR) == 0) &&
- (idle_cpus_mask & ~(hlt_cpus_mask | me)))
+ /*
+ * Thread spinlock is held here, assume no
+ * migration is possible.
+ */
+ me = PCPU_GET(cpumask);
+ idle = idle_cpus_mask;
+ tidlemsk = idle;
+ CPU_AND(&idle, &me);
+ CPU_OR(&me, &hlt_cpus_mask);
+ CPU_NAND(&tidlemsk, &me);
+
+ if (CPU_EMPTY(&idle) && ((flags & SRQ_INTR) == 0) &&
+ !CPU_EMPTY(&tidlemsk))
forwarded = forward_wakeup(cpu);
}
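For reference, the least obvious translation above is the single-CPU restriction in forward_wakeup(), which replaces the old "map &= (1 << cpunum)". It can be read as the following self-contained helper (a sketch using only macros this patch already relies on; the function name is made up):

#include <sys/param.h>
#include <sys/cpuset.h>

/*
 * Equivalent of the old "map &= (1 << cpunum)": if cpunum is in the
 * set, reduce the set to exactly that CPU; otherwise empty it.
 */
static void
restrict_to_cpu(cpuset_t *map, int cpunum)
{
	if (CPU_ISSET(cpunum, map))
		CPU_SETOF(cpunum, map);	/* set now contains only cpunum */
	else
		CPU_ZERO(map);		/* cpunum was absent: clear the set */
}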