path: root/sys/kern/sched_4bsd.c
author     attilio <attilio@FreeBSD.org>    2011-05-05 14:39:14 +0000
committer  attilio <attilio@FreeBSD.org>    2011-05-05 14:39:14 +0000
commit     fe4de567b50f7ca317b16f69b7b3a7de693025af (patch)
tree       e5d54bcefbf1fe0c4c6804bdc5c4852b1b64518a /sys/kern/sched_4bsd.c
parent     d3d3db9bac709a7fa4319bf5e8c8fb4e05918772 (diff)
Commit the support for removing cpumask_t and replacing it directly with
cpuset_t objects.

That is going to offer the underlying support for a simple bump of MAXCPU
and then support for a number of CPUs > 32 (as it is today).

Right now, cpumask_t is an int, 32 bits on all our supported architectures.
cpuset_t, on the other side, is implemented as an array of longs and is
easily extendible by definition.

The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN

while the others are still missing.
Userland is believed to be fully converted with the changes contained here.

Some technical notes:
- This commit may be considered an ABI nop for all the architectures
  different from amd64 and ia64 (and sparc64 in the future).
- Per-CPU members, which are now converted to cpuset_t, need to be accessed
  avoiding migration, because the size of cpuset_t should be considered
  unknown.
- The size of cpuset_t objects differs between kernel and userland (this is
  primarily done in order to leave some more space in userland to cope with
  KBI extensions). If you need to access a kernel cpuset_t from userland,
  please refer to the examples in this patch on how to do that correctly
  (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon.
- Only MAXCPU for amd64 is bumped now.

The patch has been tested by sbruno and Nicholas Esborn on Opteron
4 x 12-core machines. More testing on big SMP is expected to come soon.
pluknet tested the patch with his 8-way machines on both amd64 and i386.

Tested by:	pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by:	jeff, jhb, sbruno
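As a rough illustration of the idiom change this commit applies (a minimal sketch, not code from the patch; the helper name and masks are made up), the old integer bit operations on cpumask_t translate to the CPU_*() accessors from <sys/cpuset.h> roughly as follows:

#include <sys/param.h>
#include <sys/cpuset.h>

/*
 * Old style (cpumask_t was a plain int):
 *	map &= ~dontuse;		clear unwanted CPUs
 *	if (map == 0)			test for an empty mask
 *	if (map & (1 << cpu))		test a single CPU
 * New style: cpuset_t is an array of longs, so plain integer
 * arithmetic no longer compiles and the macros below are used instead.
 */
static int
pick_first_usable(cpuset_t *avail, cpuset_t *dontuse)
{
	cpuset_t map;
	int cpu;

	map = *avail;			/* struct copy is still legal */
	CPU_NAND(&map, dontuse);	/* map &= ~dontuse */
	if (CPU_EMPTY(&map))		/* map == 0 */
		return (-1);
	for (cpu = 0; cpu < CPU_SETSIZE; cpu++)
		if (CPU_ISSET(cpu, &map))	/* map & (1 << cpu) */
			return (cpu);
	return (-1);
}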
Diffstat (limited to 'sys/kern/sched_4bsd.c')
-rw-r--r--  sys/kern/sched_4bsd.c  66
1 file changed, 44 insertions, 22 deletions
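The commit message also stresses that per-CPU members converted to cpuset_t must be read without migrating, since a multi-word cpuset_t can no longer be fetched atomically as a single int. In the hunks below this is guaranteed by a spinlock that is already held (see the added comments); elsewhere, pinning the thread is one way to get the same guarantee. A minimal sketch under that assumption (the helper itself is not part of the patch):

#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/pcpu.h>
#include <sys/sched.h>

/*
 * Take a consistent snapshot of this CPU's per-CPU mask.  Without
 * sched_pin() (or an already-held spinlock, as in the diff below) the
 * thread could migrate between selecting the per-CPU data and copying
 * the structure, and the copy itself is not atomic.
 */
static void
snapshot_my_cpumask(cpuset_t *out)
{
	sched_pin();			/* prevent migration */
	*out = PCPU_GET(cpumask);	/* struct copy of the per-CPU member */
	sched_unpin();
}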
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index fef9e25..2fad27c 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -156,7 +156,7 @@ static struct runq runq;
static struct runq runq_pcpu[MAXCPU];
long runq_length[MAXCPU];
-static cpumask_t idle_cpus_mask;
+static cpuset_t idle_cpus_mask;
#endif
struct pcpuidlestat {
@@ -951,7 +951,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
if (td->td_flags & TDF_IDLETD) {
TD_SET_CAN_RUN(td);
#ifdef SMP
- idle_cpus_mask &= ~PCPU_GET(cpumask);
+ /* Spinlock held here, assume no migration. */
+ CPU_NAND(&idle_cpus_mask, PCPU_PTR(cpumask));
#endif
} else {
if (TD_IS_RUNNING(td)) {
@@ -1025,7 +1026,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
#ifdef SMP
if (td->td_flags & TDF_IDLETD)
- idle_cpus_mask |= PCPU_GET(cpumask);
+ CPU_OR(&idle_cpus_mask, PCPU_PTR(cpumask));
#endif
sched_lock.mtx_lock = (uintptr_t)td;
td->td_oncpu = PCPU_GET(cpuid);
@@ -1054,7 +1055,8 @@ static int
forward_wakeup(int cpunum)
{
struct pcpu *pc;
- cpumask_t dontuse, id, map, map2, me;
+ cpuset_t dontuse, id, map, map2, me;
+ int iscpuset;
mtx_assert(&sched_lock, MA_OWNED);
@@ -1071,32 +1073,38 @@ forward_wakeup(int cpunum)
/*
* Check the idle mask we received against what we calculated
* before in the old version.
+ *
+ * Also note that sched_lock is held now, thus no migration is
+ * expected.
*/
me = PCPU_GET(cpumask);
/* Don't bother if we should be doing it ourself. */
- if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum)))
+ if (CPU_OVERLAP(&me, &idle_cpus_mask) &&
+ (cpunum == NOCPU || CPU_ISSET(cpunum, &me)))
return (0);
- dontuse = me | stopped_cpus | hlt_cpus_mask;
- map2 = 0;
+ dontuse = me;
+ CPU_OR(&dontuse, &stopped_cpus);
+ CPU_OR(&dontuse, &hlt_cpus_mask);
+ CPU_ZERO(&map2);
if (forward_wakeup_use_loop) {
SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
id = pc->pc_cpumask;
- if ((id & dontuse) == 0 &&
+ if (!CPU_OVERLAP(&id, &dontuse) &&
pc->pc_curthread == pc->pc_idlethread) {
- map2 |= id;
+ CPU_OR(&map2, &id);
}
}
}
if (forward_wakeup_use_mask) {
- map = 0;
- map = idle_cpus_mask & ~dontuse;
+ map = idle_cpus_mask;
+ CPU_NAND(&map, &dontuse);
/* If they are both on, compare and use loop if different. */
if (forward_wakeup_use_loop) {
- if (map != map2) {
+ if (CPU_CMP(&map, &map2)) {
printf("map != map2, loop method preferred\n");
map = map2;
}
@@ -1108,18 +1116,22 @@ forward_wakeup(int cpunum)
/* If we only allow a specific CPU, then mask off all the others. */
if (cpunum != NOCPU) {
KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum."));
- map &= (1 << cpunum);
+ iscpuset = CPU_ISSET(cpunum, &map);
+ if (iscpuset == 0)
+ CPU_ZERO(&map);
+ else
+ CPU_SETOF(cpunum, &map);
}
- if (map) {
+ if (!CPU_EMPTY(&map)) {
forward_wakeups_delivered++;
SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
id = pc->pc_cpumask;
- if ((map & id) == 0)
+ if (!CPU_OVERLAP(&map, &id))
continue;
if (cpu_idle_wakeup(pc->pc_cpuid))
- map &= ~id;
+ CPU_NAND(&map, &id);
}
- if (map)
+ if (!CPU_EMPTY(&map))
ipi_selected(map, IPI_AST);
return (1);
}
@@ -1135,7 +1147,7 @@ kick_other_cpu(int pri, int cpuid)
int cpri;
pcpu = pcpu_find(cpuid);
- if (idle_cpus_mask & pcpu->pc_cpumask) {
+ if (CPU_OVERLAP(&idle_cpus_mask, &pcpu->pc_cpumask)) {
forward_wakeups_delivered++;
if (!cpu_idle_wakeup(cpuid))
ipi_cpu(cpuid, IPI_AST);
@@ -1193,6 +1205,7 @@ void
sched_add(struct thread *td, int flags)
#ifdef SMP
{
+ cpuset_t idle, me, tidlemsk;
struct td_sched *ts;
int forwarded = 0;
int cpu;
@@ -1262,11 +1275,20 @@ sched_add(struct thread *td, int flags)
kick_other_cpu(td->td_priority, cpu);
} else {
if (!single_cpu) {
- cpumask_t me = PCPU_GET(cpumask);
- cpumask_t idle = idle_cpus_mask & me;
- if (!idle && ((flags & SRQ_INTR) == 0) &&
- (idle_cpus_mask & ~(hlt_cpus_mask | me)))
+ /*
+ * Thread spinlock is held here, assume no
+ * migration is possible.
+ */
+ me = PCPU_GET(cpumask);
+ idle = idle_cpus_mask;
+ tidlemsk = idle;
+ CPU_AND(&idle, &me);
+ CPU_OR(&me, &hlt_cpus_mask);
+ CPU_NAND(&tidlemsk, &me);
+
+ if (CPU_EMPTY(&idle) && ((flags & SRQ_INTR) == 0) &&
+ !CPU_EMPTY(&tidlemsk))
forwarded = forward_wakeup(cpu);
}
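For reference, the least obvious translation above is the single-CPU restriction in forward_wakeup(), which replaces the old "map &= (1 << cpunum)". It can be read as the following self-contained helper (a sketch using only macros this patch already relies on; the function name is made up):

#include <sys/param.h>
#include <sys/cpuset.h>

/*
 * Equivalent of the old "map &= (1 << cpunum)": if cpunum is in the
 * set, reduce the set to exactly that CPU; otherwise empty it.
 */
static void
restrict_to_cpu(cpuset_t *map, int cpunum)
{
	if (CPU_ISSET(cpunum, map))
		CPU_SETOF(cpunum, map);	/* set now contains only cpunum */
	else
		CPU_ZERO(map);		/* cpunum was absent: clear the set */
}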