author      julian <julian@FreeBSD.org>  2004-09-01 06:42:02 +0000
committer   julian <julian@FreeBSD.org>  2004-09-01 06:42:02 +0000
commit      8354ba9e3ae8db8ce805e66f768b37de9f172dce (patch)
tree        93ac3864ee5c297785ca19470bbb03069c0240ae
parent      80bf38e921816f690ba642ba76e42b17f5141d66 (diff)
download    FreeBSD-src-8354ba9e3ae8db8ce805e66f768b37de9f172dce.zip
            FreeBSD-src-8354ba9e3ae8db8ce805e66f768b37de9f172dce.tar.gz
Give the 4bsd scheduler the ability to wake up idle processors
when there is new work to be done.

MFC after:	5 days
-rw-r--r--  sys/amd64/amd64/mp_machdep.c  |   2
-rw-r--r--  sys/i386/i386/mp_machdep.c    |   2
-rw-r--r--  sys/i386/include/param.h      |   2
-rw-r--r--  sys/kern/kern_idle.c          |  18
-rw-r--r--  sys/kern/kern_switch.c        |  34
-rw-r--r--  sys/kern/sched_4bsd.c         |  77
-rw-r--r--  sys/kern/subr_smp.c           | 136
-rw-r--r--  sys/sys/smp.h                 |   7
8 files changed, 251 insertions, 27 deletions
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 793a56e..07855ce 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -113,7 +113,6 @@ extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
*/
static u_int logical_cpus;
-static u_int logical_cpus_mask;
/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;
@@ -138,7 +137,6 @@ static int start_all_aps(void);
static int start_ap(int apic_id);
static void release_aps(void *dummy);
-static int hlt_cpus_mask;
static int hlt_logical_cpus;
static struct sysctl_ctx_list logical_cpu_clist;
static u_int bootMP_size;
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index 20a3daf..915e3ad 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -183,7 +183,6 @@ volatile int smp_tlb_wait;
*/
static u_int logical_cpus;
-static u_int logical_cpus_mask;
/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;
@@ -209,7 +208,6 @@ static void install_ap_tramp(void);
static int start_ap(int apic_id);
static void release_aps(void *dummy);
-static int hlt_cpus_mask;
static int hlt_logical_cpus;
static struct sysctl_ctx_list logical_cpu_clist;
diff --git a/sys/i386/include/param.h b/sys/i386/include/param.h
index bb9c7a7..90541bf 100644
--- a/sys/i386/include/param.h
+++ b/sys/i386/include/param.h
@@ -98,9 +98,7 @@
#define PDRMASK (NBPDR-1)
/* PREEMPTION exposes scheduler bugs that need to be fixed. */
-#if 0
#define PREEMPTION
-#endif
#define IOPAGES 2 /* pages of i/o permission bitmap */
diff --git a/sys/kern/kern_idle.c b/sys/kern/kern_idle.c
index 8af741c..d3d891f 100644
--- a/sys/kern/kern_idle.c
+++ b/sys/kern/kern_idle.c
@@ -36,6 +36,9 @@ __FBSDID("$FreeBSD$");
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/unistd.h>
+#ifdef SMP
+#include <sys/smp.h>
+#endif
static void idle_setup(void *dummy);
SYSINIT(idle_setup, SI_SUB_SCHED_IDLE, SI_ORDER_FIRST, idle_setup, NULL)
@@ -96,9 +99,18 @@ idle_proc(void *dummy)
{
struct proc *p;
struct thread *td;
+#ifdef SMP
+ cpumask_t mycpu;
+#endif
td = curthread;
p = td->td_proc;
+#ifdef SMP
+ mycpu = PCPU_GET(cpumask);
+ mtx_lock_spin(&sched_lock);
+ idle_cpus_mask |= mycpu;
+ mtx_unlock_spin(&sched_lock);
+#endif
for (;;) {
mtx_assert(&Giant, MA_NOTOWNED);
@@ -106,7 +118,13 @@ idle_proc(void *dummy)
cpu_idle();
mtx_lock_spin(&sched_lock);
+#ifdef SMP
+ idle_cpus_mask &= ~mycpu;
+#endif
mi_switch(SW_VOL, NULL);
+#ifdef SMP
+ idle_cpus_mask |= mycpu;
+#endif
mtx_unlock_spin(&sched_lock);
}
}
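
The kern_idle.c change above has each idle thread advertise its CPU in idle_cpus_mask while it is idle and clear its bit just before switching away in mi_switch(). Below is a minimal userland sketch of that bookkeeping only, not the kernel code; the CPU ids, the cpu_mask() helper, and the main() driver are assumptions made so the example stands alone.

/*
 * Sketch: maintaining a per-CPU "idle" bitmask like idle_cpus_mask.
 */
#include <stdio.h>

typedef unsigned int cpumask_t;

static cpumask_t idle_cpus_mask;	/* one bit per idle CPU */

static cpumask_t
cpu_mask(int cpuid)
{
	return ((cpumask_t)1 << cpuid);	/* this CPU's bit, like pc_cpumask */
}

static void
enter_idle(int cpuid)
{
	idle_cpus_mask |= cpu_mask(cpuid);	/* advertise: this CPU is idle */
}

static void
leave_idle(int cpuid)
{
	idle_cpus_mask &= ~cpu_mask(cpuid);	/* about to run real work */
}

int
main(void)
{
	enter_idle(0);
	enter_idle(2);
	printf("idle mask: %#x\n", idle_cpus_mask);	/* 0x5: CPUs 0 and 2 idle */
	leave_idle(0);
	printf("idle mask: %#x\n", idle_cpus_mask);	/* 0x4: only CPU 2 idle */
	return (0);
}
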
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
index 54d0025..e36128f 100644
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -89,6 +89,7 @@ reassigned to keep this true.
__FBSDID("$FreeBSD$");
#include "opt_full_preemption.h"
+#include "opt_sched.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -104,6 +105,10 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#endif
#include <machine/critical.h>
+#if defined(SMP) && defined(SCHED_4BSD)
+#include <sys/sysctl.h>
+#endif
+
CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
@@ -686,6 +691,12 @@ runq_check(struct runq *rq)
return (0);
}
+#if defined(SMP) && defined(SCHED_4BSD)
+int runq_fuzz = 1;
+SYSCTL_DECL(_kern_sched);
+SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
+#endif
+
/*
* Find the highest priority process on the run queue.
*/
@@ -699,7 +710,28 @@ runq_choose(struct runq *rq)
mtx_assert(&sched_lock, MA_OWNED);
while ((pri = runq_findbit(rq)) != -1) {
rqh = &rq->rq_queues[pri];
- ke = TAILQ_FIRST(rqh);
+#if defined(SMP) && defined(SCHED_4BSD)
+ /* fuzz == 1 is normal.. 0 or less are ignored */
+ if (runq_fuzz > 1) {
+ /*
+ * In the first couple of entries, check if
+ * there is one for our CPU as a preference.
+ */
+ int count = runq_fuzz;
+ int cpu = PCPU_GET(cpuid);
+ struct kse *ke2;
+ ke2 = ke = TAILQ_FIRST(rqh);
+
+ while (count-- && ke2) {
+ if (ke->ke_thread->td_lastcpu == cpu) {
+ ke = ke2;
+ break;
+ }
+ ke2 = TAILQ_NEXT(ke2, ke_procq);
+ }
+ } else
+#endif
+ ke = TAILQ_FIRST(rqh);
KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
CTR3(KTR_RUNQ,
"runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index ae8046a..043514b 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -698,6 +698,10 @@ void
sched_add(struct thread *td, int flags)
{
struct kse *ke;
+#ifdef SMP
+ int forwarded = 0;
+ int cpu;
+#endif
ke = td->td_kse;
mtx_assert(&sched_lock, MA_OWNED);
@@ -711,33 +715,70 @@ sched_add(struct thread *td, int flags)
("sched_add: process swapped out"));
#ifdef SMP
- /*
- * Only try to preempt if the thread is unpinned or pinned to the
- * current CPU.
- */
- if (KSE_CAN_MIGRATE(ke) || ke->ke_runq == &runq_pcpu[PCPU_GET(cpuid)])
-#endif
- /*
- * Don't try preempt if we are already switching.
- * all hell might break loose.
- */
- if ((flags & SRQ_YIELDING) == 0)
- if (maybe_preempt(td))
- return;
-
-#ifdef SMP
if (KSE_CAN_MIGRATE(ke)) {
- CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p) to gbl runq", ke, td);
+ CTR2(KTR_RUNQ,
+ "sched_add: adding kse:%p (td:%p) to gbl runq", ke, td);
+ cpu = NOCPU;
ke->ke_runq = &runq;
} else {
- CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p)to pcpu runq", ke, td);
if (!SKE_RUNQ_PCPU(ke))
- ke->ke_runq = &runq_pcpu[PCPU_GET(cpuid)];
+ ke->ke_runq = &runq_pcpu[(cpu = PCPU_GET(cpuid))];
+ else
+ cpu = td->td_lastcpu;
+ CTR3(KTR_RUNQ,
+ "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
}
#else
CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p) to runq", ke, td);
ke->ke_runq = &runq;
+
#endif
+ /*
+ * If we are yielding (on the way out anyhow)
+ * or the thread being saved is US,
+ * then don't try to be smart about preemption
+ * or kicking off another CPU,
+ * as it won't help and may hinder.
+ * In the YIELDING case, we are about to run whoever is
+ * being put in the queue anyhow, and in the
+ * OURSELF case, we are putting ourselves on the run queue,
+ * which also only happens when we are about to yield.
+ */
+ if((flags & SRQ_YIELDING) == 0) {
+#ifdef SMP
+ cpumask_t me = PCPU_GET(cpumask);
+ int idle = idle_cpus_mask & me;
+ /*
+ * Only try to kick off another CPU if
+ * the thread is unpinned
+ * or pinned to another cpu,
+ * and there are other available and idle CPUs.
+ * if we are idle, then skip straight to preemption.
+ */
+ if ( (! idle) &&
+ (idle_cpus_mask & ~(hlt_cpus_mask | me)) &&
+ ( KSE_CAN_MIGRATE(ke) ||
+ ke->ke_runq != &runq_pcpu[PCPU_GET(cpuid)])) {
+ forwarded = forward_wakeup(cpu);
+ }
+ /*
+ * If we failed to kick off another cpu, then look to
+ * see if we should preempt this CPU. Only allow this
+ * if it is not pinned or IS pinned to this CPU.
+ * If we are the idle thread, we also try to preempt,
+ * as it will be quicker and, being idle, we won't
+ * lose by doing so.
+ */
+ if ((!forwarded) &&
+ (ke->ke_runq == &runq ||
+ ke->ke_runq == &runq_pcpu[PCPU_GET(cpuid)]))
+#endif
+
+ {
+ if (maybe_preempt(td))
+ return;
+ }
+ }
if ((td->td_proc->p_flag & P_NOLOAD) == 0)
sched_tdcnt++;
runq_add(ke->ke_runq, ke);
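
The rewritten sched_add() above only tries to kick another CPU when the enqueuing thread is not yielding, is not itself the idle thread, at least one other non-halted CPU is idle, and the new thread may run somewhere else; otherwise it falls back to local preemption via maybe_preempt(). Here is a simplified, hedged sketch of that decision only, not the kernel code; the flag parameters, enum names, and example masks are assumptions.

/*
 * Sketch: where should a newly runnable thread cause activity?
 */
#include <stdio.h>

typedef unsigned int cpumask_t;

enum action { RUN_LOCALLY_MAYBE_PREEMPT, KICK_IDLE_CPU, JUST_QUEUE };

static enum action
wakeup_policy(cpumask_t idle_mask, cpumask_t hlt_mask, cpumask_t me,
    int yielding, int can_run_elsewhere)
{
	int i_am_idle = (idle_mask & me) != 0;
	cpumask_t other_idle = idle_mask & ~(hlt_mask | me);

	if (yielding)
		return (JUST_QUEUE);		/* we are switching anyway */
	if (!i_am_idle && other_idle != 0 && can_run_elsewhere)
		return (KICK_IDLE_CPU);		/* forward the wakeup (IPI) */
	return (RUN_LOCALLY_MAYBE_PREEMPT);	/* fall back to preemption here */
}

int
main(void)
{
	/* CPU 0 (mask 0x1) adds work while CPU 2 (bit 0x4) sits idle. */
	enum action a = wakeup_policy(0x4, 0x0, 0x1, 0, 1);

	printf("%s\n", a == KICK_IDLE_CPU ? "kick idle cpu" : "handle locally");
	return (0);
}
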
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index f1b8499..c354c2d 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -49,9 +49,15 @@ __FBSDID("$FreeBSD$");
#include <machine/smp.h>
+#include "opt_sched.h"
+
#ifdef SMP
volatile cpumask_t stopped_cpus;
volatile cpumask_t started_cpus;
+cpumask_t all_cpus;
+cpumask_t idle_cpus_mask;
+cpumask_t hlt_cpus_mask;
+cpumask_t logical_cpus_mask;
void (*cpustop_restartfunc)(void);
#endif
@@ -62,7 +68,6 @@ int mp_maxcpus = MAXCPU;
struct cpu_top *smp_topology;
volatile int smp_started;
-cpumask_t all_cpus;
u_int mp_maxid;
SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD, NULL, "Kernel SMP");
@@ -96,6 +101,46 @@ SYSCTL_INT(_kern_smp, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
&forward_roundrobin_enabled, 0,
"Forwarding of roundrobin to all other CPUs");
+#ifdef SCHED_4BSD
+/* Enable forwarding of wakeups to all other cpus */
+SYSCTL_NODE(_kern_smp, OID_AUTO, ipiwakeup, CTLFLAG_RD, NULL, "Kernel SMP");
+
+static int forward_wakeup_enabled = 0;
+SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW,
+ &forward_wakeup_enabled, 0,
+ "Forwarding of wakeup to idle CPUs");
+
+static int forward_wakeups_requested = 0;
+SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD,
+ &forward_wakeups_requested, 0,
+ "Requests for Forwarding of wakeup to idle CPUs");
+
+static int forward_wakeups_delivered = 0;
+SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD,
+ &forward_wakeups_delivered, 0,
+ "Completed Forwarding of wakeup to idle CPUs");
+
+static int forward_wakeup_use_mask = 0;
+SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW,
+ &forward_wakeup_use_mask, 0,
+ "Use the mask of idle cpus");
+
+static int forward_wakeup_use_loop = 0;
+SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
+ &forward_wakeup_use_loop, 0,
+ "Use a loop to find idle cpus");
+
+static int forward_wakeup_use_single = 0;
+SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, onecpu, CTLFLAG_RW,
+ &forward_wakeup_use_single, 0,
+ "Only signal one idle cpu");
+
+static int forward_wakeup_use_htt = 0;
+SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, htt2, CTLFLAG_RW,
+ &forward_wakeup_use_htt, 0,
+ "account for htt");
+
+#endif /* SCHED_4BSD */
/* Variables needed for SMP rendezvous. */
static void (*smp_rv_setup_func)(void *arg);
static void (*smp_rv_action_func)(void *arg);
@@ -203,6 +248,95 @@ forward_roundrobin(void)
ipi_selected(map, IPI_AST);
}
+#ifdef SCHED_4BSD
+/* enable HTT_2 if you have a 2-way HTT cpu.*/
+int
+forward_wakeup(int cpunum)
+{
+ cpumask_t map, me, dontuse;
+ cpumask_t map2;
+ struct pcpu *pc;
+ cpumask_t id, map3;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+
+ CTR0(KTR_SMP, "forward_wakeup()");
+
+ if ((!forward_wakeup_enabled) ||
+ (forward_wakeup_use_mask == 0 && forward_wakeup_use_loop == 0))
+ return (0);
+ if (!smp_started || cold || panicstr)
+ return (0);
+
+ forward_wakeups_requested++;
+
+/*
+ * check the idle mask we received against what we calculated before
+ * in the old version.
+ */
+ me = PCPU_GET(cpumask);
+ /*
+ * don't bother if we should be doing it ourself..
+ */
+ if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum)))
+ return (0);
+
+ dontuse = me | stopped_cpus | hlt_cpus_mask;
+ map3 = 0;
+ if (forward_wakeup_use_loop) {
+ SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
+ id = pc->pc_cpumask;
+ if ( (id & dontuse) == 0 &&
+ pc->pc_curthread == pc->pc_idlethread) {
+ map3 |= id;
+ }
+ }
+ }
+
+ if (forward_wakeup_use_mask) {
+ map = 0;
+ map = idle_cpus_mask & ~dontuse;
+
+ /* If they are both on, compare and use loop if different */
+ if (forward_wakeup_use_loop) {
+ if (map != map3) {
+ printf("map (%02X) != map3 (%02X)\n",
+ map, map3);
+ map = map3;
+ }
+ }
+ } else {
+ map = map3;
+ }
+ /* If we only allow a specific CPU, then mask off all the others */
+ if (cpunum != NOCPU) {
+ KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum."));
+ map &= (1 << cpunum);
+ } else {
+ /* Try to choose an idle die. */
+ if (forward_wakeup_use_htt) {
+ map2 = (map & (map >> 1)) & 0x5555;
+ if (map2) {
+ map = map2;
+ }
+ }
+
+ /* set only one bit */
+ if (forward_wakeup_use_single) {
+ map = map & ((~map) + 1);
+ }
+ }
+ if (map) {
+ forward_wakeups_delivered++;
+ ipi_selected(map, IPI_AST);
+ return (1);
+ }
+ if (cpunum == NOCPU)
+ printf("forward_wakeup: Idle processor not found\n");
+ return (0);
+}
+#endif /* SCHED_4BSD */
+
/*
* When called the executing CPU will send an IPI to all other CPUs
* requesting that they halt execution.
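
forward_wakeup() above builds a candidate mask of idle, non-halted CPUs and then narrows it with a couple of bit tricks: the "htt2" option keeps only packages whose two hyperthread siblings are both idle, and "onecpu" isolates the lowest set bit so that only a single CPU is signalled. A small hedged sketch of just that mask arithmetic follows; it is not kernel code, and the example mask value is made up.

/*
 * Sketch: the mask arithmetic used when choosing which CPUs to IPI.
 */
#include <stdio.h>

typedef unsigned int cpumask_t;

int
main(void)
{
	cpumask_t map = 0x0b;	/* CPUs 0, 1 and 3 are candidates */

	/*
	 * "htt2": keep only the even bits whose odd sibling is also set,
	 * i.e. 2-way HTT packages where both logical CPUs are idle.
	 */
	cpumask_t both_siblings = (map & (map >> 1)) & 0x5555;

	/* "onecpu": isolate the lowest set bit -- signal a single CPU. */
	cpumask_t lowest = map & (~map + 1);

	printf("map=%#x siblings=%#x lowest=%#x\n", map, both_siblings, lowest);
	/* prints: map=0xb siblings=0x1 lowest=0x1 */
	return (0);
}
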
diff --git a/sys/sys/smp.h b/sys/sys/smp.h
index 12bfbba..af9176f 100644
--- a/sys/sys/smp.h
+++ b/sys/sys/smp.h
@@ -51,11 +51,15 @@ extern volatile cpumask_t started_cpus;
extern volatile cpumask_t stopped_cpus;
#endif /* SMP */
-extern cpumask_t all_cpus;
extern u_int mp_maxid;
extern int mp_ncpus;
extern volatile int smp_started;
+extern cpumask_t all_cpus;
+extern cpumask_t idle_cpus_mask;
+extern cpumask_t hlt_cpus_mask;
+extern cpumask_t logical_cpus_mask;
+
/*
* Macro allowing us to determine whether a CPU is absent at any given
* time, thus permitting us to configure sparse maps of cpuid-dependent
@@ -92,6 +96,7 @@ void cpu_mp_start(void);
void forward_signal(struct thread *);
void forward_roundrobin(void);
+int forward_wakeup(int cpunum);
int restart_cpus(cpumask_t);
int stop_cpus(cpumask_t);
void smp_rendezvous_action(void);