author     julian <julian@FreeBSD.org>  2004-09-01 06:42:02 +0000
committer  julian <julian@FreeBSD.org>  2004-09-01 06:42:02 +0000
commit     8354ba9e3ae8db8ce805e66f768b37de9f172dce (patch)
tree       93ac3864ee5c297785ca19470bbb03069c0240ae
parent     80bf38e921816f690ba642ba76e42b17f5141d66 (diff)

Give the 4bsd scheduler the ability to wake up idle processors
when there is new work to be done.
MFC after: 5 days
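
In outline: each CPU's idle loop maintains a shared bitmask of idle CPUs, and sched_add() consults that mask when a thread becomes runnable, sending an IPI to an idle CPU instead of waiting for its next clock interrupt. The following is a minimal user-space sketch of that idea only — hypothetical names, a plain uint32_t standing in for cpumask_t, and a stub in place of the kernel's ipi_selected(); the real locking (sched_lock) and sysctl knobs appear in the diff below.

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the kernel's cpumask_t bookkeeping. */
    typedef uint32_t cpumask_t;

    static cpumask_t idle_cpus;     /* one bit per idle CPU */

    /* Idle-loop bookkeeping: mark ourselves idle while waiting. */
    static void idle_enter(int cpu) { idle_cpus |=  (cpumask_t)1 << cpu; }
    static void idle_exit(int cpu)  { idle_cpus &= ~((cpumask_t)1 << cpu); }

    /* Stub for ipi_selected(): here we just report the target mask. */
    static void ipi_selected(cpumask_t map) { printf("IPI -> %#x\n", map); }

    /*
     * On enqueue of new work from CPU 'me': if some other CPU is idle,
     * signal it rather than waiting for its next clock interrupt.
     */
    static int forward_wakeup_sketch(int me) {
        cpumask_t map = idle_cpus & ~((cpumask_t)1 << me);
        if (map == 0)
            return 0;           /* nobody else idle; preempt locally */
        map &= (~map + 1);      /* pick one CPU: lowest set bit */
        ipi_selected(map);
        return 1;
    }

    int main(void) {
        idle_enter(2);
        idle_enter(5);
        forward_wakeup_sketch(0);   /* prints IPI -> 0x4 (CPU 2) */
        idle_exit(2);
        forward_wakeup_sketch(0);   /* prints IPI -> 0x20 (CPU 5) */
    }

The real forward_wakeup() in sys/kern/subr_smp.c below is more configurable: sysctls select the idle set by mask, by loop over the pcpu list, restricted to a single CPU, or filtered to idle HTT sibling pairs.
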
-rw-r--r--  sys/amd64/amd64/mp_machdep.c |   2
-rw-r--r--  sys/i386/i386/mp_machdep.c   |   2
-rw-r--r--  sys/i386/include/param.h     |   2
-rw-r--r--  sys/kern/kern_idle.c         |  18
-rw-r--r--  sys/kern/kern_switch.c       |  34
-rw-r--r--  sys/kern/sched_4bsd.c        |  77
-rw-r--r--  sys/kern/subr_smp.c          | 136
-rw-r--r--  sys/sys/smp.h                |   7
8 files changed, 251 insertions, 27 deletions
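
Besides the wakeup IPI, the sys/kern/kern_switch.c hunk below adds a runq_fuzz heuristic to runq_choose(): among the first runq_fuzz entries of the highest-priority queue, prefer a thread that last ran on the choosing CPU, for cache affinity. A simplified sketch of that scan — a hypothetical singly-linked entry type stands in for the kernel's TAILQ of struct kse:

    #include <stddef.h>

    /* Hypothetical flattened stand-in for a run-queue entry. */
    struct entry {
        int last_cpu;           /* td_lastcpu in the real code */
        struct entry *next;     /* ke_procq TAILQ link */
    };

    /*
     * Prefer, among the first 'fuzz' entries, one that last ran on
     * 'cpu'; otherwise fall back to the head of the queue (FIFO).
     */
    static struct entry *
    choose_with_fuzz(struct entry *head, int fuzz, int cpu)
    {
        struct entry *e = head;

        for (int count = fuzz; count-- > 0 && e != NULL; e = e->next) {
            if (e->last_cpu == cpu)
                return (e);     /* warm cache: take this one */
        }
        return (head);
    }
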
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 793a56e..07855ce 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -113,7 +113,6 @@ extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
  */
 static u_int logical_cpus;
-static u_int logical_cpus_mask;
 
 /* used to hold the AP's until we are ready to release them */
 static struct mtx ap_boot_mtx;
 
@@ -138,7 +137,6 @@ static int start_all_aps(void);
 static int start_ap(int apic_id);
 static void release_aps(void *dummy);
 
-static int hlt_cpus_mask;
 static int hlt_logical_cpus;
 static struct sysctl_ctx_list logical_cpu_clist;
 static u_int bootMP_size;
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index 20a3daf..915e3ad 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -183,7 +183,6 @@ volatile int smp_tlb_wait;
  */
 static u_int logical_cpus;
-static u_int logical_cpus_mask;
 
 /* used to hold the AP's until we are ready to release them */
 static struct mtx ap_boot_mtx;
 
@@ -209,7 +208,6 @@ static void install_ap_tramp(void);
 static int start_ap(int apic_id);
 static void release_aps(void *dummy);
 
-static int hlt_cpus_mask;
 static int hlt_logical_cpus;
 static struct sysctl_ctx_list logical_cpu_clist;
diff --git a/sys/i386/include/param.h b/sys/i386/include/param.h
index bb9c7a7..90541bf 100644
--- a/sys/i386/include/param.h
+++ b/sys/i386/include/param.h
@@ -98,9 +98,7 @@
 #define PDRMASK		(NBPDR-1)
 
 /* PREEMPTION exposes scheduler bugs that need to be fixed. */
-#if 0
 #define PREEMPTION
-#endif
 
 #define IOPAGES	2		/* pages of i/o permission bitmap */
diff --git a/sys/kern/kern_idle.c b/sys/kern/kern_idle.c
index 8af741c..d3d891f 100644
--- a/sys/kern/kern_idle.c
+++ b/sys/kern/kern_idle.c
@@ -36,6 +36,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/unistd.h>
+#ifdef SMP
+#include <sys/smp.h>
+#endif
 
 static void idle_setup(void *dummy);
 SYSINIT(idle_setup, SI_SUB_SCHED_IDLE, SI_ORDER_FIRST, idle_setup, NULL)
@@ -96,9 +99,18 @@ idle_proc(void *dummy)
 {
 	struct proc *p;
 	struct thread *td;
+#ifdef SMP
+	cpumask_t mycpu;
+#endif
 
 	td = curthread;
 	p = td->td_proc;
+#ifdef SMP
+	mycpu = PCPU_GET(cpumask);
+	mtx_lock_spin(&sched_lock);
+	idle_cpus_mask |= mycpu;
+	mtx_unlock_spin(&sched_lock);
+#endif
 
 	for (;;) {
 		mtx_assert(&Giant, MA_NOTOWNED);
@@ -106,7 +118,13 @@ idle_proc(void *dummy)
 		cpu_idle();
 
 		mtx_lock_spin(&sched_lock);
+#ifdef SMP
+		idle_cpus_mask &= ~mycpu;
+#endif
 		mi_switch(SW_VOL, NULL);
+#ifdef SMP
+		idle_cpus_mask |= mycpu;
+#endif
 		mtx_unlock_spin(&sched_lock);
 	}
 }
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
index 54d0025..e36128f 100644
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -89,6 +89,7 @@ reassigned to keep this true.
 __FBSDID("$FreeBSD$");
 
 #include "opt_full_preemption.h"
+#include "opt_sched.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -104,6 +105,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/smp.h>
 #endif
 #include <machine/critical.h>
+#if defined(SMP) && defined(SCHED_4BSD)
+#include <sys/sysctl.h>
+#endif
+
 
 CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
@@ -686,6 +691,12 @@ runq_check(struct runq *rq)
 	return (0);
 }
 
+#if defined(SMP) && defined(SCHED_4BSD)
+int runq_fuzz = 1;
+SYSCTL_DECL(_kern_sched);
+SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
+#endif
+
 /*
  * Find the highest priority process on the run queue.
  */
@@ -699,7 +710,28 @@ runq_choose(struct runq *rq)
 	mtx_assert(&sched_lock, MA_OWNED);
 	while ((pri = runq_findbit(rq)) != -1) {
 		rqh = &rq->rq_queues[pri];
-		ke = TAILQ_FIRST(rqh);
+#if defined(SMP) && defined(SCHED_4BSD)
+		/* fuzz == 1 is normal; 0 or less are ignored. */
+		if (runq_fuzz > 1) {
+			/*
+			 * In the first couple of entries, check if
+			 * there is one for our CPU as a preference.
+			 */
+			int count = runq_fuzz;
+			int cpu = PCPU_GET(cpuid);
+			struct kse *ke2;
+			ke2 = ke = TAILQ_FIRST(rqh);
+
+			while (count-- && ke2) {
+				if (ke2->ke_thread->td_lastcpu == cpu) {
+					ke = ke2;
+					break;
+				}
+				ke2 = TAILQ_NEXT(ke2, ke_procq);
+			}
+		} else
+#endif
+			ke = TAILQ_FIRST(rqh);
 		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
 		CTR3(KTR_RUNQ,
 		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index ae8046a..043514b 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -698,6 +698,10 @@ void
 sched_add(struct thread *td, int flags)
 {
 	struct kse *ke;
+#ifdef SMP
+	int forwarded = 0;
+	int cpu;
+#endif
 
 	ke = td->td_kse;
 	mtx_assert(&sched_lock, MA_OWNED);
@@ -711,33 +715,70 @@ sched_add(struct thread *td, int flags)
 	    ("sched_add: process swapped out"));
 
 #ifdef SMP
-	/*
-	 * Only try to preempt if the thread is unpinned or pinned to the
-	 * current CPU.
-	 */
-	if (KSE_CAN_MIGRATE(ke) || ke->ke_runq == &runq_pcpu[PCPU_GET(cpuid)])
-#endif
-	/*
-	 * Don't try preempt if we are already switching.
-	 * all hell might break loose.
-	 */
-	if ((flags & SRQ_YIELDING) == 0)
-		if (maybe_preempt(td))
-			return;
-
-#ifdef SMP
 	if (KSE_CAN_MIGRATE(ke)) {
-		CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p) to gbl runq", ke, td);
+		CTR2(KTR_RUNQ,
+		    "sched_add: adding kse:%p (td:%p) to gbl runq", ke, td);
+		cpu = NOCPU;
 		ke->ke_runq = &runq;
 	} else {
-		CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p)to pcpu runq", ke, td);
 		if (!SKE_RUNQ_PCPU(ke))
-			ke->ke_runq = &runq_pcpu[PCPU_GET(cpuid)];
+			ke->ke_runq = &runq_pcpu[(cpu = PCPU_GET(cpuid))];
+		else
+			cpu = td->td_lastcpu;
+		CTR3(KTR_RUNQ,
+		    "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
 	}
 #else
 	CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p) to runq", ke, td);
 	ke->ke_runq = &runq;
 #endif
+	/*
+	 * If we are yielding (on the way out anyhow) or the thread
+	 * being saved is ourselves, then don't try to be smart about
+	 * preemption or kicking off another CPU, as it won't help
+	 * and may hinder.  In the YIELDING case, we are about to run
+	 * whoever is being put in the queue anyhow, and in the
+	 * OURSELF case, we are putting ourselves on the run queue,
+	 * which also only happens when we are about to yield.
+	 */
+	if ((flags & SRQ_YIELDING) == 0) {
+#ifdef SMP
+		cpumask_t me = PCPU_GET(cpumask);
+		int idle = idle_cpus_mask & me;
+
+		/*
+		 * Only try to kick off another CPU if the thread is
+		 * unpinned or pinned to another CPU, and there are
+		 * other available and idle CPUs.  If we are idle,
+		 * skip straight to preemption.
+		 */
+		if (!idle &&
+		    (idle_cpus_mask & ~(hlt_cpus_mask | me)) &&
+		    (KSE_CAN_MIGRATE(ke) ||
+		     ke->ke_runq != &runq_pcpu[PCPU_GET(cpuid)])) {
+			forwarded = forward_wakeup(cpu);
+		}
+		/*
+		 * If we failed to kick off another CPU, then look to
+		 * see if we should preempt this CPU.  Only allow this
+		 * if the thread is not pinned or is pinned to this
+		 * CPU.  If we are the idle thread, we also try to
+		 * preempt, as it will be quicker and, being idle, we
+		 * won't lose by doing so.
+		 */
+		if (!forwarded &&
+		    (ke->ke_runq == &runq ||
+		     ke->ke_runq == &runq_pcpu[PCPU_GET(cpuid)]))
+#endif
+		{
+			if (maybe_preempt(td))
+				return;
+		}
+	}
 	if ((td->td_proc->p_flag & P_NOLOAD) == 0)
 		sched_tdcnt++;
 	runq_add(ke->ke_runq, ke);
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index f1b8499..c354c2d 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -49,9 +49,15 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/smp.h>
 
+#include "opt_sched.h"
+
 #ifdef SMP
 volatile cpumask_t stopped_cpus;
 volatile cpumask_t started_cpus;
+cpumask_t all_cpus;
+cpumask_t idle_cpus_mask;
+cpumask_t hlt_cpus_mask;
+cpumask_t logical_cpus_mask;
 
 void (*cpustop_restartfunc)(void);
 #endif
@@ -62,7 +68,6 @@ int mp_maxcpus = MAXCPU;
 
 struct cpu_top *smp_topology;
 volatile int smp_started;
-cpumask_t all_cpus;
 u_int mp_maxid;
 
 SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD, NULL, "Kernel SMP");
@@ -96,6 +101,46 @@ SYSCTL_INT(_kern_smp, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
 	   &forward_roundrobin_enabled, 0,
 	   "Forwarding of roundrobin to all other CPUs");
 
+#ifdef SCHED_4BSD
+/* Enable forwarding of wakeups to all other cpus. */
+SYSCTL_NODE(_kern_smp, OID_AUTO, ipiwakeup, CTLFLAG_RD, NULL, "Kernel SMP");
+
+static int forward_wakeup_enabled = 0;
+SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW,
+	   &forward_wakeup_enabled, 0,
+	   "Forwarding of wakeup to idle CPUs");
+
+static int forward_wakeups_requested = 0;
+SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD,
+	   &forward_wakeups_requested, 0,
+	   "Requests for Forwarding of wakeup to idle CPUs");
+
+static int forward_wakeups_delivered = 0;
+SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD,
+	   &forward_wakeups_delivered, 0,
+	   "Completed Forwarding of wakeup to idle CPUs");
+
+static int forward_wakeup_use_mask = 0;
+SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW,
+	   &forward_wakeup_use_mask, 0,
+	   "Use the mask of idle cpus");
+
+static int forward_wakeup_use_loop = 0;
+SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
+	   &forward_wakeup_use_loop, 0,
+	   "Use a loop to find idle cpus");
+
+static int forward_wakeup_use_single = 0;
+SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, onecpu, CTLFLAG_RW,
+	   &forward_wakeup_use_single, 0,
+	   "Only signal one idle cpu");
+
+static int forward_wakeup_use_htt = 0;
+SYSCTL_INT(_kern_smp_ipiwakeup, OID_AUTO, htt2, CTLFLAG_RW,
+	   &forward_wakeup_use_htt, 0,
+	   "account for htt");
+#endif /* SCHED_4BSD */
+
 /* Variables needed for SMP rendezvous. */
 static void (*smp_rv_setup_func)(void *arg);
 static void (*smp_rv_action_func)(void *arg);
@@ -203,6 +248,95 @@ forward_roundrobin(void)
 	ipi_selected(map, IPI_AST);
 }
 
+#ifdef SCHED_4BSD
+/* Enable HTT_2 if you have a 2-way HTT cpu. */
+int
+forward_wakeup(int cpunum)
+{
+	cpumask_t map, me, dontuse;
+	cpumask_t map2;
+	struct pcpu *pc;
+	cpumask_t id, map3;
+
+	mtx_assert(&sched_lock, MA_OWNED);
+
+	CTR0(KTR_SMP, "forward_wakeup()");
+
+	if ((!forward_wakeup_enabled) ||
+	    (forward_wakeup_use_mask == 0 && forward_wakeup_use_loop == 0))
+		return (0);
+	if (!smp_started || cold || panicstr)
+		return (0);
+
+	forward_wakeups_requested++;
+
+	/*
+	 * Check the idle mask we received against what we calculated
+	 * before in the old version.
+	 */
+	me = PCPU_GET(cpumask);
+
+	/* Don't bother if we should be doing it ourselves. */
+	if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum)))
+		return (0);
+
+	dontuse = me | stopped_cpus | hlt_cpus_mask;
+	map3 = 0;
+	if (forward_wakeup_use_loop) {
+		SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
+			id = pc->pc_cpumask;
+			if ((id & dontuse) == 0 &&
+			    pc->pc_curthread == pc->pc_idlethread) {
+				map3 |= id;
+			}
+		}
+	}
+
+	if (forward_wakeup_use_mask) {
+		map = 0;
+		map = idle_cpus_mask & ~dontuse;
+
+		/* If they are both on, compare and use loop if different. */
+		if (forward_wakeup_use_loop) {
+			if (map != map3) {
+				printf("map (%02X) != map3 (%02X)\n",
+				    map, map3);
+				map = map3;
+			}
+		}
+	} else {
+		map = map3;
+	}
+
+	/* If we only allow a specific CPU, then mask off all the others. */
+	if (cpunum != NOCPU) {
+		KASSERT((cpunum <= mp_maxcpus),
+		    ("forward_wakeup: bad cpunum."));
+		map &= (1 << cpunum);
+	} else {
+		/* Try to choose an idle die. */
+		if (forward_wakeup_use_htt) {
+			map2 = (map & (map >> 1)) & 0x5555;
+			if (map2) {
+				map = map2;
+			}
+		}
+
+		/* Set only one bit. */
+		if (forward_wakeup_use_single) {
+			map = map & ((~map) + 1);
+		}
+	}
+	if (map) {
+		forward_wakeups_delivered++;
+		ipi_selected(map, IPI_AST);
+		return (1);
+	}
+	if (cpunum == NOCPU)
+		printf("forward_wakeup: Idle processor not found\n");
+	return (0);
+}
+#endif /* SCHED_4BSD */
+
 /*
  * When called the executing CPU will send an IPI to all other CPUs
  * requesting that they halt execution.
diff --git a/sys/sys/smp.h b/sys/sys/smp.h
index 12bfbba..af9176f 100644
--- a/sys/sys/smp.h
+++ b/sys/sys/smp.h
@@ -51,11 +51,15 @@ extern volatile cpumask_t started_cpus;
 extern volatile cpumask_t stopped_cpus;
 #endif /* SMP */
 
-extern cpumask_t all_cpus;
 extern u_int mp_maxid;
 extern int mp_ncpus;
 extern volatile int smp_started;
 
+extern cpumask_t all_cpus;
+extern cpumask_t idle_cpus_mask;
+extern cpumask_t hlt_cpus_mask;
+extern cpumask_t logical_cpus_mask;
+
 /*
  * Macro allowing us to determine whether a CPU is absent at any given
  * time, thus permitting us to configure sparse maps of cpuid-dependent
@@ -92,6 +96,7 @@ void	cpu_mp_start(void);
 
 void	forward_signal(struct thread *);
 void	forward_roundrobin(void);
+int	forward_wakeup(int cpunum);
 int	restart_cpus(cpumask_t);
 int	stop_cpus(cpumask_t);
 void	smp_rendezvous_action(void);
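
Two mask tricks in forward_wakeup() above are worth unpacking: map & ((~map) + 1) isolates the lowest set bit of map (two's-complement negation), and (map & (map >> 1)) & 0x5555 keeps an even bit only when its odd neighbor is also set, i.e. HTT packages whose two logical CPUs are both idle, assuming siblings are numbered in adjacent pairs. A small self-check with worked values:

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        uint32_t map = 0x2C;            /* CPUs 2, 3, and 5 idle */

        /* Lowest set bit: ~0x2C + 1 == 0xFFFFFFD4; AND leaves 0x04. */
        assert((map & (~map + 1)) == 0x04);

        /* HTT pairs: bits 2 and 3 are both set, so only bit 2 survives. */
        assert(((map & (map >> 1)) & 0x5555) == 0x04);
        return 0;
    }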