-rw-r--r--  sys/kern/kern_switch.c  |  92
-rw-r--r--  sys/kern/sched_4bsd.c   | 160
-rw-r--r--  sys/kern/sched_core.c   |  59
-rw-r--r--  sys/kern/sched_ule.c    | 175
-rw-r--r--  sys/sys/mutex.h         |   9
-rw-r--r--  sys/sys/proc.h          | 113
-rw-r--r--  sys/sys/sched.h         |  16
7 files changed, 460 insertions, 164 deletions
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c index 1ccf64c..13bba12 100644 --- a/sys/kern/kern_switch.c +++ b/sys/kern/kern_switch.c @@ -49,6 +49,8 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #endif +#include <machine/cpu.h> + /* Uncomment this to enable logging of critical_enter/exit. */ #if 0 #define KTR_CRITICAL KTR_SCHED @@ -77,6 +79,49 @@ static int kern_sched_preemption = 0; SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD, &kern_sched_preemption, 0, "Kernel preemption enabled"); +#ifdef SCHED_STATS +long switch_preempt; +long switch_owepreempt; +long switch_turnstile; +long switch_sleepq; +long switch_sleepqtimo; +long switch_relinquish; +long switch_needresched; +static SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats"); +SYSCTL_INT(_kern_sched_stats, OID_AUTO, preempt, CTLFLAG_RD, &switch_preempt, 0, ""); +SYSCTL_INT(_kern_sched_stats, OID_AUTO, owepreempt, CTLFLAG_RD, &switch_owepreempt, 0, ""); +SYSCTL_INT(_kern_sched_stats, OID_AUTO, turnstile, CTLFLAG_RD, &switch_turnstile, 0, ""); +SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepq, CTLFLAG_RD, &switch_sleepq, 0, ""); +SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepqtimo, CTLFLAG_RD, &switch_sleepqtimo, 0, ""); +SYSCTL_INT(_kern_sched_stats, OID_AUTO, relinquish, CTLFLAG_RD, &switch_relinquish, 0, ""); +SYSCTL_INT(_kern_sched_stats, OID_AUTO, needresched, CTLFLAG_RD, &switch_needresched, 0, ""); +static int +sysctl_stats_reset(SYSCTL_HANDLER_ARGS) +{ + int error; + int val; + + val = 0; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + if (val == 0) + return (0); + switch_preempt = 0; + switch_owepreempt = 0; + switch_turnstile = 0; + switch_sleepq = 0; + switch_sleepqtimo = 0; + switch_relinquish = 0; + switch_needresched = 0; + + return (0); +} + +SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL, + 0, sysctl_stats_reset, "I", "Reset scheduler statistics"); +#endif + /************************************************************************ * Functions that manipulate runnability from a thread perspective. * ************************************************************************/ @@ -142,13 +187,13 @@ critical_exit(void) #ifdef PREEMPTION if (td->td_critnest == 1) { td->td_critnest = 0; - mtx_assert(&sched_lock, MA_NOTOWNED); if (td->td_owepreempt) { td->td_critnest = 1; - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_critnest--; + SCHED_STAT_INC(switch_owepreempt); mi_switch(SW_INVOL, NULL); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } } else #endif @@ -173,7 +218,6 @@ maybe_preempt(struct thread *td) int cpri, pri; #endif - mtx_assert(&sched_lock, MA_OWNED); #ifdef PREEMPTION /* * The new thread should not preempt the current thread if any of the @@ -199,6 +243,7 @@ maybe_preempt(struct thread *td) * to the new thread. */ ctd = curthread; + THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT ((ctd->td_sched != NULL && ctd->td_sched->ts_thread == ctd), ("thread has no (or wrong) sched-private part.")); KASSERT((td->td_inhibitors == 0), @@ -219,15 +264,25 @@ maybe_preempt(struct thread *td) ctd->td_owepreempt = 1; return (0); } - /* * Thread is runnable but not yet put on system run queue. 
*/ + MPASS(ctd->td_lock == &sched_lock); + MPASS(td->td_lock == &sched_lock); MPASS(TD_ON_RUNQ(td)); TD_SET_RUNNING(td); CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td, td->td_proc->p_pid, td->td_proc->p_comm); + SCHED_STAT_INC(switch_preempt); mi_switch(SW_INVOL|SW_PREEMPT, td); + /* + * td's lock pointer may have changed. We have to return with it + * locked. + */ + spinlock_enter(); + thread_unlock(ctd); + thread_lock(td); + spinlock_exit(); return (1); #else return (0); @@ -442,7 +497,6 @@ runq_choose(struct runq *rq) struct td_sched *ts; int pri; - mtx_assert(&sched_lock, MA_OWNED); while ((pri = runq_findbit(rq)) != -1) { rqh = &rq->rq_queues[pri]; #if defined(SMP) && defined(SCHED_4BSD) @@ -484,7 +538,6 @@ runq_choose_from(struct runq *rq, u_char idx) struct td_sched *ts; int pri; - mtx_assert(&sched_lock, MA_OWNED); if ((pri = runq_findbit_from(rq, idx)) != -1) { rqh = &rq->rq_queues[pri]; ts = TAILQ_FIRST(rqh); @@ -519,9 +572,20 @@ runq_remove_idx(struct runq *rq, struct td_sched *ts, u_char *idx) KASSERT(ts->ts_thread->td_proc->p_sflag & PS_INMEM, ("runq_remove_idx: process swapped out")); pri = ts->ts_rqindex; + KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri)); rqh = &rq->rq_queues[pri]; CTR5(KTR_RUNQ, "runq_remove_idx: td=%p, ts=%p pri=%d %d rqh=%p", ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh); + { + struct td_sched *nts; + + TAILQ_FOREACH(nts, rqh, ts_procq) + if (nts == ts) + break; + if (ts != nts) + panic("runq_remove_idx: ts %p not on rqindex %d", + ts, pri); + } TAILQ_REMOVE(rqh, ts, ts_procq); if (TAILQ_EMPTY(rqh)) { CTR0(KTR_RUNQ, "runq_remove_idx: empty"); @@ -589,18 +653,4 @@ sched_set_concurrency(struct proc *p, int concurrency) { } -/* - * Called from thread_exit() for all exiting thread - * - * Not to be confused with sched_exit_thread() - * that is only called from thread_exit() for threads exiting - * without the rest of the process exiting because it is also called from - * sched_exit() and we wouldn't want to call it twice. - * XXX This can probably be fixed. - */ -void -sched_thread_exit(struct thread *td) -{ -} - #endif /* KERN_SWITCH_INCLUDE */ diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c index 66952ec..a4b1e08 100644 --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -248,7 +248,7 @@ static void maybe_resched(struct thread *td) { - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); if (td->td_priority < curthread->td_priority) curthread->td_flags |= TDF_NEEDRESCHED; } @@ -377,10 +377,7 @@ schedcpu(void) realstathz = stathz ? stathz : hz; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { - /* - * Prevent state changes and protect run queue. - */ - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); /* * Increment time in/out of memory. We ignore overflow; with * 16-bit int's (remember them?) overflow takes 45 days. @@ -388,6 +385,7 @@ schedcpu(void) p->p_swtime++; FOREACH_THREAD_IN_PROC(p, td) { awake = 0; + thread_lock(td); ts = td->td_sched; /* * Increment sleep time (if sleeping). 
We @@ -456,13 +454,16 @@ XXX this is broken td->td_slptime = 0; } else td->td_slptime++; - if (td->td_slptime > 1) + if (td->td_slptime > 1) { + thread_unlock(td); continue; + } td->td_estcpu = decay_cpu(loadfac, td->td_estcpu); resetpriority(td); resetpriority_thread(td); + thread_unlock(td); } /* end of thread loop */ - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } /* end of process loop */ sx_sunlock(&allproc_lock); } @@ -575,6 +576,7 @@ schedinit(void) */ proc0.p_sched = NULL; /* XXX */ thread0.td_sched = &td_sched0; + thread0.td_lock = &sched_lock; td_sched0.ts_thread = &thread0; } @@ -615,7 +617,7 @@ sched_clock(struct thread *td) { struct td_sched *ts; - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); ts = td->td_sched; ts->ts_cpticks++; @@ -635,22 +637,23 @@ sched_exit(struct proc *p, struct thread *td) CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d", td, td->td_proc->p_comm, td->td_priority); - + PROC_SLOCK_ASSERT(p, MA_OWNED); sched_exit_thread(FIRST_THREAD_IN_PROC(p), td); } void sched_exit_thread(struct thread *td, struct thread *child) { - struct proc *childproc = child->td_proc; CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", - child, childproc->p_comm, child->td_priority); + child, child->td_proc->p_comm, child->td_priority); + thread_lock(td); td->td_estcpu = ESTCPULIM(td->td_estcpu + child->td_estcpu); - childproc->p_estcpu = ESTCPULIM(childproc->p_estcpu + - child->td_estcpu); + thread_unlock(td); + mtx_lock_spin(&sched_lock); if ((child->td_proc->p_flag & P_NOLOAD) == 0) sched_load_rem(); + mtx_unlock_spin(&sched_lock); } void @@ -663,6 +666,7 @@ void sched_fork_thread(struct thread *td, struct thread *childtd) { childtd->td_estcpu = td->td_estcpu; + childtd->td_lock = &sched_lock; sched_newthread(childtd); } @@ -672,18 +676,20 @@ sched_nice(struct proc *p, int nice) struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); - mtx_assert(&sched_lock, MA_OWNED); + PROC_SLOCK_ASSERT(p, MA_OWNED); p->p_nice = nice; FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); resetpriority(td); resetpriority_thread(td); + thread_unlock(td); } } void sched_class(struct thread *td, int class) { - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); td->td_pri_class = class; } @@ -697,7 +703,7 @@ sched_priority(struct thread *td, u_char prio) td, td->td_proc->p_comm, td->td_priority, prio, curthread, curthread->td_proc->p_comm); - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); if (td->td_priority == prio) return; td->td_priority = prio; @@ -818,7 +824,7 @@ void sched_sleep(struct thread *td) { - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); td->td_slptime = 0; } @@ -831,26 +837,18 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) ts = td->td_sched; p = td->td_proc; - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); + /* + * Switch to the sched lock to fix things up and pick + * a new thread. + */ + if (td->td_lock != &sched_lock) { + mtx_lock_spin(&sched_lock); + thread_unlock(td); + } if ((p->p_flag & P_NOLOAD) == 0) sched_load_rem(); -#if 0 - /* - * We are volunteering to switch out so we get to nominate - * a successor for the rest of our quantum - * First try another thread in our process - * - * this is too expensive to do without per process run queues - * so skip it for now. - * XXX keep this comment as a marker. 
- */ - if (sched_followon && - (p->p_flag & P_HADTHREADS) && - (flags & SW_VOL) && - newtd == NULL) - newtd = mumble(); -#endif if (newtd) newtd->td_flags |= (td->td_flags & TDF_NEEDRESCHED); @@ -896,6 +894,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) } else { newtd = choosethread(); } + MPASS(newtd->td_lock == &sched_lock); if (td != newtd) { #ifdef HWPMC_HOOKS @@ -904,7 +903,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) #endif /* I feel sleepy */ - cpu_switch(td, newtd); + cpu_switch(td, newtd, td->td_lock); /* * Where am I? What year is it? * We are in the same thread that went to sleep above, @@ -932,12 +931,13 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) #endif sched_lock.mtx_lock = (uintptr_t)td; td->td_oncpu = PCPU_GET(cpuid); + MPASS(td->td_lock == &sched_lock); } void sched_wakeup(struct thread *td) { - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); if (td->td_slptime > 1) { updatepri(td); resetpriority(td); @@ -1079,7 +1079,7 @@ sched_add(struct thread *td, int flags) int single_cpu = 0; ts = td->td_sched; - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT((td->td_inhibitors == 0), ("sched_add: trying to run inhibited thread")); KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)), @@ -1089,6 +1089,14 @@ sched_add(struct thread *td, int flags) CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", td, td->td_proc->p_comm, td->td_priority, curthread, curthread->td_proc->p_comm); + /* + * Now that the thread is moving to the run-queue, set the lock + * to the scheduler's lock. + */ + if (td->td_lock != &sched_lock) { + mtx_lock_spin(&sched_lock); + thread_lock_set(td, &sched_lock); + } TD_SET_RUNQ(td); if (td->td_pinned != 0) { @@ -1140,7 +1148,7 @@ sched_add(struct thread *td, int flags) { struct td_sched *ts; ts = td->td_sched; - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT((td->td_inhibitors == 0), ("sched_add: trying to run inhibited thread")); KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)), @@ -1150,6 +1158,14 @@ sched_add(struct thread *td, int flags) CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", td, td->td_proc->p_comm, td->td_priority, curthread, curthread->td_proc->p_comm); + /* + * Now that the thread is moving to the run-queue, set the lock + * to the scheduler's lock. 
+ */ + if (td->td_lock != &sched_lock) { + mtx_lock_spin(&sched_lock); + thread_lock_set(td, &sched_lock); + } TD_SET_RUNQ(td); CTR2(KTR_RUNQ, "sched_add: adding td_sched:%p (td:%p) to runq", ts, td); ts->ts_runq = &runq; @@ -1207,6 +1223,7 @@ sched_choose(void) struct td_sched *ts; struct runq *rq; + mtx_assert(&sched_lock, MA_OWNED); #ifdef SMP struct td_sched *kecpu; @@ -1256,10 +1273,10 @@ sched_userret(struct thread *td) KASSERT((td->td_flags & TDF_BORROWING) == 0, ("thread with borrowed priority returning to userland")); if (td->td_priority != td->td_user_pri) { - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_priority = td->td_user_pri; td->td_base_pri = td->td_user_pri; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } } @@ -1268,7 +1285,7 @@ sched_bind(struct thread *td, int cpu) { struct td_sched *ts; - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(TD_IS_RUNNING(td), ("sched_bind: cannot bind non-running thread")); @@ -1287,25 +1304,26 @@ sched_bind(struct thread *td, int cpu) void sched_unbind(struct thread* td) { - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); td->td_sched->ts_flags &= ~TSF_BOUND; } int sched_is_bound(struct thread *td) { - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); return (td->td_sched->ts_flags & TSF_BOUND); } void sched_relinquish(struct thread *td) { - mtx_lock_spin(&sched_lock); + thread_lock(td); if (td->td_pri_class == PRI_TIMESHARE) sched_prio(td, PRI_MAX_TIMESHARE); + SCHED_STAT_INC(switch_relinquish); mi_switch(SW_VOL, NULL); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } int @@ -1363,5 +1381,57 @@ sched_idletd(void *dummy) } } +/* + * A CPU is entering for the first time or a thread is exiting. + */ +void +sched_throw(struct thread *td) +{ + /* + * Correct spinlock nesting. The idle thread context that we are + * borrowing was created so that it would start out with a single + * spin lock (sched_lock) held in fork_trampoline(). Since we've + * explicitly acquired locks in this function, the nesting count + * is now 2 rather than 1. Since we are nested, calling + * spinlock_exit() will simply adjust the counts without allowing + * spin lock using code to interrupt us. + */ + if (td == NULL) { + mtx_lock_spin(&sched_lock); + spinlock_exit(); + } else { + MPASS(td->td_lock == &sched_lock); + } + mtx_assert(&sched_lock, MA_OWNED); + KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count")); + PCPU_SET(switchtime, cpu_ticks()); + PCPU_SET(switchticks, ticks); + cpu_throw(td, choosethread()); /* doesn't return */ +} + +void +sched_fork_exit(struct thread *ctd) +{ + struct thread *td; + + /* + * Finish setting up thread glue so that it begins execution in a + * non-nested critical section with sched_lock held but not recursed. + */ + ctd->td_oncpu = PCPU_GET(cpuid); + sched_lock.mtx_lock = (uintptr_t)ctd; + THREAD_LOCK_ASSERT(ctd, MA_OWNED | MA_NOTRECURSED); + /* + * Processes normally resume in mi_switch() after being + * cpu_switch()'ed to, but when children start up they arrive here + * instead, so we must do much the same things as mi_switch() would. 
+ */ + if ((td = PCPU_GET(deadthread))) { + PCPU_SET(deadthread, NULL); + thread_stash(td); + } + thread_unlock(ctd); +} + #define KERN_SWITCH_INCLUDE 1 #include "kern/kern_switch.c" diff --git a/sys/kern/sched_core.c b/sys/kern/sched_core.c index b0994f8..4cec09b 100644 --- a/sys/kern/sched_core.c +++ b/sys/kern/sched_core.c @@ -784,6 +784,7 @@ schedinit(void) */ proc0.p_sched = NULL; /* XXX */ thread0.td_sched = &kse0; + thread0.td_lock = &sched_lock; kse0.ts_thread = &thread0; kse0.ts_slice = 100; } @@ -1018,7 +1019,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); #endif - cpu_switch(td, newtd); + cpu_switch(td, newtd, td->td_lock); #ifdef HWPMC_HOOKS if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); @@ -1110,6 +1111,7 @@ sched_fork_thread(struct thread *td, struct thread *child) ts = td->td_sched; ts2 = child->td_sched; + child->td_lock = td->td_lock; ts2->ts_slptime = ts2->ts_slptime * CHILD_WEIGHT / 100; if (child->td_pri_class == PRI_TIMESHARE) sched_user_prio(child, sched_calc_pri(ts2)); @@ -1142,7 +1144,8 @@ sched_class(struct thread *td, int class) void sched_exit(struct proc *p, struct thread *childtd) { - mtx_assert(&sched_lock, MA_OWNED); + + PROC_SLOCK_ASSERT(p, MA_OWNED); sched_exit_thread(FIRST_THREAD_IN_PROC(p), childtd); } @@ -1747,5 +1750,57 @@ sched_idletd(void *dummy) } } +/* + * A CPU is entering for the first time or a thread is exiting. + */ +void +sched_throw(struct thread *td) +{ + /* + * Correct spinlock nesting. The idle thread context that we are + * borrowing was created so that it would start out with a single + * spin lock (sched_lock) held in fork_trampoline(). Since we've + * explicitly acquired locks in this function, the nesting count + * is now 2 rather than 1. Since we are nested, calling + * spinlock_exit() will simply adjust the counts without allowing + * spin lock using code to interrupt us. + */ + if (td == NULL) { + mtx_lock_spin(&sched_lock); + spinlock_exit(); + } else { + MPASS(td->td_lock == &sched_lock); + } + mtx_assert(&sched_lock, MA_OWNED); + KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count")); + PCPU_SET(switchtime, cpu_ticks()); + PCPU_SET(switchticks, ticks); + cpu_throw(td, choosethread()); /* doesn't return */ +} + +void +sched_fork_exit(struct thread *ctd) +{ + struct thread *td; + + /* + * Finish setting up thread glue so that it begins execution in a + * non-nested critical section with sched_lock held but not recursed. + */ + ctd->td_oncpu = PCPU_GET(cpuid); + sched_lock.mtx_lock = (uintptr_t)ctd; + THREAD_LOCK_ASSERT(ctd, MA_OWNED | MA_NOTRECURSED); + /* + * Processes normally resume in mi_switch() after being + * cpu_switch()'ed to, but when children start up they arrive here + * instead, so we must do much the same things as mi_switch() would. + */ + if ((td = PCPU_GET(deadthread))) { + PCPU_SET(deadthread, NULL); + thread_stash(td); + } + thread_unlock(ctd); +} + #define KERN_SWITCH_INCLUDE 1 #include "kern/kern_switch.c" diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index 4f4cf41..30761fb 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -229,6 +229,7 @@ static int ipi_thresh = PRI_MIN_KERN; static int steal_htt = 1; static int steal_busy = 1; static int busy_thresh = 4; +static int topology = 0; /* * One thread queue per processor. 
@@ -434,7 +435,7 @@ tdq_load_add(struct tdq *tdq, struct td_sched *ts) mtx_assert(&sched_lock, MA_OWNED); class = PRI_BASE(ts->ts_thread->td_pri_class); tdq->tdq_load++; - CTR1(KTR_SCHED, "load: %d", tdq->tdq_load); + CTR2(KTR_SCHED, "cpu %jd load: %d", TDQ_ID(tdq), tdq->tdq_load); if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0) #ifdef SMP @@ -997,7 +998,7 @@ sched_setup(void *dummy) tdq = &tdq_cpu[i]; tdq_setup(&tdq_cpu[i]); } - if (1) { + if (smp_topology == NULL) { struct tdq_group *tdg; struct tdq *tdq; int cpus; @@ -1027,6 +1028,7 @@ sched_setup(void *dummy) struct cpu_group *cg; int j; + topology = 1; for (i = 0; i < smp_topology->ct_count; i++) { cg = &smp_topology->ct_group[i]; tdg = &tdq_groups[i]; @@ -1248,6 +1250,7 @@ schedinit(void) */ proc0.p_sched = NULL; /* XXX */ thread0.td_sched = &td_sched0; + thread0.td_lock = &sched_lock; td_sched0.ts_ltick = ticks; td_sched0.ts_ftick = ticks; td_sched0.ts_thread = &thread0; @@ -1296,7 +1299,7 @@ sched_thread_priority(struct thread *td, u_char prio) td, td->td_proc->p_comm, td->td_priority, prio, curthread, curthread->td_proc->p_comm); ts = td->td_sched; - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); if (td->td_priority == prio) return; @@ -1307,9 +1310,10 @@ sched_thread_priority(struct thread *td, u_char prio) * queue. This could be optimized to not re-add in some * cases. */ + MPASS(td->td_lock == &sched_lock); sched_rem(td); td->td_priority = prio; - sched_add(td, SRQ_BORROWING); + sched_add(td, SRQ_BORROWING|SRQ_OURSELF); } else td->td_priority = prio; } @@ -1427,7 +1431,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) struct td_sched *ts; int preempt; - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); preempt = flags & SW_PREEMPT; tdq = TDQ_SELF(); @@ -1440,24 +1444,33 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) * If the thread has been assigned it may be in the process of switching * to the new cpu. This is the case in sched_bind(). */ + /* + * Switch to the sched lock to fix things up and pick + * a new thread. + */ + if (td->td_lock != &sched_lock) { + mtx_lock_spin(&sched_lock); + thread_unlock(td); + } if (TD_IS_IDLETHREAD(td)) { + MPASS(td->td_lock == &sched_lock); TD_SET_CAN_RUN(td); - } else { + } else if (TD_IS_RUNNING(td)) { + /* + * Don't allow the thread to migrate + * from a preemption. + */ tdq_load_rem(tdq, ts); - if (TD_IS_RUNNING(td)) { - /* - * Don't allow the thread to migrate - * from a preemption. - */ - if (preempt) - sched_pin_td(td); - sched_add(td, preempt ? - SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : - SRQ_OURSELF|SRQ_YIELDING); - if (preempt) - sched_unpin_td(td); - } - } + if (preempt) + sched_pin_td(td); + sched_add(td, preempt ? 
+ SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : + SRQ_OURSELF|SRQ_YIELDING); + if (preempt) + sched_unpin_td(td); + } else + tdq_load_rem(tdq, ts); + mtx_assert(&sched_lock, MA_OWNED); if (newtd != NULL) { /* * If we bring in a thread account for it as if it had been @@ -1473,7 +1486,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); #endif - cpu_switch(td, newtd); + cpu_switch(td, newtd, td->td_lock); #ifdef HWPMC_HOOKS if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); @@ -1481,6 +1494,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) } sched_lock.mtx_lock = (uintptr_t)td; td->td_oncpu = PCPU_GET(cpuid); + MPASS(td->td_lock == &sched_lock); } void @@ -1489,12 +1503,14 @@ sched_nice(struct proc *p, int nice) struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); - mtx_assert(&sched_lock, MA_OWNED); + PROC_SLOCK_ASSERT(p, MA_OWNED); p->p_nice = nice; FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); sched_priority(td); sched_prio(td, td->td_base_user_pri); + thread_unlock(td); } } @@ -1502,7 +1518,7 @@ void sched_sleep(struct thread *td) { - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); td->td_sched->ts_slptime = ticks; } @@ -1513,7 +1529,7 @@ sched_wakeup(struct thread *td) struct td_sched *ts; int slptime; - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); ts = td->td_sched; /* * If we slept for more than a tick update our interactivity and @@ -1542,7 +1558,7 @@ sched_wakeup(struct thread *td) void sched_fork(struct thread *td, struct thread *child) { - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); sched_fork_thread(td, child); /* * Penalize the parent and child for forking. @@ -1563,7 +1579,9 @@ sched_fork_thread(struct thread *td, struct thread *child) /* * Initialize child. */ + THREAD_LOCK_ASSERT(td, MA_OWNED); sched_newthread(child); + child->td_lock = &sched_lock; ts = td->td_sched; ts2 = child->td_sched; ts2->ts_cpu = ts->ts_cpu; @@ -1588,7 +1606,7 @@ void sched_class(struct thread *td, int class) { - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); if (td->td_pri_class == class) return; @@ -1627,6 +1645,7 @@ sched_exit(struct proc *p, struct thread *child) CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d", child, child->td_proc->p_comm, child->td_priority); + PROC_SLOCK_ASSERT(p, MA_OWNED); td = FIRST_THREAD_IN_PROC(p); sched_exit_thread(td, child); } @@ -1638,7 +1657,9 @@ sched_exit_thread(struct thread *td, struct thread *child) CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", child, child->td_proc->p_comm, child->td_priority); + thread_lock(child); tdq_load_rem(TDQ_CPU(child->td_sched->ts_cpu), child->td_sched); + thread_unlock(child); #ifdef KSE /* * KSE forks and exits so often that this penalty causes short-lived @@ -1653,9 +1674,11 @@ sched_exit_thread(struct thread *td, struct thread *child) * sleep time as a penalty to the parent. This causes shells that * launch expensive things to mark their children as expensive. 
*/ + thread_lock(td); td->td_sched->skg_runtime += child->td_sched->skg_runtime; sched_interact_update(td); sched_priority(td); + thread_unlock(td); } void @@ -1673,10 +1696,10 @@ sched_userret(struct thread *td) KASSERT((td->td_flags & TDF_BORROWING) == 0, ("thread with borrowed priority returning to userland")); if (td->td_priority != td->td_user_pri) { - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_priority = td->td_user_pri; td->td_base_pri = td->td_user_pri; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } } @@ -1805,9 +1828,22 @@ sched_preempt(struct thread *td) */ MPASS(TD_ON_RUNQ(td)); TD_SET_RUNNING(td); + MPASS(ctd->td_lock == &sched_lock); + MPASS(td->td_lock == &sched_lock); CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td, td->td_proc->p_pid, td->td_proc->p_comm); + /* + * We enter the switch with two runnable threads that both have + * the same lock. When we return td may be sleeping so we need + * to switch locks to make sure he's locked correctly. + */ + SCHED_STAT_INC(switch_preempt); mi_switch(SW_INVOL|SW_PREEMPT, td); + spinlock_enter(); + thread_unlock(ctd); + thread_lock(td); + spinlock_exit(); + return (1); } @@ -1824,7 +1860,7 @@ sched_add(struct thread *td, int flags) #endif ts = td->td_sched; - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", td, td->td_proc->p_comm, td->td_priority, curthread, curthread->td_proc->p_comm); @@ -1834,8 +1870,15 @@ sched_add(struct thread *td, int flags) ("sched_add: bad thread state")); KASSERT(td->td_proc->p_sflag & PS_INMEM, ("sched_add: process swapped out")); - KASSERT(ts->ts_runq == NULL, - ("sched_add: thread %p is still assigned to a run queue", td)); + /* + * Now that the thread is moving to the run-queue, set the lock + * to the scheduler's lock. 
+ */ + if (td->td_lock != &sched_lock) { + mtx_lock_spin(&sched_lock); + thread_lock_set(td, &sched_lock); + } + mtx_assert(&sched_lock, MA_OWNED); TD_SET_RUNQ(td); tdq = TDQ_SELF(); class = PRI_BASE(td->td_pri_class); @@ -1920,7 +1963,7 @@ sched_rem(struct thread *td) CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", td, td->td_proc->p_comm, td->td_priority, curthread, curthread->td_proc->p_comm); - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); ts = td->td_sched; KASSERT(TD_ON_RUNQ(td), ("sched_rem: thread not on run queue")); @@ -1942,7 +1985,7 @@ sched_pctcpu(struct thread *td) if (ts == NULL) return (0); - mtx_lock_spin(&sched_lock); + thread_lock(td); if (ts->ts_ticks) { int rtick; @@ -1952,7 +1995,7 @@ sched_pctcpu(struct thread *td) pctcpu = (FSCALE * ((FSCALE * rtick)/hz)) >> FSHIFT; } td->td_proc->p_swtime = ts->ts_ltick - ts->ts_ftick; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); return (pctcpu); } @@ -1962,7 +2005,7 @@ sched_bind(struct thread *td, int cpu) { struct td_sched *ts; - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); ts = td->td_sched; if (ts->ts_flags & TSF_BOUND) sched_unbind(td); @@ -1982,7 +2025,7 @@ sched_unbind(struct thread *td) { struct td_sched *ts; - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); ts = td->td_sched; if ((ts->ts_flags & TSF_BOUND) == 0) return; @@ -1995,18 +2038,19 @@ sched_unbind(struct thread *td) int sched_is_bound(struct thread *td) { - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); return (td->td_sched->ts_flags & TSF_BOUND); } void sched_relinquish(struct thread *td) { - mtx_lock_spin(&sched_lock); + thread_lock(td); if (td->td_pri_class == PRI_TIMESHARE) sched_prio(td, PRI_MAX_TIMESHARE); + SCHED_STAT_INC(switch_relinquish); mi_switch(SW_VOL, NULL); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } int @@ -2071,6 +2115,58 @@ sched_idletd(void *dummy) cpu_idle(); } +/* + * A CPU is entering for the first time or a thread is exiting. + */ +void +sched_throw(struct thread *td) +{ + /* + * Correct spinlock nesting. The idle thread context that we are + * borrowing was created so that it would start out with a single + * spin lock (sched_lock) held in fork_trampoline(). Since we've + * explicitly acquired locks in this function, the nesting count + * is now 2 rather than 1. Since we are nested, calling + * spinlock_exit() will simply adjust the counts without allowing + * spin lock using code to interrupt us. + */ + if (td == NULL) { + mtx_lock_spin(&sched_lock); + spinlock_exit(); + } else { + MPASS(td->td_lock == &sched_lock); + } + mtx_assert(&sched_lock, MA_OWNED); + KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count")); + PCPU_SET(switchtime, cpu_ticks()); + PCPU_SET(switchticks, ticks); + cpu_throw(td, choosethread()); /* doesn't return */ +} + +void +sched_fork_exit(struct thread *ctd) +{ + struct thread *td; + + /* + * Finish setting up thread glue so that it begins execution in a + * non-nested critical section with sched_lock held but not recursed. + */ + ctd->td_oncpu = PCPU_GET(cpuid); + sched_lock.mtx_lock = (uintptr_t)ctd; + THREAD_LOCK_ASSERT(ctd, MA_OWNED | MA_NOTRECURSED); + /* + * Processes normally resume in mi_switch() after being + * cpu_switch()'ed to, but when children start up they arrive here + * instead, so we must do much the same things as mi_switch() would. 
+ */ + if ((td = PCPU_GET(deadthread))) { + PCPU_SET(deadthread, NULL); + thread_stash(td); + } + thread_unlock(ctd); +} + static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0, "Scheduler name"); @@ -2093,6 +2189,7 @@ SYSCTL_INT(_kern_sched, OID_AUTO, ipi_thresh, CTLFLAG_RW, &ipi_thresh, 0, ""); SYSCTL_INT(_kern_sched, OID_AUTO, steal_htt, CTLFLAG_RW, &steal_htt, 0, ""); SYSCTL_INT(_kern_sched, OID_AUTO, steal_busy, CTLFLAG_RW, &steal_busy, 0, ""); SYSCTL_INT(_kern_sched, OID_AUTO, busy_thresh, CTLFLAG_RW, &busy_thresh, 0, ""); +SYSCTL_INT(_kern_sched, OID_AUTO, topology, CTLFLAG_RD, &topology, 0, ""); #endif /* ps compat */ diff --git a/sys/sys/mutex.h b/sys/sys/mutex.h index caa1311..d18061a 100644 --- a/sys/sys/mutex.h +++ b/sys/sys/mutex.h @@ -125,6 +125,14 @@ void _mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void _mtx_assert(struct mtx *m, int what, const char *file, int line); #endif +void _thread_lock_flags(struct thread *, int, const char *, int); + +#define thread_lock(tdp) \ + _thread_lock_flags((tdp), 0, __FILE__, __LINE__) +#define thread_lock_flags(tdp, opt) \ + _thread_lock_flags((tdp), (opt), __FILE__, __LINE__) +#define thread_unlock(tdp) \ + mtx_unlock_spin(__DEVOLATILE(struct mtx *, (tdp)->td_lock)) /* * We define our machine-independent (unoptimized) mutex micro-operations @@ -349,6 +357,7 @@ extern struct mtx_pool *mtxpool_sleep; */ extern struct mtx sched_lock; extern struct mtx Giant; +extern struct mtx blocked_lock; /* * Giant lock manipulation and clean exit macros. diff --git a/sys/sys/proc.h b/sys/sys/proc.h index a73d2d5..acde39d 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -134,7 +134,7 @@ struct pargs { * g - process group mtx * h - callout_lock mtx * i - by curproc or the master session mtx - * j - locked by sched_lock mtx + * j - locked by proc slock * k - only accessed by curthread * k*- only accessed by curthread and from an interrupt * l - the attaching proc or attaching proc parent @@ -144,6 +144,7 @@ struct pargs { * p - select lock (sellock) * q - td_contested lock * r - p_peers lock + * t - thread lock * x - created at fork, only changes during single threading in exec * z - zombie threads lock * @@ -195,32 +196,19 @@ struct mqueue_notifier; * other than CPU cycles, which are parceled out to the threads. */ -/*************** - * Threads are the unit of execution - With a single run queue used by all processors: - - RUNQ: --->THREAD---THREAD--... SLEEPQ:[]---THREAD---THREAD---THREAD - []---THREAD - [] - []---THREAD---THREAD - -With PER-CPU run queues: -it gets more complicated. - * - *****************/ - /* * Kernel runnable context (thread). * This is what is put to sleep and reactivated. * Thread context. Processes may have multiple threads. */ struct thread { + volatile struct mtx *td_lock; /* replaces sched lock */ struct proc *td_proc; /* (*) Associated process. */ TAILQ_ENTRY(thread) td_plist; /* (*) All threads in this proc. */ /* The two queues below should someday be merged. */ - TAILQ_ENTRY(thread) td_slpq; /* (j) Sleep queue. */ - TAILQ_ENTRY(thread) td_lockq; /* (j) Lock queue. */ + TAILQ_ENTRY(thread) td_slpq; /* (t) Sleep queue. */ + TAILQ_ENTRY(thread) td_lockq; /* (t) Lock queue. */ TAILQ_HEAD(, selinfo) td_selq; /* (p) List of selinfos. */ struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. 
*/ @@ -232,20 +220,20 @@ struct thread { /* Cleared during fork1() or thread_schedule_upcall(). */ #define td_startzero td_flags - int td_flags; /* (j) TDF_* flags. */ - int td_inhibitors; /* (j) Why can not run. */ + int td_flags; /* (t) TDF_* flags. */ + int td_inhibitors; /* (t) Why can not run. */ int td_pflags; /* (k) Private thread (TDP_*) flags. */ int td_dupfd; /* (k) Ret value from fdopen. XXX */ - int td_sqqueue; /* (j) Sleepqueue queue blocked on. */ - void *td_wchan; /* (j) Sleep address. */ - const char *td_wmesg; /* (j) Reason for sleep. */ - u_char td_lastcpu; /* (j) Last cpu we were on. */ - u_char td_oncpu; /* (j) Which cpu we are on. */ + int td_sqqueue; /* (t) Sleepqueue queue blocked on. */ + void *td_wchan; /* (t) Sleep address. */ + const char *td_wmesg; /* (t) Reason for sleep. */ + u_char td_lastcpu; /* (t) Last cpu we were on. */ + u_char td_oncpu; /* (t) Which cpu we are on. */ volatile u_char td_owepreempt; /* (k*) Preempt on last critical_exit */ short td_locks; /* (k) Count of non-spin locks. */ - u_char td_tsqueue; /* (j) Turnstile queue blocked on. */ - struct turnstile *td_blocked; /* (j) Lock thread is blocked on. */ - const char *td_lockname; /* (j) Name of lock blocked on. */ + u_char td_tsqueue; /* (t) Turnstile queue blocked on. */ + struct turnstile *td_blocked; /* (t) Lock thread is blocked on. */ + const char *td_lockname; /* (t) Name of lock blocked on. */ LIST_HEAD(, turnstile) td_contested; /* (q) Contested locks. */ struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */ int td_intr_nesting_level; /* (k) Interrupt recursion. */ @@ -253,18 +241,18 @@ struct thread { struct kse_thr_mailbox *td_mailbox; /* (*) Userland mailbox address. */ struct ucred *td_ucred; /* (k) Reference to credentials. */ struct thread *td_standin; /* (k + a) Use this for an upcall. */ - struct kse_upcall *td_upcall; /* (k + j) Upcall structure. */ - u_int td_estcpu; /* (j) Sum of the same field in KSEs. */ - u_int td_slptime; /* (j) How long completely blocked. */ - struct rusage td_ru; /* (j) rusage information */ - uint64_t td_runtime; /* (j) How many cpu ticks we've run. */ - u_int td_pticks; /* (j) Statclock hits for profiling */ - u_int td_sticks; /* (j) Statclock hits in system mode. */ - u_int td_iticks; /* (j) Statclock hits in intr mode. */ - u_int td_uticks; /* (j) Statclock hits in user mode. */ + struct kse_upcall *td_upcall; /* (k + t) Upcall structure. */ + u_int td_estcpu; /* (t) estimated cpu utilization */ + u_int td_slptime; /* (t) How long completely blocked. */ + struct rusage td_ru; /* (t) rusage information */ + uint64_t td_runtime; /* (t) How many cpu ticks we've run. */ + u_int td_pticks; /* (t) Statclock hits for profiling */ + u_int td_sticks; /* (t) Statclock hits in system mode. */ + u_int td_iticks; /* (t) Statclock hits in intr mode. */ + u_int td_uticks; /* (t) Statclock hits in user mode. */ u_int td_uuticks; /* (k) Statclock hits (usr), for UTS. */ u_int td_usticks; /* (k) Statclock hits (sys), for UTS. */ - int td_intrval; /* (j) Return value of TDF_INTERRUPT. */ + int td_intrval; /* (t) Return value of TDF_INTERRUPT. */ sigset_t td_oldsigmask; /* (k) Saved mask from pre sigpause. */ sigset_t td_sigmask; /* (c) Current signal mask. */ volatile u_int td_generation; /* (k) For detection of preemption */ @@ -278,11 +266,11 @@ struct thread { /* Copied during fork1() or thread_sched_upcall(). */ #define td_startcopy td_endzero - u_char td_base_pri; /* (j) Thread base kernel priority. 
*/ - u_char td_priority; /* (j) Thread active priority. */ - u_char td_pri_class; /* (j) Scheduling class. */ - u_char td_user_pri; /* (j) User pri from estcpu and nice. */ - u_char td_base_user_pri; /* (j) Base user pri */ + u_char td_base_pri; /* (t) Thread base kernel priority. */ + u_char td_priority; /* (t) Thread active priority. */ + u_char td_pri_class; /* (t) Scheduling class. */ + u_char td_user_pri; /* (t) User pri from estcpu and nice. */ + u_char td_base_user_pri; /* (t) Base user pri */ #define td_endcopy td_pcb /* @@ -296,7 +284,7 @@ struct thread { TDS_CAN_RUN, TDS_RUNQ, TDS_RUNNING - } td_state; + } td_state; /* (t) thread state */ register_t td_retval[2]; /* (k) Syscall aux returns. */ struct callout td_slpcallout; /* (h) Callout for sleep. */ struct trapframe *td_frame; /* (k) */ @@ -313,6 +301,16 @@ struct thread { int td_syscalls; /* per-thread syscall count (used by NFS :)) */ }; +struct mtx *thread_lock_block(struct thread *); +void thread_lock_unblock(struct thread *, struct mtx *); +void thread_lock_set(struct thread *, struct mtx *); +#define THREAD_LOCK_ASSERT(td, type) \ +do { \ + struct mtx *__m = __DEVOLATILE(struct mtx *, (td)->td_lock); \ + if (__m != &blocked_lock) \ + mtx_assert(__m, (type)); \ +} while (0) + /* * Flags kept in td_flags: * To change these you MUST have the scheduler lock. @@ -324,22 +322,22 @@ struct thread { #define TDF_IDLETD 0x00000020 /* This is a per-CPU idle thread. */ #define TDF_SELECT 0x00000040 /* Selecting; wakeup/waiting danger. */ #define TDF_SLEEPABORT 0x00000080 /* sleepq_abort was called. */ -#define TDF_TSNOBLOCK 0x00000100 /* Don't block on a turnstile due to race. */ +#define TDF_UNUSEDx100 0x00000100 /* --available-- */ #define TDF_UBORROWING 0x00000200 /* Thread is borrowing user pri. */ #define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */ #define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */ #define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */ #define TDF_INTERRUPT 0x00002000 /* Thread is marked as interrupted. */ #define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */ -#define TDF_UNUSED15 0x00008000 /* --available -- */ +#define TDF_UNUSED15 0x00008000 /* --available-- */ #define TDF_NEEDRESCHED 0x00010000 /* Thread needs to yield. */ #define TDF_NEEDSIGCHK 0x00020000 /* Thread may need signal delivery. */ #define TDF_XSIG 0x00040000 /* Thread is exchanging signal under trace */ #define TDF_UNUSED19 0x00080000 /* Thread is sleeping on a umtx. */ #define TDF_THRWAKEUP 0x00100000 /* Libthr thread must not suspend itself. */ #define TDF_DBSUSPEND 0x00200000 /* Thread is suspended by debugger */ -#define TDF_UNUSED22 0x00400000 /* --available -- */ -#define TDF_UNUSED23 0x00800000 /* --available -- */ +#define TDF_UNUSED22 0x00400000 /* --available-- */ +#define TDF_UNUSED23 0x00800000 /* --available-- */ #define TDF_SCHED0 0x01000000 /* Reserved for scheduler private use */ #define TDF_SCHED1 0x02000000 /* Reserved for scheduler private use */ #define TDF_SCHED2 0x04000000 /* Reserved for scheduler private use */ @@ -482,7 +480,8 @@ struct rusage_ext { */ struct proc { LIST_ENTRY(proc) p_list; /* (d) List of all processes. */ - TAILQ_HEAD(, thread) p_threads; /* (j)(td_plist) Threads. (shortcut) */ + TAILQ_HEAD(, thread) p_threads; /* (j) all threads. */ + struct mtx p_slock; /* process spin lock */ struct ucred *p_ucred; /* (c) Process owner's identity. */ struct filedesc *p_fd; /* (b) Open files. 
*/ struct filedesc_to_leader *p_fdtol; /* (b) Tracking node */ @@ -491,7 +490,7 @@ struct proc { struct plimit *p_limit; /* (c) Process limits. */ struct callout p_limco; /* (c) Limit callout handle */ struct sigacts *p_sigacts; /* (x) Signal actions, state (CPU). */ - TAILQ_HEAD(, kse_upcall) p_upcalls; /* All upcalls in the proc. */ + TAILQ_HEAD(, kse_upcall) p_upcalls; /* (j) All upcalls in the proc. */ /* * The following don't make too much sense. @@ -504,7 +503,6 @@ struct proc { PRS_NORMAL, /* threads can be run. */ PRS_ZOMBIE } p_state; /* (j/c) S* process status. */ - pid_t p_pid; /* (b) Process identifier. */ LIST_ENTRY(proc) p_hash; /* (d) Hash chain. */ LIST_ENTRY(proc) p_pglist; /* (g + e) List of processes in pgrp. */ @@ -542,14 +540,12 @@ struct proc { struct nlminfo *p_nlminfo; /* (?) Only used by/for lockd. */ struct kaioinfo *p_aioinfo; /* (c) ASYNC I/O info. */ struct thread *p_singlethread;/* (c + j) If single threading this is it */ - int p_suspcount; /* (c) Num threads in suspended mode. */ + int p_suspcount; /* (j) Num threads in suspended mode. */ struct thread *p_xthread; /* (c) Trap thread */ int p_boundary_count;/* (c) Num threads at user boundary */ int p_pendingcnt; /* how many signals are pending */ struct itimers *p_itimers; /* (c) POSIX interval timers. */ /* from ksegrp */ - u_int p_estcpu; /* (j) Sum of the field in threads. */ - u_int p_slptime; /* (j) How long completely blocked. */ int p_numupcalls; /* (j) Num upcalls. */ int p_upsleeps; /* (c) Num threads in kse_release(). */ struct kse_thr_mailbox *p_completed; /* (c) Completed thread mboxes. */ @@ -592,6 +588,9 @@ struct proc { #define NOCPU 0xff /* For when we aren't on a CPU. */ +#define PROC_SLOCK(p) mtx_lock_spin(&(p)->p_slock) +#define PROC_SUNLOCK(p) mtx_unlock_spin(&(p)->p_slock) +#define PROC_SLOCK_ASSERT(p, type) mtx_assert(&(p)->p_slock, (type)) /* These flags are kept in p_flag. */ #define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */ @@ -626,7 +625,7 @@ struct proc { #define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE) #define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED) -/* These flags are kept in p_sflag and are protected with sched_lock. */ +/* These flags are kept in p_sflag and are protected with proc slock. */ #define PS_INMEM 0x00001 /* Loaded into memory. */ #define PS_ALRMPEND 0x00020 /* Pending SIGVTALRM needs to be posted. */ #define PS_PROFPEND 0x00040 /* Pending SIGPROF needs to be posted. */ @@ -861,8 +860,8 @@ void stopevent(struct proc *, u_int, u_int); void threadinit(void); void cpu_idle(void); extern void (*cpu_idle_hook)(void); /* Hook to machdep CPU idler. */ -void cpu_switch(struct thread *old, struct thread *new); -void cpu_throw(struct thread *old, struct thread *new) __dead2; +void cpu_switch(struct thread *, struct thread *, struct mtx *); +void cpu_throw(struct thread *, struct thread *) __dead2; void unsleep(struct thread *); void userret(struct thread *, struct trapframe *); @@ -872,6 +871,7 @@ void cpu_fork(struct thread *, struct proc *, struct thread *, int); void cpu_set_fork_handler(struct thread *, void (*)(void *), void *); /* New in KSE. 
*/ +void kse_unlink(struct thread *); void kse_GC(void); void kseinit(void); void cpu_set_upcall(struct thread *td, struct thread *td0); @@ -900,6 +900,7 @@ void childproc_stopped(struct proc *child, int reason); void childproc_continued(struct proc *child); void childproc_exited(struct proc *child); int thread_suspend_check(int how); +void thread_suspend_switch(struct thread *); void thread_suspend_one(struct thread *td); struct thread *thread_switchout(struct thread *td, int flags, struct thread *newtd); diff --git a/sys/sys/sched.h b/sys/sys/sched.h index 1342906..0dcf369 100644 --- a/sys/sys/sched.h +++ b/sys/sys/sched.h @@ -81,6 +81,7 @@ int sched_runnable(void); */ void sched_exit(struct proc *p, struct thread *childtd); void sched_fork(struct thread *td, struct thread *childtd); +void sched_fork_exit(struct thread *td); /* * KSE Groups contain scheduling priority information. They record the @@ -101,6 +102,7 @@ fixpt_t sched_pctcpu(struct thread *td); void sched_prio(struct thread *td, u_char prio); void sched_sleep(struct thread *td); void sched_switch(struct thread *td, struct thread *newtd, int flags); +void sched_throw(struct thread *td); void sched_unlend_prio(struct thread *td, u_char prio); void sched_unlend_user_prio(struct thread *td, u_char pri); void sched_user_prio(struct thread *td, u_char prio); @@ -155,6 +157,19 @@ sched_unpin(void) #define SRQ_PREEMPTED 0x0008 /* has been preempted.. be kind */ #define SRQ_BORROWING 0x0010 /* Priority updated due to prio_lend */ +/* Switch stats. */ +#ifdef SCHED_STATS +extern long switch_preempt; +extern long switch_owepreempt; +extern long switch_turnstile; +extern long switch_sleepq; +extern long switch_sleepqtimo; +extern long switch_relinquish; +extern long switch_needresched; +#define SCHED_STAT_INC(var) atomic_add_long(&(var), 1) +#else +#define SCHED_STAT_INC(var) +#endif /* temporarily here */ void schedinit(void); @@ -162,7 +177,6 @@ void sched_init_concurrency(struct proc *p); void sched_set_concurrency(struct proc *p, int cuncurrency); void sched_schedinit(void); void sched_newproc(struct proc *p, struct thread *td); -void sched_thread_exit(struct thread *td); void sched_newthread(struct thread *td); #endif /* _KERNEL */ |
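The common thread through these hunks is that a thread's scheduling state is no longer protected by the global sched_lock but by whatever mutex its new td_lock field points to: thread_lock() (sys/sys/mutex.h above) must chase that pointer, and helpers such as thread_lock_set() and thread_lock_block()/thread_lock_unblock() (prototyped in sys/sys/proc.h above) move it between containers such as run queues. The following is a minimal user-space sketch of that pointer-chasing idea, not the kernel implementation; the thread_model_* names, the blocked_marker sentinel, and the use of pthread mutexes in place of spin mutexes are illustrative assumptions.

#include <pthread.h>
#include <stdatomic.h>

/*
 * Stand-in for the kernel's blocked_lock: never actually handed out,
 * it only marks a thread whose lock pointer is in transit.
 */
static pthread_mutex_t blocked_marker = PTHREAD_MUTEX_INITIALIZER;

struct thread_model {
	/* Analogue of td_lock: the mutex currently protecting this thread. */
	_Atomic(pthread_mutex_t *) td_lock;
};

/*
 * Analogue of thread_lock(): lock whatever td_lock points to, retrying
 * if the pointer moves (or is parked on the blocked marker) before the
 * lock is actually held.
 */
void
thread_model_lock(struct thread_model *td)
{
	pthread_mutex_t *m;

	for (;;) {
		m = atomic_load(&td->td_lock);
		if (m == &blocked_marker)
			continue;		/* mid-switch; spin */
		pthread_mutex_lock(m);
		if (atomic_load(&td->td_lock) == m)
			return;			/* pointer stable; we own it */
		pthread_mutex_unlock(m);	/* lost a race; retry */
	}
}

/* Analogue of thread_unlock(). */
void
thread_model_unlock(struct thread_model *td)
{

	pthread_mutex_unlock(atomic_load(&td->td_lock));
}

/*
 * Analogue of thread_lock_set(), the helper sched_add() uses above:
 * with both the old and the new lock held, repoint td_lock at the new
 * lock and drop the old one.
 */
void
thread_model_lock_set(struct thread_model *td, pthread_mutex_t *new)
{
	pthread_mutex_t *old;

	old = atomic_exchange(&td->td_lock, new);
	pthread_mutex_unlock(old);
}

/*
 * Analogues of thread_lock_block()/thread_lock_unblock(): park the
 * thread on the blocked marker around a context switch and install
 * the real lock once the switch has completed.
 */
pthread_mutex_t *
thread_model_lock_block(struct thread_model *td)
{

	return (atomic_exchange(&td->td_lock, &blocked_marker));
}

void
thread_model_lock_unblock(struct thread_model *td, pthread_mutex_t *new)
{

	atomic_store(&td->td_lock, new);
}

This pointer hand-off is also why THREAD_LOCK_ASSERT() in the proc.h hunk skips its check while td_lock points at blocked_lock, and why maybe_preempt() and sched_preempt() re-acquire the preempted-to thread's lock after mi_switch(): the lock pointer may have moved while the thread was switched out.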
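The kern.sched.stats tree added under SCHED_STATS in kern_switch.c exports the switch counters read-only and adds a write-only reset node. Below is a minimal user-space sketch of reading and resetting them with sysctlbyname(3); it assumes a kernel built with SCHED_STATS (otherwise the nodes do not exist) and reads the counters as ints because the diff exports them with SYSCTL_INT.

#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdio.h>

int
main(void)
{
	int preempt, owepreempt, reset;
	size_t len;

	/* Read two of the counters added by this change. */
	len = sizeof(preempt);
	if (sysctlbyname("kern.sched.stats.preempt", &preempt, &len,
	    NULL, 0) == -1)
		err(1, "kern.sched.stats.preempt");
	len = sizeof(owepreempt);
	if (sysctlbyname("kern.sched.stats.owepreempt", &owepreempt, &len,
	    NULL, 0) == -1)
		err(1, "kern.sched.stats.owepreempt");
	printf("preempt: %d owepreempt: %d\n", preempt, owepreempt);

	/*
	 * Per sysctl_stats_reset() above, writing any non-zero value to
	 * the reset node zeroes every counter.
	 */
	reset = 1;
	if (sysctlbyname("kern.sched.stats.reset", NULL, NULL,
	    &reset, sizeof(reset)) == -1)
		err(1, "kern.sched.stats.reset");
	return (0);
}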