diff options
author | jeff <jeff@FreeBSD.org> | 2004-08-10 07:52:21 +0000 |
---|---|---|
committer | jeff <jeff@FreeBSD.org> | 2004-08-10 07:52:21 +0000 |
commit | b109ddffbc987be64d4169350f32a3994c4cacec (patch) | |
tree | f5cb69da6bf159497d58dfee41da0f8684408cf3 | |
parent | 25386024e1903fa1cc737c6f705807bf99715a43 (diff) | |
download | FreeBSD-src-b109ddffbc987be64d4169350f32a3994c4cacec.zip FreeBSD-src-b109ddffbc987be64d4169350f32a3994c4cacec.tar.gz |
- Use a new flag, KEF_XFERABLE, to record with certainty that this kse had
contributed to the transferable load count. This prevents any potential
problems with sched_pin() being used around calls to setrunqueue().
- Change the sched_add() load balancing algorithm to try to migrate on
wakeup. This attempts to place threads that communicate with each other
on the same CPU.
- Don't clear the idle counts in kseq_transfer(); let the cpus do that when
  they call sched_add() from kseq_assign().
- Correct a few out of date comments.
- Make sure the ke_cpu field is correct when we preempt.
- Call kseq_assign() from sched_clock() to catch any assignments that were
done without IPI. Presently all assignments are done with an IPI, but I'm
trying a patch that limits that.
- Don't migrate a thread if it is still runnable in sched_add(). Previously,
this could only happen for KSE threads, but due to changes to
sched_switch() all threads went through this path.
- Remove some code that was added with preemption but is not necessary.
-rw-r--r-- | sys/kern/sched_ule.c | 110 |
1 files changed, 76 insertions, 34 deletions
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index 175c2bd..9e5028c 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -100,6 +100,7 @@ struct ke_sched { #define KEF_ASSIGNED KEF_SCHED0 /* KSE is being migrated. */ #define KEF_BOUND KEF_SCHED1 /* KSE can not migrate. */ +#define KEF_XFERABLE KEF_SCHED2 /* KSE was added as transferable. */ struct kg_sched { int skg_slptime; /* Number of ticks we vol. slept */ @@ -332,6 +333,7 @@ kseq_runq_add(struct kseq *kseq, struct kse *ke) if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) { kseq->ksq_transferable++; kseq->ksq_group->ksg_transferable++; + ke->ke_flags |= KEF_XFERABLE; } #endif runq_add(ke->ke_runq, ke); @@ -341,9 +343,10 @@ static __inline void kseq_runq_rem(struct kseq *kseq, struct kse *ke) { #ifdef SMP - if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) { + if (ke->ke_flags & KEF_XFERABLE) { kseq->ksq_transferable--; kseq->ksq_group->ksg_transferable--; + ke->ke_flags &= ~KEF_XFERABLE; } #endif runq_remove(ke->ke_runq, ke); @@ -651,9 +654,11 @@ kseq_notify(struct kse *ke, int cpu) struct kseq *kseq; struct thread *td; struct pcpu *pcpu; + int prio; ke->ke_cpu = cpu; ke->ke_flags |= KEF_ASSIGNED; + prio = ke->ke_thread->td_priority; kseq = KSEQ_CPU(cpu); @@ -663,6 +668,12 @@ kseq_notify(struct kse *ke, int cpu) do { *(volatile struct kse **)&ke->ke_assign = kseq->ksq_assigned; } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke)); + /* + * Without sched_lock we could lose a race where we set NEEDRESCHED + * on a thread that is switched out before the IPI is delivered. This + * would lead us to miss the resched. This will be a problem once + * sched_lock is pushed down. 
+ */ pcpu = pcpu_find(cpu); td = pcpu->pc_curthread; if (ke->ke_thread->td_priority < td->td_priority || @@ -727,43 +738,56 @@ kseq_transfer(struct kseq *kseq, struct kse *ke, int class) if (smp_started == 0) return (0); cpu = 0; - ksg = kseq->ksq_group; - /* - * If there are any idle groups, give them our extra load. The - * threshold at which we start to reassign kses has a large impact + * If our load exceeds a certain threshold we should attempt to + * reassign this thread. The first candidate is the cpu that + * originally ran the thread. If it is idle, assign it there, + * otherwise, pick an idle cpu. + * + * The threshold at which we start to reassign kses has a large impact * on the overall performance of the system. Tuned too high and * some CPUs may idle. Too low and there will be excess migration * and context switches. */ - if (ksg->ksg_load > (ksg->ksg_cpus * 2) && kseq_idle) { + ksg = kseq->ksq_group; + if (ksg->ksg_load > ksg->ksg_cpus && kseq_idle) { + ksg = KSEQ_CPU(ke->ke_cpu)->ksq_group; + if (kseq_idle & ksg->ksg_mask) { + cpu = ffs(ksg->ksg_idlemask); + if (cpu) + goto migrate; + } /* * Multiple cpus could find this bit simultaneously * but the race shouldn't be terrible. */ cpu = ffs(kseq_idle); if (cpu) - atomic_clear_int(&kseq_idle, 1 << (cpu - 1)); + goto migrate; } /* * If another cpu in this group has idled, assign a thread over * to them after checking to see if there are idled groups. */ - if (cpu == 0 && kseq->ksq_load > 1 && ksg->ksg_idlemask) { + ksg = kseq->ksq_group; + if (ksg->ksg_idlemask) { cpu = ffs(ksg->ksg_idlemask); if (cpu) - ksg->ksg_idlemask &= ~(1 << (cpu - 1)); + goto migrate; } /* - * Now that we've found an idle CPU, migrate the thread. + * No new CPU was found. */ - if (cpu) { - cpu--; - ke->ke_runq = NULL; - kseq_notify(ke, cpu); - return (1); - } return (0); +migrate: + /* + * Now that we've found an idle CPU, migrate the thread. 
+ */ + cpu--; + ke->ke_runq = NULL; + kseq_notify(ke, cpu); + + return (1); } #endif /* SMP */ @@ -958,7 +982,7 @@ sched_slice(struct kse *ke) /* * Rationale: - * KSEs in interactive ksegs get the minimum slice so that we + * KSEs in interactive ksegs get a minimal slice so that we * quickly notice if it abuses its advantage. * * KSEs in non-interactive ksegs are assigned a slice that is @@ -1020,7 +1044,7 @@ sched_interact_update(struct ksegrp *kg) /* * If we have exceeded by more than 1/5th then the algorithm below * will not bring us back into range. Dividing by two here forces - * us into the range of [3/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] + * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] */ if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { kg->kg_runtime /= 2; @@ -1144,9 +1168,9 @@ sched_switch(struct thread *td, struct thread *newtd) * to the new cpu. This is the case in sched_bind(). */ if ((ke->ke_flags & KEF_ASSIGNED) == 0) { - if (td == PCPU_GET(idlethread)) + if (td == PCPU_GET(idlethread)) { TD_SET_CAN_RUN(td); - else if (TD_IS_RUNNING(td)) { + } else if (TD_IS_RUNNING(td)) { kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); setrunqueue(td); } else { @@ -1162,10 +1186,12 @@ sched_switch(struct thread *td, struct thread *newtd) kse_reassign(ke); } } - if (newtd == NULL) - newtd = choosethread(); - else + if (newtd != NULL) { kseq_load_add(KSEQ_SELF(), newtd->td_kse); + ke->ke_cpu = PCPU_GET(cpuid); + ke->ke_runq = KSEQ_SELF()->ksq_curr; + } else + newtd = choosethread(); if (td != newtd) cpu_switch(td, newtd); sched_lock.mtx_lock = (uintptr_t)td; @@ -1392,11 +1418,18 @@ sched_clock(struct thread *td) struct kse *ke; mtx_assert(&sched_lock, MA_OWNED); + kseq = KSEQ_SELF(); #ifdef SMP if (ticks == bal_tick) sched_balance(); if (ticks == gbal_tick) sched_balance_groups(); + /* + * We could have been assigned a non real-time thread without an + * IPI. 
+ */ + if (kseq->ksq_assigned) + kseq_assign(kseq); /* Potentially sets NEEDRESCHED */ #endif /* * sched_setup() apparently happens prior to stathz being set. We @@ -1450,7 +1483,6 @@ sched_clock(struct thread *td) /* * We're out of time, recompute priorities and requeue. */ - kseq = KSEQ_SELF(); kseq_load_rem(kseq, ke); sched_priority(kg); sched_slice(ke); @@ -1553,6 +1585,9 @@ sched_add_internal(struct thread *td, int preemptive) struct kseq *kseq; struct ksegrp *kg; struct kse *ke; +#ifdef SMP + int canmigrate; +#endif int class; mtx_assert(&sched_lock, MA_OWNED); @@ -1602,7 +1637,18 @@ sched_add_internal(struct thread *td, int preemptive) break; } #ifdef SMP - if (ke->ke_cpu != PCPU_GET(cpuid)) { + /* + * Don't migrate running threads here. Force the long term balancer + * to do it. + */ + canmigrate = KSE_CAN_MIGRATE(ke, class); + if (TD_IS_RUNNING(td)) + canmigrate = 0; + + /* + * If this thread is pinned or bound, notify the target cpu. + */ + if (!canmigrate && ke->ke_cpu != PCPU_GET(cpuid) ) { ke->ke_runq = NULL; kseq_notify(ke, ke->ke_cpu); return; @@ -1624,20 +1670,16 @@ sched_add_internal(struct thread *td, int preemptive) * Now remove ourselves from the group specific idle mask. */ kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask); - } else if (kseq->ksq_load > 1 && KSE_CAN_MIGRATE(ke, class)) + } else if (kseq->ksq_load > 1 && canmigrate) if (kseq_transfer(kseq, ke, class)) return; + ke->ke_cpu = PCPU_GET(cpuid); #endif - if (td->td_priority < curthread->td_priority) - curthread->td_flags |= TDF_NEEDRESCHED; - -#ifdef SMP /* - * Only try to preempt if the thread is unpinned or pinned to the - * current CPU. + * XXX With preemption this is not necessary. */ - if (KSE_CAN_MIGRATE(ke, class) || ke->ke_cpu == PCPU_GET(cpuid)) -#endif + if (td->td_priority < curthread->td_priority) + curthread->td_flags |= TDF_NEEDRESCHED; if (preemptive && maybe_preempt(td)) return; ke->ke_ksegrp->kg_runq_kses++; |