author     jeff <jeff@FreeBSD.org>    2003-11-15 07:32:07 +0000
committer  jeff <jeff@FreeBSD.org>    2003-11-15 07:32:07 +0000
commit     be190686facf06b6d4f95d361bd17d2f335766a3 (patch)
tree       e4843ee1bef82df05079d2a0cacb5301917c26a5
parent     6e230b39ea0471b16153d228f9ebd0e4af41241c (diff)
- Introduce kseq_runq_{add,rem}() which are used to insert and remove
  kses from the run queues. Also, on SMP, we track the transferable count
  here. Threads are transferable only as long as they are on the run queue.
- Previously, we adjusted our load balancing based on the transferable count
  minus the number of actual cpus. This was done to account for the threads
  which were likely to be running. All of this logic is simpler now that
  transferable accounts for only those threads which can actually be taken.
  Updated various places in sched_add() and kseq_balance() to account for
  this; the move calculation is sketched below, after the diffstat.
- Rename kseq_{add,rem} to kseq_load_{add,rem} to reflect what they're
  really doing. The load is accounted for separately from the runq because
  the load is accounted for even as the thread is running.
- Fix a bug in sched_class() where we weren't properly using the PRI_BASE()
  version of the kg_pri_class.
- Add a large comment that describes the impact of a seemingly simple
  conditional in sched_add().
- Also in sched_add(), check the transferable count and KSE_CAN_MIGRATE()
  prior to checking kseq_idle. This reduces the frequency of access to
  kseq_idle, which is a shared resource.
-rw-r--r--  sys/kern/sched_ule.c  144
1 file changed, 83 insertions(+), 61 deletions(-)
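A minimal standalone sketch of the new move calculation in sched_balance()
follows. The helper name balance_move() and its plain-int parameters are
illustrative only; the real code reads ksq_load and ksq_load_transferable
from the per-CPU kseq structures under sched_lock.

/*
 * Sketch of the rebalancing arithmetic: migrate half of the load
 * imbalance, rounded up, but never more kses than the busiest queue
 * can actually give up (its transferable count).
 */
#include <stdio.h>

static int
balance_move(int busiest_load, int lowest_load, int transferable)
{
	int diff, move;

	/* Nothing to do if nothing can be taken or there is no imbalance. */
	if (transferable == 0 || lowest_load >= busiest_load)
		return (0);
	diff = busiest_load - lowest_load;
	move = diff / 2;
	if (diff & 0x1)			/* round odd imbalances up */
		move++;
	if (move > transferable)	/* min(move, high_load) in the diff */
		move = transferable;
	return (move);
}

int
main(void)
{
	/*
	 * Busiest cpu: load 7 with 3 transferable kses; idlest cpu: load 2.
	 * Imbalance is 5, half rounded up is 3, capped at 3 -> move 3.
	 */
	printf("move %d kses\n", balance_move(7, 2, 3));
	return (0);
}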
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index 0c73395..47d67e0 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -249,8 +249,10 @@ static void sched_pctcpu_update(struct kse *ke);
/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
-static void kseq_add(struct kseq *kseq, struct kse *ke);
-static void kseq_rem(struct kseq *kseq, struct kse *ke);
+static void kseq_load_add(struct kseq *kseq, struct kse *ke);
+static void kseq_load_rem(struct kseq *kseq, struct kse *ke);
+static __inline void kseq_runq_add(struct kseq *kseq, struct kse *ke);
+static __inline void kseq_runq_rem(struct kseq *kseq, struct kse *ke);
static void kseq_nice_add(struct kseq *kseq, int nice);
static void kseq_nice_rem(struct kseq *kseq, int nice);
void kseq_print(int cpu);
@@ -259,7 +261,7 @@ void kseq_print(int cpu);
static int sched_pickcpu(void);
#endif
static struct kse *runq_steal(struct runq *rq);
-static void kseq_balance(void *arg);
+static void sched_balance(void *arg);
static void kseq_move(struct kseq *from, int cpu);
static __inline void kseq_setidle(struct kseq *kseq);
static void kseq_notify(struct kse *ke, int cpu);
@@ -280,7 +282,7 @@ kseq_print(int cpu)
printf("kseq:\n");
printf("\tload: %d\n", kseq->ksq_load);
- printf("\tload REALTIME: %d\n", kseq->ksq_load_timeshare);
+ printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare);
#ifdef SMP
printf("\tload transferable: %d\n", kseq->ksq_load_transferable);
#endif
@@ -292,8 +294,28 @@ kseq_print(int cpu)
i - SCHED_PRI_NHALF, kseq->ksq_nice[i]);
}
+static __inline void
+kseq_runq_add(struct kseq *kseq, struct kse *ke)
+{
+#ifdef SMP
+ if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class)))
+ kseq->ksq_load_transferable++;
+#endif
+ runq_add(ke->ke_runq, ke);
+}
+
+static __inline void
+kseq_runq_rem(struct kseq *kseq, struct kse *ke)
+{
+#ifdef SMP
+ if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class)))
+ kseq->ksq_load_transferable--;
+#endif
+ runq_remove(ke->ke_runq, ke);
+}
+
static void
-kseq_add(struct kseq *kseq, struct kse *ke)
+kseq_load_add(struct kseq *kseq, struct kse *ke)
{
int class;
mtx_assert(&sched_lock, MA_OWNED);
@@ -301,21 +323,20 @@ kseq_add(struct kseq *kseq, struct kse *ke)
if (class == PRI_TIMESHARE)
kseq->ksq_load_timeshare++;
#ifdef SMP
- if (KSE_CAN_MIGRATE(ke, class))
- kseq->ksq_load_transferable++;
kseq->ksq_rslices += ke->ke_slice;
#endif
kseq->ksq_load++;
if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
- CTR6(KTR_ULE, "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))",
- ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority,
- ke->ke_ksegrp->kg_nice, kseq->ksq_nicemin);
+ CTR6(KTR_ULE,
+ "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))",
+ ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority,
+ ke->ke_ksegrp->kg_nice, kseq->ksq_nicemin);
if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
kseq_nice_add(kseq, ke->ke_ksegrp->kg_nice);
}
static void
-kseq_rem(struct kseq *kseq, struct kse *ke)
+kseq_load_rem(struct kseq *kseq, struct kse *ke)
{
int class;
mtx_assert(&sched_lock, MA_OWNED);
@@ -323,8 +344,6 @@ kseq_rem(struct kseq *kseq, struct kse *ke)
if (class == PRI_TIMESHARE)
kseq->ksq_load_timeshare--;
#ifdef SMP
- if (KSE_CAN_MIGRATE(ke, class))
- kseq->ksq_load_transferable--;
kseq->ksq_rslices -= ke->ke_slice;
#endif
kseq->ksq_load--;
@@ -373,7 +392,7 @@ kseq_nice_rem(struct kseq *kseq, int nice)
#ifdef SMP
/*
- * kseq_balance is a simple CPU load balancing algorithm. It operates by
+ * sched_balance is a simple CPU load balancing algorithm. It operates by
* finding the least loaded and most loaded cpu and equalizing their load
* by migrating some processes.
*
@@ -389,7 +408,7 @@ kseq_nice_rem(struct kseq *kseq, int nice)
*
*/
static void
-kseq_balance(void *arg)
+sched_balance(void *arg)
{
struct kseq *kseq;
int high_load;
@@ -413,8 +432,8 @@ kseq_balance(void *arg)
if (CPU_ABSENT(i) || (i & stopped_cpus) != 0)
continue;
kseq = KSEQ_CPU(i);
- if (kseq->ksq_load > high_load) {
- high_load = kseq->ksq_load;
+ if (kseq->ksq_load_transferable > high_load) {
+ high_load = kseq->ksq_load_transferable;
high_cpu = i;
}
if (low_load == -1 || kseq->ksq_load < low_load) {
@@ -422,32 +441,26 @@ kseq_balance(void *arg)
low_cpu = i;
}
}
-
kseq = KSEQ_CPU(high_cpu);
-
- high_load = kseq->ksq_load_transferable;
/*
* Nothing to do.
*/
- if (high_load < kseq->ksq_cpus + 1)
+ if (high_load == 0 || low_load >= kseq->ksq_load)
goto out;
-
- high_load -= kseq->ksq_cpus;
-
- if (low_load >= high_load)
- goto out;
-
- diff = high_load - low_load;
+ /*
+ * Determine what the imbalance is and then adjust that to how many
+ * kses we actually have to give up (load_transferable).
+ */
+ diff = kseq->ksq_load - low_load;
move = diff / 2;
if (diff & 0x1)
move++;
-
+ move = min(move, high_load);
for (i = 0; i < move; i++)
kseq_move(kseq, low_cpu);
-
out:
mtx_unlock_spin(&sched_lock);
- callout_reset(&kseq_lb_callout, hz, kseq_balance, NULL);
+ callout_reset(&kseq_lb_callout, hz, sched_balance, NULL);
return;
}
@@ -458,9 +471,9 @@ kseq_move(struct kseq *from, int cpu)
struct kse *ke;
ke = kseq_steal(from);
- runq_remove(ke->ke_runq, ke);
ke->ke_state = KES_THREAD;
- kseq_rem(from, ke);
+ kseq_runq_rem(from, ke);
+ kseq_load_rem(from, ke);
ke->ke_cpu = cpu;
kseq_notify(ke, cpu);
@@ -660,12 +673,12 @@ sched_setup(void *dummy)
}
}
callout_init(&kseq_lb_callout, CALLOUT_MPSAFE);
- kseq_balance(NULL);
+ sched_balance(NULL);
#else
kseq_setup(KSEQ_SELF());
#endif
mtx_lock_spin(&sched_lock);
- kseq_add(KSEQ_SELF(), &kse0);
+ kseq_load_add(KSEQ_SELF(), &kse0);
mtx_unlock_spin(&sched_lock);
}
@@ -925,7 +938,7 @@ sched_switch(struct thread *td)
if (TD_IS_RUNNING(td)) {
if (td->td_proc->p_flag & P_SA) {
- kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
+ kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
setrunqueue(td);
} else {
/*
@@ -939,12 +952,11 @@ sched_switch(struct thread *td)
else
ke->ke_runq = &KSEQ_SELF()->ksq_idle;
}
- runq_add(ke->ke_runq, ke);
- /* setrunqueue(td); */
+ kseq_runq_add(KSEQ_SELF(), ke);
}
} else {
if (ke->ke_runq)
- kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
+ kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
/*
* We will not be on the run queue. So we must be
* sleeping or similar.
@@ -1103,20 +1115,26 @@ sched_class(struct ksegrp *kg, int class)
kseq = KSEQ_CPU(ke->ke_cpu);
#ifdef SMP
- if (KSE_CAN_MIGRATE(ke, oclass))
- kseq->ksq_load_transferable--;
- if (KSE_CAN_MIGRATE(ke, nclass))
- kseq->ksq_load_transferable++;
+ /*
+ * On SMP if we're on the RUNQ we must adjust the transferable
+ * count because we could be changing to or from an interrupt
+ * class.
+ */
+ if (ke->ke_state == KES_ONRUNQ) {
+ if (KSE_CAN_MIGRATE(ke, oclass))
+ kseq->ksq_load_transferable--;
+ if (KSE_CAN_MIGRATE(ke, nclass))
+ kseq->ksq_load_transferable++;
+ }
#endif
- if (oclass == PRI_TIMESHARE)
+ if (oclass == PRI_TIMESHARE) {
kseq->ksq_load_timeshare--;
- if (nclass == PRI_TIMESHARE)
- kseq->ksq_load_timeshare++;
-
- if (kg->kg_pri_class == PRI_TIMESHARE)
kseq_nice_rem(kseq, kg->kg_nice);
- else if (class == PRI_TIMESHARE)
+ }
+ if (nclass == PRI_TIMESHARE) {
+ kseq->ksq_load_timeshare++;
kseq_nice_add(kseq, kg->kg_nice);
+ }
}
kg->kg_pri_class = class;
@@ -1136,7 +1154,7 @@ sched_exit(struct proc *p, struct proc *child)
void
sched_exit_kse(struct kse *ke, struct kse *child)
{
- kseq_rem(KSEQ_CPU(child->ke_cpu), child);
+ kseq_load_rem(KSEQ_CPU(child->ke_cpu), child);
}
void
@@ -1220,14 +1238,14 @@ sched_clock(struct thread *td)
/*
* We're out of time, recompute priorities and requeue.
*/
- kseq_rem(kseq, ke);
+ kseq_load_rem(kseq, ke);
sched_priority(kg);
sched_slice(ke);
if (SCHED_CURR(kg, ke))
ke->ke_runq = kseq->ksq_curr;
else
ke->ke_runq = kseq->ksq_next;
- kseq_add(kseq, ke);
+ kseq_load_add(kseq, ke);
td->td_flags |= TDF_NEEDRESCHED;
}
@@ -1290,7 +1308,7 @@ sched_choose(void)
if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE)
kseq_setidle(kseq);
#endif
- runq_remove(ke->ke_runq, ke);
+ kseq_runq_rem(kseq, ke);
ke->ke_state = KES_THREAD;
if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) {
@@ -1373,10 +1391,14 @@ sched_add(struct thread *td)
}
#ifdef SMP
/*
- * If there are any idle processors, give them our extra load.
+ * If there are any idle processors, give them our extra load. The
+ * threshold at which we start to reassign kses has a large impact
+ * on the overall performance of the system. Tuned too high and
+ * some CPUs may idle. Too low and there will be excess migration
+ * and context switches.
*/
- if (kseq_idle && KSE_CAN_MIGRATE(ke, class) &&
- kseq->ksq_load_transferable >= kseq->ksq_cpus) {
+ if (kseq->ksq_load_transferable > kseq->ksq_cpus &&
+ KSE_CAN_MIGRATE(ke, class) && kseq_idle) {
int cpu;
/*
@@ -1405,8 +1427,8 @@ sched_add(struct thread *td)
ke->ke_ksegrp->kg_runq_kses++;
ke->ke_state = KES_ONRUNQ;
- runq_add(ke->ke_runq, ke);
- kseq_add(kseq, ke);
+ kseq_runq_add(kseq, ke);
+ kseq_load_add(kseq, ke);
}
void
@@ -1430,8 +1452,8 @@ sched_rem(struct thread *td)
ke->ke_state = KES_THREAD;
ke->ke_ksegrp->kg_runq_kses--;
kseq = KSEQ_CPU(ke->ke_cpu);
- runq_remove(ke->ke_runq, ke);
- kseq_rem(kseq, ke);
+ kseq_runq_rem(kseq, ke);
+ kseq_load_rem(kseq, ke);
}
fixpt_t
@@ -1484,7 +1506,7 @@ sched_bind(struct thread *td, int cpu)
/* sched_rem without the runq_remove */
ke->ke_state = KES_THREAD;
ke->ke_ksegrp->kg_runq_kses--;
- kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
+ kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
ke->ke_cpu = cpu;
kseq_notify(ke, cpu);
/* When we return from mi_switch we'll be on the correct cpu. */