author     jhb <jhb@FreeBSD.org>    2004-12-30 20:52:44 +0000
committer  jhb <jhb@FreeBSD.org>    2004-12-30 20:52:44 +0000
commit     3f307e93e34fef6720a206ce7d4ffdbd0df2515c (patch)
tree       f5f18f239d1f27cced3e6b80e05c76ba4ebcfce8 /sys/kern
parent     e3adf386176060465c1c68b63fda3b90fa0f9a0a (diff)
Rework the interface between priority propagation (lending) and the
schedulers a bit to ensure more correct handling of priorities and fewer
priority inversions:

- Add two functions to the sched(9) API to handle priority lending:
  sched_lend_prio() and sched_unlend_prio().  The turnstile code uses these
  functions to ask the scheduler to lend a thread a set priority and to tell
  the scheduler when it thinks it is ok for a thread to stop borrowing
  priority.  The unlend case is slightly complex in that the turnstile code
  tells the scheduler what the minimum priority of the thread needs to be to
  satisfy the requirements of any other threads blocked on locks owned by
  the thread in question.  The scheduler then decides whether the thread can
  go back to normal mode (if its normal priority is high enough to satisfy
  the pending lock requests) or whether it should continue to use the
  priority passed to the sched_unlend_prio() call.  This involves adding a
  new per-thread flag, TDF_BORROWING, that replaces the ULE-only kse flag
  for priority elevation (a minimal sketch of this lend/unlend decision
  follows the message below).
- Schedulers now refuse to lower the priority of a thread that is currently
  borrowing another thread's priority.
- If a scheduler changes the priority of a thread that is currently sitting
  on a turnstile, it will call a new function turnstile_adjust() to inform
  the turnstile code of the change.  This function re-sorts the thread on
  the priority list of the turnstile if needed, and if the thread ends up at
  the head of the list (due to having the highest priority) and its priority
  was raised, then it will propagate that new priority to the owner of the
  lock it is blocked on.

Some additional fixes specific to the 4BSD scheduler include:

- Common code for updating the priority of a thread when the user priority
  of its associated kse group changes has been consolidated into a new
  static function, resetpriority_thread().  One change in this function is
  that it will now only adjust the priority of a thread if it already has a
  time-sharing priority, thus preserving any boosts from a tsleep() until
  the thread returns to userland.  Also, resetpriority() no longer calls
  maybe_resched() on each thread in the group.  Instead, the code calling
  resetpriority() is responsible for calling resetpriority_thread() on any
  threads that need to be updated.
- schedcpu() now uses resetpriority_thread() instead of just calling
  sched_prio() directly after it updates a kse group's user priority.
- sched_clock() now uses resetpriority_thread() rather than writing directly
  to td_priority.
- sched_nice() now updates all the priorities of the threads after the group
  priority has been adjusted.

Discussed with:	bde
Reviewed by:	ups, jeffr
Tested on:	4bsd, ule
Tested on:	i386, alpha, sparc64
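To make the lend/unlend decision above concrete, the following is a minimal,
self-contained sketch in plain C.  It is not the kernel implementation: the
model_thread structure, the model_* helper names, and the sample priority
values are invented for illustration, and it deliberately ignores run queues,
turnstiles, and locking.  Lower numeric values mean more important, as in
FreeBSD's priority scheme.

/*
 * User-space model of the priority lend/unlend decision (illustration only).
 */
#include <stdbool.h>
#include <stdio.h>

struct model_thread {
	unsigned char	base_pri;	/* the thread's own ("normal") priority */
	unsigned char	cur_pri;	/* priority it is actually scheduled at */
	bool		borrowing;	/* analogue of the new TDF_BORROWING flag */
};

/* Lend: mark the thread as borrowing and boost it to the lent priority. */
static void
model_lend_prio(struct model_thread *td, unsigned char prio)
{
	td->borrowing = true;
	td->cur_pri = prio;
}

/*
 * Unlend: 'prio' is the minimum priority still required by waiters on other
 * locks this thread owns.  If the thread's own base priority already
 * satisfies that, drop the boost; otherwise keep borrowing at 'prio'.
 */
static void
model_unlend_prio(struct model_thread *td, unsigned char prio)
{
	if (prio >= td->base_pri) {
		td->borrowing = false;
		td->cur_pri = td->base_pri;
	} else
		model_lend_prio(td, prio);
}

int
main(void)
{
	struct model_thread td = { .base_pri = 120, .cur_pri = 120 };

	model_lend_prio(&td, 80);	/* a waiter at priority 80 propagates */
	model_unlend_prio(&td, 100);	/* that waiter leaves; one at 100 remains */
	printf("cur=%d borrowing=%d\n", (int)td.cur_pri, (int)td.borrowing);
	/* cur=100 borrowing=1: still boosted for the remaining waiter */
	model_unlend_prio(&td, 200);	/* no remaining waiter needs better than 200 */
	printf("cur=%d borrowing=%d\n", (int)td.cur_pri, (int)td.borrowing);
	/* cur=120 borrowing=0: back to its own base priority */
	return (0);
}

The actual sched_unlend_prio() in the diff below additionally substitutes the
kse group's user priority for the base priority when the base priority falls
in the timesharing range.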
Diffstat (limited to 'sys/kern')
-rw-r--r--	sys/kern/sched_4bsd.c	116
-rw-r--r--	sys/kern/sched_ule.c	97
-rw-r--r--	sys/kern/subr_turnstile.c	191
3 files changed, 299 insertions, 105 deletions
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 11faccc..fe90fee 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
+#include <sys/turnstile.h>
#include <machine/smp.h>
/*
@@ -159,10 +160,12 @@ static void setup_runqs(void);
static void roundrobin(void *arg);
static void schedcpu(void);
static void schedcpu_thread(void);
+static void sched_priority(struct thread *td, u_char prio);
static void sched_setup(void *dummy);
static void maybe_resched(struct thread *td);
static void updatepri(struct ksegrp *kg);
static void resetpriority(struct ksegrp *kg);
+static void resetpriority_thread(struct thread *td, struct ksegrp *kg);
#ifdef SMP
static int forward_wakeup(int cpunum);
#endif
@@ -516,9 +519,7 @@ schedcpu(void)
kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu);
resetpriority(kg);
FOREACH_THREAD_IN_GROUP(kg, td) {
- if (td->td_priority >= PUSER) {
- sched_prio(td, kg->kg_user_pri);
- }
+ resetpriority_thread(td, kg);
}
} /* end of ksegrp loop */
mtx_unlock_spin(&sched_lock);
@@ -561,7 +562,6 @@ updatepri(struct ksegrp *kg)
newcpu = decay_cpu(loadfac, newcpu);
kg->kg_estcpu = newcpu;
}
- resetpriority(kg);
}
/*
@@ -573,7 +573,6 @@ static void
resetpriority(struct ksegrp *kg)
{
register unsigned int newpriority;
- struct thread *td;
if (kg->kg_pri_class == PRI_TIMESHARE) {
newpriority = PUSER + kg->kg_estcpu / INVERSE_ESTCPU_WEIGHT +
@@ -582,9 +581,25 @@ resetpriority(struct ksegrp *kg)
PRI_MAX_TIMESHARE);
kg->kg_user_pri = newpriority;
}
- FOREACH_THREAD_IN_GROUP(kg, td) {
- maybe_resched(td); /* XXXKSE silly */
- }
+}
+
+/*
+ * Update the thread's priority when the associated ksegroup's user
+ * priority changes.
+ */
+static void
+resetpriority_thread(struct thread *td, struct ksegrp *kg)
+{
+
+ /* Only change threads with a time sharing user priority. */
+ if (td->td_priority < PRI_MIN_TIMESHARE ||
+ td->td_priority > PRI_MAX_TIMESHARE)
+ return;
+
+ /* XXX the whole needresched thing is broken, but not silly. */
+ maybe_resched(td);
+
+ sched_prio(td, kg->kg_user_pri);
}
/* ARGSUSED */
@@ -674,8 +689,7 @@ sched_clock(struct thread *td)
kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1);
if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
resetpriority(kg);
- if (td->td_priority >= PUSER)
- td->td_priority = kg->kg_user_pri;
+ resetpriority_thread(td, kg);
}
}
@@ -735,12 +749,16 @@ void
sched_nice(struct proc *p, int nice)
{
struct ksegrp *kg;
+ struct thread *td;
PROC_LOCK_ASSERT(p, MA_OWNED);
mtx_assert(&sched_lock, MA_OWNED);
p->p_nice = nice;
FOREACH_KSEGRP_IN_PROC(p, kg) {
resetpriority(kg);
+ FOREACH_THREAD_IN_GROUP(kg, td) {
+ resetpriority_thread(td, kg);
+ }
}
}
@@ -757,14 +775,16 @@ sched_class(struct ksegrp *kg, int class)
* changing the assignment of a kse to the thread,
* and moving a KSE in the system run queue.
*/
-void
-sched_prio(struct thread *td, u_char prio)
+static void
+sched_priority(struct thread *td, u_char prio)
{
CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
td, td->td_proc->p_comm, td->td_priority, prio, curthread,
curthread->td_proc->p_comm);
mtx_assert(&sched_lock, MA_OWNED);
+ if (td->td_priority == prio)
+ return;
if (TD_ON_RUNQ(td)) {
adjustrunqueue(td, prio);
} else {
@@ -772,13 +792,76 @@ sched_prio(struct thread *td, u_char prio)
}
}
+/*
+ * Update a thread's priority when it is lent another thread's
+ * priority.
+ */
+void
+sched_lend_prio(struct thread *td, u_char prio)
+{
+
+ td->td_flags |= TDF_BORROWING;
+ sched_priority(td, prio);
+}
+
+/*
+ * Restore a thread's priority when priority propagation is
+ * over. The prio argument is the minimum priority the thread
+ * needs to have to satisfy other possible priority lending
+ * requests. If the thread's regular priority is less
+ * important than prio, the thread will keep a priority boost
+ * of prio.
+ */
+void
+sched_unlend_prio(struct thread *td, u_char prio)
+{
+ u_char base_pri;
+
+ if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
+ td->td_base_pri <= PRI_MAX_TIMESHARE)
+ base_pri = td->td_ksegrp->kg_user_pri;
+ else
+ base_pri = td->td_base_pri;
+ if (prio >= base_pri) {
+ td->td_flags &= ~TDF_BORROWING;
+ sched_prio(td, base_pri);
+ } else
+ sched_lend_prio(td, prio);
+}
+
+void
+sched_prio(struct thread *td, u_char prio)
+{
+ u_char oldprio;
+
+ /* First, update the base priority. */
+ td->td_base_pri = prio;
+
+ /*
+ * If the thread is borrowing another thread's priority, don't ever
+ * lower the priority.
+ */
+ if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
+ return;
+
+ /* Change the real priority. */
+ oldprio = td->td_priority;
+ sched_priority(td, prio);
+
+ /*
+ * If the thread is on a turnstile, then let the turnstile update
+ * its state.
+ */
+ if (TD_ON_LOCK(td) && oldprio != prio)
+ turnstile_adjust(td, oldprio);
+}
+
void
sched_sleep(struct thread *td)
{
mtx_assert(&sched_lock, MA_OWNED);
td->td_ksegrp->kg_slptime = 0;
- td->td_base_pri = td->td_priority;
}
static void remrunqueue(struct thread *td);
@@ -889,8 +972,10 @@ sched_wakeup(struct thread *td)
mtx_assert(&sched_lock, MA_OWNED);
kg = td->td_ksegrp;
- if (kg->kg_slptime > 1)
+ if (kg->kg_slptime > 1) {
updatepri(kg);
+ resetpriority(kg);
+ }
kg->kg_slptime = 0;
setrunqueue(td, SRQ_BORING);
}
@@ -1157,10 +1242,13 @@ sched_userret(struct thread *td)
* it here and returning to user mode, so don't waste time setting
* it perfectly here.
*/
+ KASSERT((td->td_flags & TDF_BORROWING) == 0,
+ ("thread with borrowed priority returning to userland"));
kg = td->td_ksegrp;
if (td->td_priority != kg->kg_user_pri) {
mtx_lock_spin(&sched_lock);
td->td_priority = kg->kg_user_pri;
+ td->td_base_pri = kg->kg_user_pri;
mtx_unlock_spin(&sched_lock);
}
}
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index 513d56f..c7fb07d 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
+#include <sys/turnstile.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
@@ -133,8 +134,7 @@ struct kse {
#define KEF_XFERABLE 0x0004 /* Thread was added as transferable. */
#define KEF_HOLD 0x0008 /* Thread is temporarily bound. */
#define KEF_REMOVED 0x0010 /* Thread was removed while ASSIGNED */
-#define KEF_PRIOELEV 0x0020 /* Thread has had its prio elevated. */
-#define KEF_INTERNAL 0x0040
+#define KEF_INTERNAL 0x0020
struct kg_sched {
struct thread *skg_last_assigned; /* (j) Last thread assigned to */
@@ -234,7 +234,7 @@ static struct kg_sched kg_sched0;
#define SCHED_INTERACTIVE(kg) \
(sched_interact_score(kg) < SCHED_INTERACT_THRESH)
#define SCHED_CURR(kg, ke) \
- ((ke->ke_flags & KEF_PRIOELEV) || SCHED_INTERACTIVE(kg))
+ ((ke->ke_thread->td_flags & TDF_BORROWING) || SCHED_INTERACTIVE(kg))
/*
* Cpu percentage computation macros and defines.
@@ -315,6 +315,7 @@ static void slot_fill(struct ksegrp *kg);
static struct kse *sched_choose(void); /* XXX Should be thread * */
static void sched_slice(struct kse *ke);
static void sched_priority(struct ksegrp *kg);
+static void sched_thread_priority(struct thread *td, u_char prio);
static int sched_interact_score(struct ksegrp *kg);
static void sched_interact_update(struct ksegrp *kg);
static void sched_interact_fork(struct ksegrp *kg);
@@ -1066,7 +1067,7 @@ sched_slice(struct kse *ke)
kg = ke->ke_ksegrp;
kseq = KSEQ_CPU(ke->ke_cpu);
- if (ke->ke_flags & KEF_PRIOELEV) {
+ if (ke->ke_thread->td_flags & TDF_BORROWING) {
ke->ke_slice = SCHED_SLICE_MIN;
return;
}
@@ -1230,7 +1231,7 @@ sched_pctcpu_update(struct kse *ke)
}
void
-sched_prio(struct thread *td, u_char prio)
+sched_thread_priority(struct thread *td, u_char prio)
{
struct kse *ke;
@@ -1239,6 +1240,8 @@ sched_prio(struct thread *td, u_char prio)
curthread->td_proc->p_comm);
ke = td->td_kse;
mtx_assert(&sched_lock, MA_OWNED);
+ if (td->td_priority == prio)
+ return;
if (TD_ON_RUNQ(td)) {
/*
* If the priority has been elevated due to priority
@@ -1253,8 +1256,6 @@ sched_prio(struct thread *td, u_char prio)
ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr;
runq_add(ke->ke_runq, ke, 0);
}
- if (prio < td->td_priority)
- ke->ke_flags |= KEF_PRIOELEV;
/*
* Hold this kse on this cpu so that sched_prio() doesn't
* cause excessive migration. We only want migration to
@@ -1267,6 +1268,70 @@ sched_prio(struct thread *td, u_char prio)
td->td_priority = prio;
}
+/*
+ * Update a thread's priority when it is lent another thread's
+ * priority.
+ */
+void
+sched_lend_prio(struct thread *td, u_char prio)
+{
+
+ td->td_flags |= TDF_BORROWING;
+ sched_thread_priority(td, prio);
+}
+
+/*
+ * Restore a thread's priority when priority propagation is
+ * over. The prio argument is the minimum priority the thread
+ * needs to have to satisfy other possible priority lending
+ * requests. If the thread's regular priority is less
+ * important than prio, the thread will keep a priority boost
+ * of prio.
+ */
+void
+sched_unlend_prio(struct thread *td, u_char prio)
+{
+ u_char base_pri;
+
+ if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
+ td->td_base_pri <= PRI_MAX_TIMESHARE)
+ base_pri = td->td_ksegrp->kg_user_pri;
+ else
+ base_pri = td->td_base_pri;
+ if (prio >= base_pri) {
+ td->td_flags &= ~TDF_BORROWING;
+ sched_thread_priority(td, base_pri);
+ } else
+ sched_lend_prio(td, prio);
+}
+
+void
+sched_prio(struct thread *td, u_char prio)
+{
+ u_char oldprio;
+
+ /* First, update the base priority. */
+ td->td_base_pri = prio;
+
+ /*
+ * If the thread is borrowing another thread's priority, don't
+ * ever lower the priority.
+ */
+ if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
+ return;
+
+ /* Change the real priority. */
+ oldprio = td->td_priority;
+ sched_thread_priority(td, prio);
+
+ /*
+ * If the thread is on a turnstile, then let the turnstile update
+ * its state.
+ */
+ if (TD_ON_LOCK(td) && oldprio != prio)
+ turnstile_adjust(td, oldprio);
+}
+
void
sched_switch(struct thread *td, struct thread *newtd, int flags)
{
@@ -1374,7 +1439,6 @@ sched_sleep(struct thread *td)
mtx_assert(&sched_lock, MA_OWNED);
td->td_slptime = ticks;
- td->td_base_pri = td->td_priority;
}
void
@@ -1644,21 +1708,14 @@ void
sched_userret(struct thread *td)
{
struct ksegrp *kg;
- struct kse *ke;
- kg = td->td_ksegrp;
- ke = td->td_kse;
-
- if (td->td_priority != kg->kg_user_pri ||
- ke->ke_flags & KEF_PRIOELEV) {
+ KASSERT((td->td_flags & TDF_BORROWING) == 0,
+ ("thread with borrowed priority returning to userland"));
+ kg = td->td_ksegrp;
+ if (td->td_priority != kg->kg_user_pri) {
mtx_lock_spin(&sched_lock);
td->td_priority = kg->kg_user_pri;
- if (ke->ke_flags & KEF_PRIOELEV) {
- ke->ke_flags &= ~KEF_PRIOELEV;
- sched_slice(ke);
- if (ke->ke_slice == 0)
- mi_switch(SW_INVOL, NULL);
- }
+ td->td_base_pri = kg->kg_user_pri;
mtx_unlock_spin(&sched_lock);
}
}
diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c
index 3bb6e94..7eabe21 100644
--- a/sys/kern/subr_turnstile.c
+++ b/sys/kern/subr_turnstile.c
@@ -145,7 +145,9 @@ static void init_turnstile0(void *dummy);
#ifdef TURNSTILE_PROFILING
static void init_turnstile_profiling(void *arg);
#endif
-static void propagate_priority(struct thread *);
+static void propagate_priority(struct thread *td);
+static int turnstile_adjust_thread(struct turnstile *ts,
+ struct thread *td);
static void turnstile_setowner(struct turnstile *ts, struct thread *owner);
/*
@@ -158,7 +160,6 @@ propagate_priority(struct thread *td)
{
struct turnstile_chain *tc;
struct turnstile *ts;
- struct thread *td1;
int pri;
mtx_assert(&sched_lock, MA_OWNED);
@@ -187,8 +188,8 @@ propagate_priority(struct thread *td)
* isn't SRUN or SLOCK.
*/
KASSERT(!TD_IS_SLEEPING(td),
- ("sleeping thread (pid %d) owns a non-sleepable lock",
- td->td_proc->p_pid));
+ ("sleeping thread (tid %d) owns a non-sleepable lock",
+ td->td_tid));
/*
* If this thread already has higher priority than the
@@ -198,10 +199,16 @@ propagate_priority(struct thread *td)
return;
/*
- * If lock holder is actually running, just bump priority.
+ * Bump this thread's priority.
*/
- if (TD_IS_RUNNING(td)) {
- td->td_priority = pri;
+ sched_lend_prio(td, pri);
+
+ /*
+ * If lock holder is actually running or on the run queue
+ * then we are done.
+ */
+ if (TD_IS_RUNNING(td) || TD_ON_RUNQ(td)) {
+ MPASS(td->td_blocked == NULL);
return;
}
@@ -214,27 +221,11 @@ propagate_priority(struct thread *td)
#endif
/*
- * If on run queue move to new run queue, and quit.
- * XXXKSE this gets a lot more complicated under threads
- * but try anyhow.
- */
- if (TD_ON_RUNQ(td)) {
- MPASS(td->td_blocked == NULL);
- sched_prio(td, pri);
- return;
- }
-
- /*
- * Bump this thread's priority.
- */
- td->td_priority = pri;
-
- /*
* If we aren't blocked on a lock, we should be.
*/
KASSERT(TD_ON_LOCK(td), (
- "process %d(%s):%d holds %s but isn't blocked on a lock\n",
- td->td_proc->p_pid, td->td_proc->p_comm, td->td_state,
+ "thread %d(%s):%d holds %s but isn't blocked on a lock\n",
+ td->td_tid, td->td_proc->p_comm, td->td_state,
ts->ts_lockobj->lo_name));
/*
@@ -245,61 +236,81 @@ propagate_priority(struct thread *td)
tc = TC_LOOKUP(ts->ts_lockobj);
mtx_lock_spin(&tc->tc_lock);
- /*
- * This thread may not be blocked on this turnstile anymore
- * but instead might already be woken up on another CPU
- * that is waiting on sched_lock in turnstile_unpend() to
- * finish waking this thread up. We can detect this case
- * by checking to see if this thread has been given a
- * turnstile by either turnstile_signal() or
- * turnstile_broadcast(). In this case, treat the thread as
- * if it was already running.
- */
- if (td->td_turnstile != NULL) {
+ /* Resort td on the list if needed. */
+ if (!turnstile_adjust_thread(ts, td)) {
mtx_unlock_spin(&tc->tc_lock);
return;
}
+ mtx_unlock_spin(&tc->tc_lock);
+ }
+}
- /*
- * Check if the thread needs to be moved up on
- * the blocked chain. It doesn't need to be moved
- * if it is already at the head of the list or if
- * the item in front of it still has a higher priority.
- */
- if (td == TAILQ_FIRST(&ts->ts_blocked)) {
- mtx_unlock_spin(&tc->tc_lock);
- continue;
- }
+/*
+ * Adjust the thread's position on a turnstile after its priority has been
+ * changed.
+ */
+static int
+turnstile_adjust_thread(struct turnstile *ts, struct thread *td)
+{
+ struct turnstile_chain *tc;
+ struct thread *td1, *td2;
- td1 = TAILQ_PREV(td, threadqueue, td_lockq);
- if (td1->td_priority <= pri) {
- mtx_unlock_spin(&tc->tc_lock);
- continue;
- }
+ mtx_assert(&sched_lock, MA_OWNED);
+ MPASS(TD_ON_LOCK(td));
+
+ /*
+ * This thread may not be blocked on this turnstile anymore
+ * but instead might already be woken up on another CPU
+ * that is waiting on sched_lock in turnstile_unpend() to
+ * finish waking this thread up. We can detect this case
+ * by checking to see if this thread has been given a
+ * turnstile by either turnstile_signal() or
+ * turnstile_broadcast(). In this case, treat the thread as
+ * if it was already running.
+ */
+ if (td->td_turnstile != NULL)
+ return (0);
+
+ /*
+ * Check if the thread needs to be moved on the blocked chain.
+ * It needs to be moved if either its priority is lower than
+ * the previous thread or higher than the next thread.
+ */
+ tc = TC_LOOKUP(ts->ts_lockobj);
+ mtx_assert(&tc->tc_lock, MA_OWNED);
+ td1 = TAILQ_PREV(td, threadqueue, td_lockq);
+ td2 = TAILQ_NEXT(td, td_lockq);
+ if ((td1 != NULL && td->td_priority < td1->td_priority) ||
+ (td2 != NULL && td->td_priority > td2->td_priority)) {
/*
* Remove thread from blocked chain and determine where
- * it should be moved up to. Since we know that td1 has
- * a lower priority than td, we know that at least one
- * thread in the chain has a lower priority and that
- * td1 will thus not be NULL after the loop.
+ * it should be moved to.
*/
mtx_lock_spin(&td_contested_lock);
TAILQ_REMOVE(&ts->ts_blocked, td, td_lockq);
TAILQ_FOREACH(td1, &ts->ts_blocked, td_lockq) {
MPASS(td1->td_proc->p_magic == P_MAGIC);
- if (td1->td_priority > pri)
+ if (td1->td_priority > td->td_priority)
break;
}
- MPASS(td1 != NULL);
- TAILQ_INSERT_BEFORE(td1, td, td_lockq);
+ if (td1 == NULL)
+ TAILQ_INSERT_TAIL(&ts->ts_blocked, td, td_lockq);
+ else
+ TAILQ_INSERT_BEFORE(td1, td, td_lockq);
mtx_unlock_spin(&td_contested_lock);
- CTR4(KTR_LOCK,
- "propagate_priority: td %p moved before %p on [%p] %s",
- td, td1, ts->ts_lockobj, ts->ts_lockobj->lo_name);
- mtx_unlock_spin(&tc->tc_lock);
+ if (td1 == NULL)
+ CTR3(KTR_LOCK,
+ "turnstile_adjust_thread: td %d put at tail on [%p] %s",
+ td->td_tid, ts->ts_lockobj, ts->ts_lockobj->lo_name);
+ else
+ CTR4(KTR_LOCK,
+ "turnstile_adjust_thread: td %d moved before %d on [%p] %s",
+ td->td_tid, td1->td_tid, ts->ts_lockobj,
+ ts->ts_lockobj->lo_name);
}
+ return (1);
}
/*
@@ -355,6 +366,46 @@ init_turnstile0(void *dummy)
SYSINIT(turnstile0, SI_SUB_LOCK, SI_ORDER_ANY, init_turnstile0, NULL);
/*
+ * Update a thread on the turnstile list after its priority has been changed.
+ * The old priority is passed in as an argument.
+ */
+void
+turnstile_adjust(struct thread *td, u_char oldpri)
+{
+ struct turnstile_chain *tc;
+ struct turnstile *ts;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+ MPASS(TD_ON_LOCK(td));
+
+ /*
+ * Pick up the lock that td is blocked on.
+ */
+ ts = td->td_blocked;
+ MPASS(ts != NULL);
+ tc = TC_LOOKUP(ts->ts_lockobj);
+ mtx_lock_spin(&tc->tc_lock);
+
+ /* Resort the turnstile on the list. */
+ if (!turnstile_adjust_thread(ts, td)) {
+ mtx_unlock_spin(&tc->tc_lock);
+ return;
+ }
+
+ /*
+ * If our priority was lowered and we are at the head of the
+ * turnstile, then propagate our new priority up the chain.
+ * Note that we currently don't try to revoke lent priorities
+ * when our priority goes up.
+ */
+ if (td == TAILQ_FIRST(&ts->ts_blocked) && td->td_priority < oldpri) {
+ mtx_unlock_spin(&tc->tc_lock);
+ propagate_priority(td);
+ } else
+ mtx_unlock_spin(&tc->tc_lock);
+}
+
+/*
* Set the owner of the lock this turnstile is attached to.
*/
static void
@@ -470,7 +521,7 @@ turnstile_claim(struct lock_object *lock)
*/
mtx_lock_spin(&sched_lock);
if (td->td_priority < owner->td_priority)
- owner->td_priority = td->td_priority;
+ sched_lend_prio(owner, td->td_priority);
mtx_unlock_spin(&sched_lock);
}
@@ -578,14 +629,14 @@ turnstile_wait(struct lock_object *lock, struct thread *owner)
propagate_priority(td);
if (LOCK_LOG_TEST(lock, 0))
- CTR4(KTR_LOCK, "%s: td %p blocked on [%p] %s", __func__, td,
- lock, lock->lo_name);
+ CTR4(KTR_LOCK, "%s: td %d blocked on [%p] %s", __func__,
+ td->td_tid, lock, lock->lo_name);
mi_switch(SW_VOL, NULL);
if (LOCK_LOG_TEST(lock, 0))
- CTR4(KTR_LOCK, "%s: td %p free from blocked on [%p] %s",
- __func__, td, lock, lock->lo_name);
+ CTR4(KTR_LOCK, "%s: td %d free from blocked on [%p] %s",
+ __func__, td->td_tid, lock, lock->lo_name);
mtx_unlock_spin(&sched_lock);
}
@@ -692,7 +743,7 @@ turnstile_unpend(struct turnstile *ts)
TAILQ_HEAD( ,thread) pending_threads;
struct turnstile_chain *tc;
struct thread *td;
- int cp, pri;
+ u_char cp, pri;
MPASS(ts != NULL);
MPASS(ts->ts_owner == curthread);
@@ -739,9 +790,7 @@ turnstile_unpend(struct turnstile *ts)
pri = cp;
}
mtx_unlock_spin(&td_contested_lock);
- if (pri > td->td_base_pri)
- pri = td->td_base_pri;
- td->td_priority = pri;
+ sched_unlend_prio(td, pri);
/*
* Wake up all the pending threads. If a thread is not blocked