From 15c2dd7a1fd67c01c85a5ed9bea3c15a0d5a4d0b Mon Sep 17 00:00:00 2001 From: jeff Date: Mon, 4 Jun 2007 23:51:44 +0000 Subject: Commit 3/14 of sched_lock decomposition. - Add a per-turnstile spinlock to solve potential priority propagation deadlocks that are possible with thread_lock(). - The turnstile lock order is defined as the exact opposite of the lock order used with the sleep locks they represent. This allows us to walk in reverse order in priority_propagate and this is the only place we wish to multiply acquire turnstile locks. - Use the turnstile_chain lock to protect assigning mutexes to turnstiles. - Change the turnstile interface to pass back turnstile pointers to the consumers. This allows us to reduce some locking and makes it easier to cancel turnstile assignment while the turnstile chain lock is held. Tested by: kris, current@ Tested on: i386, amd64, ULE, 4BSD, libthr, libkse, PREEMPTION, etc. Discussed with: kris, attilio, kmacy, jhb, julian, bde (small parts each) --- sys/kern/kern_mutex.c | 149 +++++++++++++++++++----- sys/kern/kern_rwlock.c | 54 ++++----- sys/kern/subr_turnstile.c | 284 ++++++++++++++++++++++++---------------------- 3 files changed, 299 insertions(+), 188 deletions(-) (limited to 'sys/kern') diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c index fa498b5..ae4a36b 100644 --- a/sys/kern/kern_mutex.c +++ b/sys/kern/kern_mutex.c @@ -127,6 +127,7 @@ struct lock_class lock_class_mtx_spin = { /* * System-wide mutexes */ +struct mtx blocked_lock; struct mtx sched_lock; struct mtx Giant; @@ -305,6 +306,7 @@ void _mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file, int line) { + struct turnstile *ts; #ifdef ADAPTIVE_MUTEXES volatile struct thread *owner; #endif @@ -334,7 +336,7 @@ _mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file, m->lock_object.lo_name, (void *)m->mtx_lock, file, line); while (!_obtain_lock(m, tid)) { - turnstile_lock(&m->lock_object); + ts = 
turnstile_trywait(&m->lock_object); v = m->mtx_lock; /* @@ -342,7 +344,7 @@ _mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file, * the turnstile chain lock. */ if (v == MTX_UNOWNED) { - turnstile_release(&m->lock_object); + turnstile_cancel(ts); cpu_spinwait(); continue; } @@ -358,7 +360,7 @@ _mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file, */ if (v == MTX_CONTESTED) { m->mtx_lock = tid | MTX_CONTESTED; - turnstile_claim(&m->lock_object); + turnstile_claim(ts); break; } #endif @@ -370,7 +372,7 @@ _mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file, */ if ((v & MTX_CONTESTED) == 0 && !atomic_cmpset_ptr(&m->mtx_lock, v, v | MTX_CONTESTED)) { - turnstile_release(&m->lock_object); + turnstile_cancel(ts); cpu_spinwait(); continue; } @@ -387,7 +389,7 @@ _mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file, if (m != &Giant && TD_IS_RUNNING(owner)) #endif { - turnstile_release(&m->lock_object); + turnstile_cancel(ts); while (mtx_owner(m) == owner && TD_IS_RUNNING(owner)) { cpu_spinwait(); } @@ -414,8 +416,7 @@ _mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file, /* * Block on the turnstile. */ - turnstile_wait(&m->lock_object, mtx_owner(m), - TS_EXCLUSIVE_QUEUE); + turnstile_wait(ts, mtx_owner(m), TS_EXCLUSIVE_QUEUE); } #ifdef KTR if (cont_logged) { @@ -428,7 +429,25 @@ _mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file, waittime, (file), (line)); } +static void +_mtx_lock_spin_failed(struct mtx *m) +{ + struct thread *td; + + td = mtx_owner(m); + + /* If the mutex is unlocked, try again. */ + if (td == NULL) + return; #ifdef SMP + printf( "spin lock %p (%s) held by %p (tid %d) too long\n", + m, m->lock_object.lo_name, td, td->td_tid); +#ifdef WITNESS + witness_display_spinlock(&m->lock_object, td); +#endif + panic("spin lock held too long"); +} + /* * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock. 
* @@ -440,7 +459,6 @@ _mtx_lock_spin(struct mtx *m, uintptr_t tid, int opts, const char *file, int line) { int i = 0, contested = 0; - struct thread *td; uint64_t waittime = 0; if (LOCK_LOG_TEST(&m->lock_object, opts)) @@ -458,20 +476,8 @@ _mtx_lock_spin(struct mtx *m, uintptr_t tid, int opts, const char *file, } if (i < 60000000 || kdb_active || panicstr != NULL) DELAY(1); - else { - td = mtx_owner(m); - - /* If the mutex is unlocked, try again. */ - if (td == NULL) - continue; - printf( - "spin lock %p (%s) held by %p (tid %d) too long\n", - m, m->lock_object.lo_name, td, td->td_tid); -#ifdef WITNESS - witness_display_spinlock(&m->lock_object, td); -#endif - panic("spin lock held too long"); - } + else + _mtx_lock_spin_failed(m); cpu_spinwait(); } spinlock_enter(); @@ -482,10 +488,87 @@ _mtx_lock_spin(struct mtx *m, uintptr_t tid, int opts, const char *file, lock_profile_obtain_lock_success(&m->lock_object, contested, waittime, (file), (line)); - } #endif /* SMP */ +void +_thread_lock_flags(struct thread *td, int opts, const char *file, int line) +{ + struct mtx *m; + uintptr_t tid; + int i; + + i = 0; + tid = (uintptr_t)curthread; + for (;;) { +retry: + spinlock_enter(); + m = __DEVOLATILE(struct mtx *, td->td_lock); + WITNESS_CHECKORDER(&m->lock_object, + opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line); + while (!_obtain_lock(m, tid)) { + if (m->mtx_lock == tid) { + m->mtx_recurse++; + break; + } + /* Give interrupts a chance while we spin. 
*/ + spinlock_exit(); + while (m->mtx_lock != MTX_UNOWNED) { + if (i++ < 10000000) + cpu_spinwait(); + else if (i < 60000000 || + kdb_active || panicstr != NULL) + DELAY(1); + else + _mtx_lock_spin_failed(m); + cpu_spinwait(); + if (m != td->td_lock) + goto retry; + } + spinlock_enter(); + } + if (m == td->td_lock) + break; + _rel_spin_lock(m); /* does spinlock_exit() */ + } + WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); +} + +struct mtx * +thread_lock_block(struct thread *td) +{ + struct mtx *lock; + + spinlock_enter(); + THREAD_LOCK_ASSERT(td, MA_OWNED); + lock = __DEVOLATILE(struct mtx *, td->td_lock); + td->td_lock = &blocked_lock; + mtx_unlock_spin(lock); + + return (lock); +} + +void +thread_lock_unblock(struct thread *td, struct mtx *new) +{ + mtx_assert(new, MA_OWNED); + MPASS(td->td_lock == &blocked_lock); + atomic_store_rel_ptr((void *)&td->td_lock, (uintptr_t)new); + spinlock_exit(); +} + +void +thread_lock_set(struct thread *td, struct mtx *new) +{ + struct mtx *lock; + + mtx_assert(new, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); + lock = __DEVOLATILE(struct mtx *, td->td_lock); + td->td_lock = new; + mtx_unlock_spin(lock); +} + /* * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock. * @@ -508,7 +591,11 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line) return; } - turnstile_lock(&m->lock_object); + /* + * We have to lock the chain before the turnstile so this turnstile + * can be removed from the hash list if it is empty. 
+ */ + turnstile_chain_lock(&m->lock_object); ts = turnstile_lookup(&m->lock_object); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m); @@ -518,7 +605,7 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line) _release_lock_quick(m); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m); - turnstile_release(&m->lock_object); + turnstile_chain_unlock(&m->lock_object); return; } #else @@ -543,7 +630,12 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line) m); } #endif + /* + * This turnstile is now no longer associated with the mutex. We can + * unlock the chain lock so a new turnstile may take its place. + */ turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); + turnstile_chain_unlock(&m->lock_object); #ifndef PREEMPTION /* @@ -557,7 +649,7 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line) if (td->td_critnest > 0 || td1->td_priority >= td->td_priority) return; - mtx_lock_spin(&sched_lock); + thread_lock(td1); if (!TD_IS_RUNNING(td1)) { #ifdef notyet if (td->td_ithd != NULL) { @@ -582,7 +674,7 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line) CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p", m, (void *)m->mtx_lock); } - mtx_unlock_spin(&sched_lock); + thread_unlock(td1); #endif } @@ -761,7 +853,10 @@ mutex_init(void) */ mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE); mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE); + mtx_init(&blocked_lock, "blocked lock", NULL, MTX_SPIN); + blocked_lock.mtx_lock = 0xdeadc0de; /* Always blocked. 
*/ mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); + mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN | MTX_RECURSE); mtx_init(&devmtx, "cdev", NULL, MTX_DEF); mtx_lock(&Giant); diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c index 1b1113e..90a2142 100644 --- a/sys/kern/kern_rwlock.c +++ b/sys/kern/kern_rwlock.c @@ -187,6 +187,7 @@ _rw_wunlock(struct rwlock *rw, const char *file, int line) void _rw_rlock(struct rwlock *rw, const char *file, int line) { + struct turnstile *ts; #ifdef ADAPTIVE_RWLOCKS volatile struct thread *owner; #endif @@ -256,7 +257,7 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) * has a write lock, so acquire the turnstile lock so we can * begin the process of blocking. */ - turnstile_lock(&rw->lock_object); + ts = turnstile_trywait(&rw->lock_object); /* * The lock might have been released while we spun, so @@ -265,7 +266,7 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) */ x = rw->rw_lock; if (x & RW_LOCK_READ) { - turnstile_release(&rw->lock_object); + turnstile_cancel(ts); cpu_spinwait(); continue; } @@ -279,7 +280,7 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) if (!(x & RW_LOCK_READ_WAITERS)) { if (!atomic_cmpset_ptr(&rw->rw_lock, x, x | RW_LOCK_READ_WAITERS)) { - turnstile_release(&rw->lock_object); + turnstile_cancel(ts); cpu_spinwait(); continue; } @@ -296,7 +297,7 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) */ owner = (struct thread *)RW_OWNER(x); if (TD_IS_RUNNING(owner)) { - turnstile_release(&rw->lock_object); + turnstile_cancel(ts); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); @@ -314,7 +315,7 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); - turnstile_wait(&rw->lock_object, rw_owner(rw), TS_SHARED_QUEUE); + turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE); if 
(LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); @@ -407,7 +408,7 @@ _rw_runlock(struct rwlock *rw, const char *file, int line) * Ok, we know we have a waiting writer and we think we * are the last reader, so grab the turnstile lock. */ - turnstile_lock(&rw->lock_object); + turnstile_chain_lock(&rw->lock_object); /* * Try to drop our lock leaving the lock in a unlocked @@ -427,7 +428,7 @@ _rw_runlock(struct rwlock *rw, const char *file, int line) */ if (!atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS, RW_UNLOCKED)) { - turnstile_release(&rw->lock_object); + turnstile_chain_unlock(&rw->lock_object); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) @@ -445,6 +446,7 @@ _rw_runlock(struct rwlock *rw, const char *file, int line) MPASS(ts != NULL); turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE); turnstile_unpend(ts, TS_SHARED_LOCK); + turnstile_chain_unlock(&rw->lock_object); break; } lock_profile_release_lock(&rw->lock_object); @@ -458,6 +460,7 @@ _rw_runlock(struct rwlock *rw, const char *file, int line) void _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) { + struct turnstile *ts; #ifdef ADAPTIVE_RWLOCKS volatile struct thread *owner; #endif @@ -468,7 +471,7 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) rw->lock_object.lo_name, (void *)rw->rw_lock, file, line); while (!_rw_write_lock(rw, tid)) { - turnstile_lock(&rw->lock_object); + ts = turnstile_trywait(&rw->lock_object); v = rw->rw_lock; /* @@ -476,7 +479,7 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) * turnstile chain lock, try again. 
*/ if (v == RW_UNLOCKED) { - turnstile_release(&rw->lock_object); + turnstile_cancel(ts); cpu_spinwait(); continue; } @@ -495,12 +498,12 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) if (atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED | RW_LOCK_WRITE_WAITERS, tid | RW_LOCK_WRITE_WAITERS)) { - turnstile_claim(&rw->lock_object); + turnstile_claim(ts); CTR2(KTR_LOCK, "%s: %p claimed by new writer", __func__, rw); break; } - turnstile_release(&rw->lock_object); + turnstile_cancel(ts); cpu_spinwait(); continue; } @@ -513,7 +516,7 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) if (!(v & RW_LOCK_WRITE_WAITERS)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_WRITE_WAITERS)) { - turnstile_release(&rw->lock_object); + turnstile_cancel(ts); cpu_spinwait(); continue; } @@ -530,7 +533,7 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) */ owner = (struct thread *)RW_OWNER(v); if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) { - turnstile_release(&rw->lock_object); + turnstile_cancel(ts); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); @@ -548,8 +551,7 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); - turnstile_wait(&rw->lock_object, rw_owner(rw), - TS_EXCLUSIVE_QUEUE); + turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); @@ -574,7 +576,7 @@ _rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p contested", __func__, rw); - turnstile_lock(&rw->lock_object); + turnstile_chain_lock(&rw->lock_object); ts = turnstile_lookup(&rw->lock_object); #ifdef ADAPTIVE_RWLOCKS @@ -587,7 +589,7 @@ 
_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) atomic_store_rel_ptr(&rw->rw_lock, RW_UNLOCKED); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p no sleepers", __func__, rw); - turnstile_release(&rw->lock_object); + turnstile_chain_unlock(&rw->lock_object); return; } #else @@ -640,6 +642,7 @@ _rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) CTR2(KTR_LOCK, "%s: %p no sleepers 2", __func__, rw); atomic_store_rel_ptr(&rw->rw_lock, v); turnstile_disown(ts); + turnstile_chain_unlock(&rw->lock_object); return; } #endif @@ -651,6 +654,7 @@ _rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) turnstile_broadcast(ts, queue); atomic_store_rel_ptr(&rw->rw_lock, v); turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); + turnstile_chain_unlock(&rw->lock_object); } /* @@ -662,6 +666,7 @@ int _rw_try_upgrade(struct rwlock *rw, const char *file, int line) { uintptr_t v, tid; + struct turnstile *ts; int success; KASSERT(rw->rw_lock != RW_DESTROYED, @@ -686,7 +691,7 @@ _rw_try_upgrade(struct rwlock *rw, const char *file, int line) * Ok, we think we have write waiters, so lock the * turnstile. */ - turnstile_lock(&rw->lock_object); + ts = turnstile_trywait(&rw->lock_object); /* * Try to switch from one reader to a writer again. This time @@ -705,9 +710,9 @@ _rw_try_upgrade(struct rwlock *rw, const char *file, int line) #else if (success && v) #endif - turnstile_claim(&rw->lock_object); + turnstile_claim(ts); else - turnstile_release(&rw->lock_object); + turnstile_cancel(ts); out: LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line); if (success) @@ -745,7 +750,7 @@ _rw_downgrade(struct rwlock *rw, const char *file, int line) * Ok, we think we have waiters, so lock the turnstile so we can * read the waiter flags without any races. 
*/ - turnstile_lock(&rw->lock_object); + turnstile_chain_lock(&rw->lock_object); v = rw->rw_lock; MPASS(v & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)); @@ -779,12 +784,9 @@ _rw_downgrade(struct rwlock *rw, const char *file, int line) (v & RW_LOCK_WRITE_WAITERS)); if (v & RW_LOCK_READ_WAITERS) turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); -#ifdef ADAPTIVE_RWLOCKS - else if (ts == NULL) - turnstile_release(&rw->lock_object); -#endif - else + else if (ts) turnstile_disown(ts); + turnstile_chain_unlock(&rw->lock_object); out: LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line); } diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c index 4435751..f8d1931 100644 --- a/sys/kern/subr_turnstile.c +++ b/sys/kern/subr_turnstile.c @@ -116,6 +116,7 @@ __FBSDID("$FreeBSD$"); * q - td_contested lock */ struct turnstile { + struct mtx ts_lock; /* Spin lock for self. */ struct threadqueue ts_blocked[2]; /* (c + q) Blocked threads. */ struct threadqueue ts_pending; /* (c) Pending threads. */ LIST_ENTRY(turnstile) ts_hash; /* (c) Chain and free list. */ @@ -162,6 +163,7 @@ static void turnstile_setowner(struct turnstile *ts, struct thread *owner); static void turnstile_dtor(void *mem, int size, void *arg); #endif static int turnstile_init(void *mem, int size, int flags); +static void turnstile_fini(void *mem, int size); /* * Walks the chain of turnstiles and their owners to propagate the priority @@ -171,13 +173,20 @@ static int turnstile_init(void *mem, int size, int flags); static void propagate_priority(struct thread *td) { - struct turnstile_chain *tc; struct turnstile *ts; int pri; - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); pri = td->td_priority; ts = td->td_blocked; + MPASS(td->td_lock == &ts->ts_lock); + /* + * Grab a recursive lock on this turnstile chain so it stays locked + * for the whole operation. The caller expects us to return with + * the original lock held. 
We only ever lock down the chain so + * the lock order is constant. + */ + mtx_lock_spin(&ts->ts_lock); for (;;) { td = ts->ts_owner; @@ -186,9 +195,12 @@ propagate_priority(struct thread *td) * This might be a read lock with no owner. There's * not much we can do, so just bail. */ + mtx_unlock_spin(&ts->ts_lock); return; } + thread_lock_flags(td, MTX_DUPOK); + mtx_unlock_spin(&ts->ts_lock); MPASS(td->td_proc != NULL); MPASS(td->td_proc->p_magic == P_MAGIC); @@ -213,8 +225,10 @@ propagate_priority(struct thread *td) * If this thread already has higher priority than the * thread that is being blocked, we are finished. */ - if (td->td_priority <= pri) + if (td->td_priority <= pri) { + thread_unlock(td); return; + } /* * Bump this thread's priority. @@ -227,6 +241,7 @@ propagate_priority(struct thread *td) */ if (TD_IS_RUNNING(td) || TD_ON_RUNQ(td)) { MPASS(td->td_blocked == NULL); + thread_unlock(td); return; } @@ -251,15 +266,13 @@ propagate_priority(struct thread *td) */ ts = td->td_blocked; MPASS(ts != NULL); - tc = TC_LOOKUP(ts->ts_lockobj); - mtx_lock_spin(&tc->tc_lock); - + MPASS(td->td_lock == &ts->ts_lock); /* Resort td on the list if needed. */ if (!turnstile_adjust_thread(ts, td)) { - mtx_unlock_spin(&tc->tc_lock); + mtx_unlock_spin(&ts->ts_lock); return; } - mtx_unlock_spin(&tc->tc_lock); + /* The thread lock is released as ts lock above. */ } } @@ -270,17 +283,16 @@ propagate_priority(struct thread *td) static int turnstile_adjust_thread(struct turnstile *ts, struct thread *td) { - struct turnstile_chain *tc; struct thread *td1, *td2; int queue; - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); MPASS(TD_ON_LOCK(td)); /* * This thread may not be blocked on this turnstile anymore * but instead might already be woken up on another CPU - * that is waiting on sched_lock in turnstile_unpend() to + * that is waiting on the thread lock in turnstile_unpend() to * finish waking this thread up. 
We can detect this case * by checking to see if this thread has been given a * turnstile by either turnstile_signal() or @@ -295,8 +307,7 @@ turnstile_adjust_thread(struct turnstile *ts, struct thread *td) * It needs to be moved if either its priority is lower than * the previous thread or higher than the next thread. */ - tc = TC_LOOKUP(ts->ts_lockobj); - mtx_assert(&tc->tc_lock, MA_OWNED); + MPASS(td->td_lock == &ts->ts_lock); td1 = TAILQ_PREV(td, threadqueue, td_lockq); td2 = TAILQ_NEXT(td, td_lockq); if ((td1 != NULL && td->td_priority < td1->td_priority) || @@ -385,9 +396,10 @@ init_turnstile0(void *dummy) turnstile_zone = uma_zcreate("TURNSTILE", sizeof(struct turnstile), #ifdef INVARIANTS - NULL, turnstile_dtor, turnstile_init, NULL, UMA_ALIGN_CACHE, 0); + NULL, turnstile_dtor, turnstile_init, turnstile_fini, + UMA_ALIGN_CACHE, 0); #else - NULL, NULL, turnstile_init, NULL, UMA_ALIGN_CACHE, 0); + NULL, NULL, turnstile_init, turnstile_fini, UMA_ALIGN_CACHE, 0); #endif thread0.td_turnstile = turnstile_alloc(); } @@ -400,10 +412,8 @@ SYSINIT(turnstile0, SI_SUB_LOCK, SI_ORDER_ANY, init_turnstile0, NULL); void turnstile_adjust(struct thread *td, u_char oldpri) { - struct turnstile_chain *tc; struct turnstile *ts; - mtx_assert(&sched_lock, MA_OWNED); MPASS(TD_ON_LOCK(td)); /* @@ -411,15 +421,12 @@ turnstile_adjust(struct thread *td, u_char oldpri) */ ts = td->td_blocked; MPASS(ts != NULL); - tc = TC_LOOKUP(ts->ts_lockobj); - mtx_lock_spin(&tc->tc_lock); + MPASS(td->td_lock == &ts->ts_lock); + mtx_assert(&ts->ts_lock, MA_OWNED); /* Resort the turnstile on the list. */ - if (!turnstile_adjust_thread(ts, td)) { - mtx_unlock_spin(&tc->tc_lock); + if (!turnstile_adjust_thread(ts, td)) return; - } - /* * If our priority was lowered and we are at the head of the * turnstile, then propagate our new priority up the chain. 
@@ -430,12 +437,8 @@ turnstile_adjust(struct thread *td, u_char oldpri) td->td_tsqueue == TS_SHARED_QUEUE); if (td == TAILQ_FIRST(&ts->ts_blocked[td->td_tsqueue]) && td->td_priority < oldpri) { - mtx_unlock_spin(&tc->tc_lock); - critical_enter(); propagate_priority(td); - critical_exit(); - } else - mtx_unlock_spin(&tc->tc_lock); + } } /* @@ -487,9 +490,19 @@ turnstile_init(void *mem, int size, int flags) TAILQ_INIT(&ts->ts_blocked[TS_SHARED_QUEUE]); TAILQ_INIT(&ts->ts_pending); LIST_INIT(&ts->ts_free); + mtx_init(&ts->ts_lock, "turnstile lock", NULL, MTX_SPIN | MTX_RECURSE); return (0); } +static void +turnstile_fini(void *mem, int size) +{ + struct turnstile *ts; + + ts = mem; + mtx_destroy(&ts->ts_lock); +} + /* * Get a turnstile for a new thread. */ @@ -514,12 +527,51 @@ turnstile_free(struct turnstile *ts) * Lock the turnstile chain associated with the specified lock. */ void -turnstile_lock(struct lock_object *lock) +turnstile_chain_lock(struct lock_object *lock) +{ + struct turnstile_chain *tc; + + tc = TC_LOOKUP(lock); + mtx_lock_spin(&tc->tc_lock); +} + +struct turnstile * +turnstile_trywait(struct lock_object *lock) { struct turnstile_chain *tc; + struct turnstile *ts; tc = TC_LOOKUP(lock); mtx_lock_spin(&tc->tc_lock); + LIST_FOREACH(ts, &tc->tc_turnstiles, ts_hash) + if (ts->ts_lockobj == lock) { + mtx_lock_spin(&ts->ts_lock); + return (ts); + } + + ts = curthread->td_turnstile; + MPASS(ts != NULL); + mtx_lock_spin(&ts->ts_lock); + KASSERT(ts->ts_lockobj == NULL, ("stale ts_lockobj pointer")); + ts->ts_lockobj = lock; + + return (ts); +} + +void +turnstile_cancel(struct turnstile *ts) +{ + struct turnstile_chain *tc; + struct lock_object *lock; + + mtx_assert(&ts->ts_lock, MA_OWNED); + + mtx_unlock_spin(&ts->ts_lock); + lock = ts->ts_lockobj; + if (ts == curthread->td_turnstile) + ts->ts_lockobj = NULL; + tc = TC_LOOKUP(lock); + mtx_unlock_spin(&tc->tc_lock); } /* @@ -536,8 +588,10 @@ turnstile_lookup(struct lock_object *lock) tc = TC_LOOKUP(lock); 
mtx_assert(&tc->tc_lock, MA_OWNED); LIST_FOREACH(ts, &tc->tc_turnstiles, ts_hash) - if (ts->ts_lockobj == lock) + if (ts->ts_lockobj == lock) { + mtx_lock_spin(&ts->ts_lock); return (ts); + } return (NULL); } @@ -545,7 +599,7 @@ turnstile_lookup(struct lock_object *lock) * Unlock the turnstile chain associated with a given lock. */ void -turnstile_release(struct lock_object *lock) +turnstile_chain_unlock(struct lock_object *lock) { struct turnstile_chain *tc; @@ -574,16 +628,13 @@ turnstile_first_waiter(struct turnstile *ts) * owner appropriately. */ void -turnstile_claim(struct lock_object *lock) +turnstile_claim(struct turnstile *ts) { - struct turnstile_chain *tc; - struct turnstile *ts; struct thread *td, *owner; + struct turnstile_chain *tc; - tc = TC_LOOKUP(lock); - mtx_assert(&tc->tc_lock, MA_OWNED); - ts = turnstile_lookup(lock); - MPASS(ts != NULL); + mtx_assert(&ts->ts_lock, MA_OWNED); + MPASS(ts != curthread->td_turnstile); owner = curthread; mtx_lock_spin(&td_contested_lock); @@ -593,15 +644,18 @@ turnstile_claim(struct lock_object *lock) td = turnstile_first_waiter(ts); MPASS(td != NULL); MPASS(td->td_proc->p_magic == P_MAGIC); - mtx_unlock_spin(&tc->tc_lock); + MPASS(td->td_lock == &ts->ts_lock); /* * Update the priority of the new owner if needed. */ - mtx_lock_spin(&sched_lock); + thread_lock(owner); if (td->td_priority < owner->td_priority) sched_lend_prio(owner, td->td_priority); - mtx_unlock_spin(&sched_lock); + thread_unlock(owner); + tc = TC_LOOKUP(ts->ts_lockobj); + mtx_unlock_spin(&ts->ts_lock); + mtx_unlock_spin(&tc->tc_lock); } /* @@ -611,31 +665,28 @@ turnstile_claim(struct lock_object *lock) * turnstile chain locked and will return with it unlocked. 
*/ void -turnstile_wait(struct lock_object *lock, struct thread *owner, int queue) +turnstile_wait(struct turnstile *ts, struct thread *owner, int queue) { struct turnstile_chain *tc; - struct turnstile *ts; struct thread *td, *td1; + struct lock_object *lock; td = curthread; - tc = TC_LOOKUP(lock); - mtx_assert(&tc->tc_lock, MA_OWNED); - MPASS(td->td_turnstile != NULL); + mtx_assert(&ts->ts_lock, MA_OWNED); if (queue == TS_SHARED_QUEUE) MPASS(owner != NULL); if (owner) MPASS(owner->td_proc->p_magic == P_MAGIC); MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE); - /* Look up the turnstile associated with the lock 'lock'. */ - ts = turnstile_lookup(lock); - /* * If the lock does not already have a turnstile, use this thread's * turnstile. Otherwise insert the current thread into the * turnstile already in use by this lock. */ - if (ts == NULL) { + tc = TC_LOOKUP(ts->ts_lockobj); + if (ts == td->td_turnstile) { + mtx_assert(&tc->tc_lock, MA_OWNED); #ifdef TURNSTILE_PROFILING tc->tc_depth++; if (tc->tc_depth > tc->tc_max_depth) { @@ -644,7 +695,7 @@ turnstile_wait(struct lock_object *lock, struct thread *owner, int queue) turnstile_max_depth = tc->tc_max_depth; } #endif - ts = td->td_turnstile; + tc = TC_LOOKUP(ts->ts_lockobj); LIST_INSERT_HEAD(&tc->tc_turnstiles, ts, ts_hash); KASSERT(TAILQ_EMPTY(&ts->ts_pending), ("thread's turnstile has pending threads")); @@ -654,8 +705,7 @@ turnstile_wait(struct lock_object *lock, struct thread *owner, int queue) ("thread's turnstile has shared waiters")); KASSERT(LIST_EMPTY(&ts->ts_free), ("thread's turnstile has a non-empty free list")); - KASSERT(ts->ts_lockobj == NULL, ("stale ts_lockobj pointer")); - ts->ts_lockobj = lock; + MPASS(ts->ts_lockobj != NULL); mtx_lock_spin(&td_contested_lock); TAILQ_INSERT_TAIL(&ts->ts_blocked[queue], td, td_lockq); turnstile_setowner(ts, owner); @@ -674,58 +724,31 @@ turnstile_wait(struct lock_object *lock, struct thread *owner, int queue) MPASS(td->td_turnstile != NULL); 
LIST_INSERT_HEAD(&ts->ts_free, td->td_turnstile, ts_hash); } + thread_lock(td); + thread_lock_set(td, &ts->ts_lock); td->td_turnstile = NULL; - mtx_unlock_spin(&tc->tc_lock); - - mtx_lock_spin(&sched_lock); - /* - * Handle race condition where a thread on another CPU that owns - * lock 'lock' could have woken us in between us dropping the - * turnstile chain lock and acquiring the sched_lock. - */ - if (td->td_flags & TDF_TSNOBLOCK) { - td->td_flags &= ~TDF_TSNOBLOCK; - mtx_unlock_spin(&sched_lock); - return; - } - -#ifdef notyet - /* - * If we're borrowing an interrupted thread's VM context, we - * must clean up before going to sleep. - */ - if (td->td_ithd != NULL) { - struct ithd *it = td->td_ithd; - - if (it->it_interrupted) { - if (LOCK_LOG_TEST(lock, 0)) - CTR3(KTR_LOCK, "%s: %p interrupted %p", - __func__, it, it->it_interrupted); - intr_thd_fixup(it); - } - } -#endif /* Save who we are blocked on and switch. */ + lock = ts->ts_lockobj; td->td_tsqueue = queue; td->td_blocked = ts; td->td_lockname = lock->lo_name; TD_SET_LOCK(td); - critical_enter(); + mtx_unlock_spin(&tc->tc_lock); propagate_priority(td); - critical_exit(); if (LOCK_LOG_TEST(lock, 0)) CTR4(KTR_LOCK, "%s: td %d blocked on [%p] %s", __func__, td->td_tid, lock, lock->lo_name); + MPASS(td->td_lock == &ts->ts_lock); + SCHED_STAT_INC(switch_turnstile); mi_switch(SW_VOL, NULL); if (LOCK_LOG_TEST(lock, 0)) CTR4(KTR_LOCK, "%s: td %d free from blocked on [%p] %s", __func__, td->td_tid, lock, lock->lo_name); - - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } /* @@ -740,11 +763,10 @@ turnstile_signal(struct turnstile *ts, int queue) int empty; MPASS(ts != NULL); + mtx_assert(&ts->ts_lock, MA_OWNED); MPASS(curthread->td_proc->p_magic == P_MAGIC); MPASS(ts->ts_owner == curthread || (queue == TS_EXCLUSIVE_QUEUE && ts->ts_owner == NULL)); - tc = TC_LOOKUP(ts->ts_lockobj); - mtx_assert(&tc->tc_lock, MA_OWNED); MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE); /* @@ -766,6 +788,8 @@ 
turnstile_signal(struct turnstile *ts, int queue) empty = TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]) && TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE]); if (empty) { + tc = TC_LOOKUP(ts->ts_lockobj); + mtx_assert(&tc->tc_lock, MA_OWNED); MPASS(LIST_EMPTY(&ts->ts_free)); #ifdef TURNSTILE_PROFILING tc->tc_depth--; @@ -791,9 +815,14 @@ turnstile_broadcast(struct turnstile *ts, int queue) struct thread *td; MPASS(ts != NULL); + mtx_assert(&ts->ts_lock, MA_OWNED); MPASS(curthread->td_proc->p_magic == P_MAGIC); MPASS(ts->ts_owner == curthread || (queue == TS_EXCLUSIVE_QUEUE && ts->ts_owner == NULL)); + /* + * We must have the chain locked so that we can remove the empty + * turnstile from the hash queue. + */ tc = TC_LOOKUP(ts->ts_lockobj); mtx_assert(&tc->tc_lock, MA_OWNED); MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE); @@ -833,15 +862,14 @@ void turnstile_unpend(struct turnstile *ts, int owner_type) { TAILQ_HEAD( ,thread) pending_threads; - struct turnstile_chain *tc; + struct turnstile *nts; struct thread *td; u_char cp, pri; MPASS(ts != NULL); + mtx_assert(&ts->ts_lock, MA_OWNED); MPASS(ts->ts_owner == curthread || (owner_type == TS_SHARED_LOCK && ts->ts_owner == NULL)); - tc = TC_LOOKUP(ts->ts_lockobj); - mtx_assert(&tc->tc_lock, MA_OWNED); MPASS(!TAILQ_EMPTY(&ts->ts_pending)); /* @@ -855,7 +883,15 @@ turnstile_unpend(struct turnstile *ts, int owner_type) TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE])) ts->ts_lockobj = NULL; #endif - + /* + * Adjust the priority of curthread based on other contested + * locks it owns. Don't lower the priority below the base + * priority however. + */ + td = curthread; + pri = PRI_MAX; + thread_lock(td); + mtx_lock_spin(&td_contested_lock); /* * Remove the turnstile from this thread's list of contested locks * since this thread doesn't own it anymore. New threads will @@ -864,31 +900,17 @@ turnstile_unpend(struct turnstile *ts, int owner_type) * lock. 
*/ if (ts->ts_owner != NULL) { - mtx_lock_spin(&td_contested_lock); ts->ts_owner = NULL; LIST_REMOVE(ts, ts_link); - mtx_unlock_spin(&td_contested_lock); } - critical_enter(); - mtx_unlock_spin(&tc->tc_lock); - - /* - * Adjust the priority of curthread based on other contested - * locks it owns. Don't lower the priority below the base - * priority however. - */ - td = curthread; - pri = PRI_MAX; - mtx_lock_spin(&sched_lock); - mtx_lock_spin(&td_contested_lock); - LIST_FOREACH(ts, &td->td_contested, ts_link) { - cp = turnstile_first_waiter(ts)->td_priority; + LIST_FOREACH(nts, &td->td_contested, ts_link) { + cp = turnstile_first_waiter(nts)->td_priority; if (cp < pri) pri = cp; } mtx_unlock_spin(&td_contested_lock); sched_unlend_prio(td, pri); - + thread_unlock(td); /* * Wake up all the pending threads. If a thread is not blocked * on a lock, then it is currently executing on another CPU in @@ -899,23 +921,21 @@ turnstile_unpend(struct turnstile *ts, int owner_type) while (!TAILQ_EMPTY(&pending_threads)) { td = TAILQ_FIRST(&pending_threads); TAILQ_REMOVE(&pending_threads, td, td_lockq); + thread_lock(td); + MPASS(td->td_lock == &ts->ts_lock); MPASS(td->td_proc->p_magic == P_MAGIC); - if (TD_ON_LOCK(td)) { - td->td_blocked = NULL; - td->td_lockname = NULL; + MPASS(TD_ON_LOCK(td)); + TD_CLR_LOCK(td); + MPASS(TD_CAN_RUN(td)); + td->td_blocked = NULL; + td->td_lockname = NULL; #ifdef INVARIANTS - td->td_tsqueue = 0xff; + td->td_tsqueue = 0xff; #endif - TD_CLR_LOCK(td); - MPASS(TD_CAN_RUN(td)); - sched_add(td, SRQ_BORING); - } else { - td->td_flags |= TDF_TSNOBLOCK; - MPASS(TD_IS_RUNNING(td) || TD_ON_RUNQ(td)); - } + sched_add(td, SRQ_BORING); + thread_unlock(td); } - critical_exit(); - mtx_unlock_spin(&sched_lock); + mtx_unlock_spin(&ts->ts_lock); } /* @@ -925,14 +945,12 @@ turnstile_unpend(struct turnstile *ts, int owner_type) void turnstile_disown(struct turnstile *ts) { - struct turnstile_chain *tc; struct thread *td; u_char cp, pri; MPASS(ts != NULL); + 
mtx_assert(&ts->ts_lock, MA_OWNED); MPASS(ts->ts_owner == curthread); - tc = TC_LOOKUP(ts->ts_lockobj); - mtx_assert(&tc->tc_lock, MA_OWNED); MPASS(TAILQ_EMPTY(&ts->ts_pending)); MPASS(!TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]) || !TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE])); @@ -947,7 +965,6 @@ turnstile_disown(struct turnstile *ts) ts->ts_owner = NULL; LIST_REMOVE(ts, ts_link); mtx_unlock_spin(&td_contested_lock); - mtx_unlock_spin(&tc->tc_lock); /* * Adjust the priority of curthread based on other contested @@ -956,7 +973,8 @@ turnstile_disown(struct turnstile *ts) */ td = curthread; pri = PRI_MAX; - mtx_lock_spin(&sched_lock); + thread_lock(td); + mtx_unlock_spin(&ts->ts_lock); mtx_lock_spin(&td_contested_lock); LIST_FOREACH(ts, &td->td_contested, ts_link) { cp = turnstile_first_waiter(ts)->td_priority; @@ -965,7 +983,7 @@ turnstile_disown(struct turnstile *ts) } mtx_unlock_spin(&td_contested_lock); sched_unlend_prio(td, pri); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } /* @@ -975,12 +993,10 @@ struct thread * turnstile_head(struct turnstile *ts, int queue) { #ifdef INVARIANTS - struct turnstile_chain *tc; MPASS(ts != NULL); MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE); - tc = TC_LOOKUP(ts->ts_lockobj); - mtx_assert(&tc->tc_lock, MA_OWNED); + mtx_assert(&ts->ts_lock, MA_OWNED); #endif return (TAILQ_FIRST(&ts->ts_blocked[queue])); } @@ -992,12 +1008,10 @@ int turnstile_empty(struct turnstile *ts, int queue) { #ifdef INVARIANTS - struct turnstile_chain *tc; MPASS(ts != NULL); MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE); - tc = TC_LOOKUP(ts->ts_lockobj); - mtx_assert(&tc->tc_lock, MA_OWNED); + mtx_assert(&ts->ts_lock, MA_OWNED); #endif return (TAILQ_EMPTY(&ts->ts_blocked[queue])); } -- cgit v1.1