path: root/sys/kern/kern_sx.c
author	jhb <jhb@FreeBSD.org>	2007-03-31 23:23:42 +0000
committer	jhb <jhb@FreeBSD.org>	2007-03-31 23:23:42 +0000
commit	b0b93a3c55b874a04a163db8dcf6af0b7e28b2e4 (patch)
tree	6f71182be4d218a6130d92f4c3455591243f0019	/sys/kern/kern_sx.c
parent	7f18b608045d45d3d89e8cbaa7710fcbbc4d7eab (diff)
Optimize sx locks to use simple atomic operations for the common cases of
obtaining and releasing shared and exclusive locks.  The algorithms for
manipulating the lock cookie are very similar to those used for rwlocks.
This patch also adds support for exclusive locks using the same algorithm
as mutexes.

A new sx_init_flags() function has been added so that optional flags can be
specified to alter a given lock's behavior.  The flags include SX_DUPOK,
SX_NOWITNESS, SX_NOPROFILE, and SX_QUIET, which are all identical in nature
to the similar flags for mutexes.

Adaptive spinning on select locks may be enabled via the ADAPTIVE_SX kernel
option.  Only locks initialized with the SX_ADAPTIVESPIN flag via
sx_init_flags() will adaptively spin.

The common cases for sx_slock(), sx_sunlock(), sx_xlock(), and sx_xunlock()
are now performed inline in non-debug kernels.  As a result, <sys/sx.h> now
requires <sys/lock.h> to be included prior to <sys/sx.h>.  The new kernel
option SX_NOINLINE can be used to disable this inlining in non-debug kernels.

The size of struct sx has changed, so the kernel ABI is probably greatly
disturbed.

MFC after:	1 month
Submitted by:	attilio
Tested by:	kris, pjd
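As a usage sketch of the API described above (not part of the commit): a hypothetical subsystem initializes a lock with sx_init_flags() and then uses the inlined fast paths. The foo_* names and the data being protected are invented for illustration; the include order, function names, and flags follow the commit message, and SX_ADAPTIVESPIN only takes effect in kernels built with "options ADAPTIVE_SX".

/*
 * Hypothetical example: a subsystem using the reworked sx(9) interface.
 * Note that <sys/lock.h> must now be included before <sys/sx.h>.
 */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/sx.h>

static struct sx foo_sx;	/* illustrative lock */
static int foo_value;		/* illustrative data it protects */

static void
foo_init(void)
{

	/* Witness checking stays enabled by default; SX_NOWITNESS would disable it. */
	sx_init_flags(&foo_sx, "foo data", SX_ADAPTIVESPIN);
}

static int
foo_get(void)
{
	int v;

	sx_slock(&foo_sx);	/* common case is one atomic op, inlined in non-debug kernels */
	v = foo_value;
	sx_sunlock(&foo_sx);
	return (v);
}

static void
foo_set(int v)
{

	sx_xlock(&foo_sx);
	foo_value = v;
	sx_xunlock(&foo_sx);
}

static void
foo_fini(void)
{

	sx_destroy(&foo_sx);
}

Contended acquisitions still fall back to the _sx_*_hard() slow paths shown in the diff below.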
Diffstat (limited to 'sys/kern/kern_sx.c')
-rw-r--r--	sys/kern/kern_sx.c	912
1 files changed, 682 insertions, 230 deletions
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index 67e3ca7..e6f35eb 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -1,12 +1,14 @@
/*-
- * Copyright (C) 2001 Jason Evans <jasone@freebsd.org>. All rights reserved.
+ * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
+ * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice(s), this list of conditions and the following disclaimer as
- * the first lines of this file unmodified other than the possible
+ * the first lines of this file unmodified other than the possible
* addition of one or more copyright notices.
* 2. Redistributions in binary form must reproduce the above copyright
* notice(s), this list of conditions and the following disclaimer in the
@@ -26,32 +28,88 @@
*/
/*
- * Shared/exclusive locks. This implementation assures deterministic lock
- * granting behavior, so that slocks and xlocks are interleaved.
+ * Shared/exclusive locks. This implementation attempts to ensure
+ * deterministic lock granting behavior, so that slocks and xlocks are
+ * interleaved.
*
* Priority propagation will not generally raise the priority of lock holders,
* so should not be relied upon in combination with sx locks.
*/
+#include "opt_adaptive_sx.h"
+#include "opt_ddb.h"
+
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include "opt_ddb.h"
-
#include <sys/param.h>
-#include <sys/systm.h>
#include <sys/ktr.h>
-#include <sys/linker_set.h>
-#include <sys/condvar.h>
#include <sys/lock.h>
+#include <sys/lock_profile.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/sleepqueue.h>
#include <sys/sx.h>
-#include <sys/lock_profile.h>
+#include <sys/systm.h>
+
+#ifdef ADAPTIVE_SX
+#include <machine/cpu.h>
+#endif
#ifdef DDB
#include <ddb/ddb.h>
+#endif
+
+#if !defined(SMP) && defined(ADAPTIVE_SX)
+#error "You must have SMP to enable the ADAPTIVE_SX option"
+#endif
+
+/* Handy macros for sleep queues. */
+#define SQ_EXCLUSIVE_QUEUE 0
+#define SQ_SHARED_QUEUE 1
+
+/*
+ * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file. We
+ * drop Giant anytime we have to sleep or if we adaptively spin.
+ */
+#define GIANT_DECLARE \
+ int _giantcnt = 0; \
+ WITNESS_SAVE_DECL(Giant) \
+
+#define GIANT_SAVE() do { \
+ if (mtx_owned(&Giant)) { \
+ WITNESS_SAVE(&Giant.lock_object, Giant); \
+ while (mtx_owned(&Giant)) { \
+ _giantcnt++; \
+ mtx_unlock(&Giant); \
+ } \
+ } \
+} while (0)
+
+#define GIANT_RESTORE() do { \
+ if (_giantcnt > 0) { \
+ mtx_assert(&Giant, MA_NOTOWNED); \
+ while (_giantcnt--) \
+ mtx_lock(&Giant); \
+ WITNESS_RESTORE(&Giant.lock_object, Giant); \
+ } \
+} while (0)
+
+/*
+ * Returns true if an exclusive lock is recursed.  It assumes
+ * curthread currently has an exclusive lock.
+ */
+#define sx_recursed(sx) ((sx)->sx_recurse != 0)
+
+/*
+ * Return a pointer to the owning thread if the lock is exclusively
+ * locked.
+ */
+#define sx_xholder(sx) \
+ ((sx)->sx_lock & SX_LOCK_SHARED ? NULL : \
+ (struct thread *)SX_OWNER((sx)->sx_lock))
+#ifdef DDB
static void db_show_sx(struct lock_object *lock);
#endif
static void lock_sx(struct lock_object *lock, int how);
@@ -89,7 +147,7 @@ unlock_sx(struct lock_object *lock)
struct sx *sx;
sx = (struct sx *)lock;
- sx_assert(sx, SX_LOCKED | LA_NOTRECURSED);
+ sx_assert(sx, SX_LOCKED | SX_NOTRECURSED);
if (sx_xlocked(sx)) {
sx_xunlock(sx);
return (1);
@@ -108,33 +166,33 @@ sx_sysinit(void *arg)
}
void
-sx_init(struct sx *sx, const char *description)
+sx_init_flags(struct sx *sx, const char *description, int opts)
{
-
- sx->sx_lock = mtx_pool_find(mtxpool_lockbuilder, sx);
- sx->sx_cnt = 0;
- cv_init(&sx->sx_shrd_cv, description);
- sx->sx_shrd_wcnt = 0;
- cv_init(&sx->sx_excl_cv, description);
- sx->sx_excl_wcnt = 0;
- sx->sx_xholder = NULL;
+ int flags;
+
+ flags = LO_RECURSABLE | LO_SLEEPABLE | LO_UPGRADABLE;
+ if (opts & SX_DUPOK)
+ flags |= LO_DUPOK;
+ if (opts & SX_NOPROFILE)
+ flags |= LO_NOPROFILE;
+ if (!(opts & SX_NOWITNESS))
+ flags |= LO_WITNESS;
+ if (opts & SX_QUIET)
+ flags |= LO_QUIET;
+
+ flags |= opts & SX_ADAPTIVESPIN;
+ sx->sx_lock = SX_LOCK_UNLOCKED;
+ sx->sx_recurse = 0;
lock_profile_object_init(&sx->lock_object, &lock_class_sx, description);
- lock_init(&sx->lock_object, &lock_class_sx, description, NULL,
- LO_WITNESS | LO_RECURSABLE | LO_SLEEPABLE | LO_UPGRADABLE);
+ lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags);
}
void
sx_destroy(struct sx *sx)
{
- KASSERT((sx->sx_cnt == 0 && sx->sx_shrd_wcnt == 0 && sx->sx_excl_wcnt ==
- 0), ("%s (%s): holders or waiters\n", __func__,
- sx->lock_object.lo_name));
-
- sx->sx_lock = NULL;
- cv_destroy(&sx->sx_shrd_cv);
- cv_destroy(&sx->sx_excl_cv);
-
+ KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held"));
+ KASSERT(sx->sx_recurse == 0, ("sx lock still recursed"));
lock_profile_object_destroy(&sx->lock_object);
lock_destroy(&sx->lock_object);
}
@@ -142,224 +200,592 @@ sx_destroy(struct sx *sx)
void
_sx_slock(struct sx *sx, const char *file, int line)
{
- uint64_t waittime = 0;
- int contested = 0;
- mtx_lock(sx->sx_lock);
- KASSERT(sx->sx_xholder != curthread,
- ("%s (%s): slock while xlock is held @ %s:%d\n", __func__,
- sx->lock_object.lo_name, file, line));
+ MPASS(curthread != NULL);
WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line);
-
- /*
- * Loop in case we lose the race for lock acquisition.
- */
- while (sx->sx_cnt < 0) {
- sx->sx_shrd_wcnt++;
- lock_profile_obtain_lock_failed(&sx->lock_object, &contested, &waittime);
- cv_wait(&sx->sx_shrd_cv, sx->sx_lock);
- sx->sx_shrd_wcnt--;
- }
-
- /* Acquire a shared lock. */
- sx->sx_cnt++;
-
- if (sx->sx_cnt == 1)
- lock_profile_obtain_lock_success(&sx->lock_object, contested, waittime, file, line);
-
+ __sx_slock(sx, file, line);
LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
WITNESS_LOCK(&sx->lock_object, 0, file, line);
curthread->td_locks++;
-
- mtx_unlock(sx->sx_lock);
}
int
_sx_try_slock(struct sx *sx, const char *file, int line)
{
+ uintptr_t x;
- mtx_lock(sx->sx_lock);
- if (sx->sx_cnt >= 0) {
- sx->sx_cnt++;
+ x = sx->sx_lock;
+ if ((x & SX_LOCK_SHARED) && atomic_cmpset_acq_ptr(&sx->sx_lock, x,
+ x + SX_ONE_SHARER)) {
LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line);
WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line);
curthread->td_locks++;
- mtx_unlock(sx->sx_lock);
return (1);
- } else {
- LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line);
- mtx_unlock(sx->sx_lock);
- return (0);
}
+
+ LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line);
+ return (0);
}
void
_sx_xlock(struct sx *sx, const char *file, int line)
{
- int contested = 0;
- uint64_t waittime = 0;
- mtx_lock(sx->sx_lock);
-
- /*
- * With sx locks, we're absolutely not permitted to recurse on
- * xlocks, as it is fatal (deadlock). Normally, recursion is handled
- * by WITNESS, but as it is not semantically correct to hold the
- * xlock while in here, we consider it API abuse and put it under
- * INVARIANTS.
- */
- KASSERT(sx->sx_xholder != curthread,
- ("%s (%s): xlock already held @ %s:%d", __func__,
- sx->lock_object.lo_name, file, line));
+ MPASS(curthread != NULL);
WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
line);
-
- /* Loop in case we lose the race for lock acquisition. */
- while (sx->sx_cnt != 0) {
- sx->sx_excl_wcnt++;
- lock_profile_obtain_lock_failed(&sx->lock_object, &contested, &waittime);
- cv_wait(&sx->sx_excl_cv, sx->sx_lock);
- sx->sx_excl_wcnt--;
- }
-
- MPASS(sx->sx_cnt == 0);
-
- /* Acquire an exclusive lock. */
- sx->sx_cnt--;
- sx->sx_xholder = curthread;
-
- lock_profile_obtain_lock_success(&sx->lock_object, contested, waittime, file, line);
- LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, 0, file, line);
+ __sx_xlock(sx, curthread, file, line);
+ LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line);
WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
curthread->td_locks++;
-
- mtx_unlock(sx->sx_lock);
}
int
_sx_try_xlock(struct sx *sx, const char *file, int line)
{
+ int rval;
- mtx_lock(sx->sx_lock);
- if (sx->sx_cnt == 0) {
- sx->sx_cnt--;
- sx->sx_xholder = curthread;
- LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, 1, file, line);
- WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file,
- line);
+ MPASS(curthread != NULL);
+
+ if (sx_xlocked(sx)) {
+ sx->sx_recurse++;
+ atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
+ rval = 1;
+ } else
+ rval = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED,
+ (uintptr_t)curthread);
+ LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line);
+ if (rval) {
+ WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
+ file, line);
curthread->td_locks++;
- mtx_unlock(sx->sx_lock);
- return (1);
- } else {
- LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, 0, file, line);
- mtx_unlock(sx->sx_lock);
- return (0);
}
+
+ return (rval);
}
void
_sx_sunlock(struct sx *sx, const char *file, int line)
{
- _sx_assert(sx, SX_SLOCKED, file, line);
- mtx_lock(sx->sx_lock);
+ MPASS(curthread != NULL);
+ _sx_assert(sx, SX_SLOCKED, file, line);
curthread->td_locks--;
WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
+ LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
+ lock_profile_release_lock(&sx->lock_object);
+ __sx_sunlock(sx, file, line);
+}
+
+void
+_sx_xunlock(struct sx *sx, const char *file, int line)
+{
+
+ MPASS(curthread != NULL);
+ _sx_assert(sx, SX_XLOCKED, file, line);
+ curthread->td_locks--;
+ WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
+ LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file,
+ line);
+ lock_profile_release_lock(&sx->lock_object);
+ __sx_xunlock(sx, curthread, file, line);
+}
+
+/*
+ * Try to do a non-blocking upgrade from a shared lock to an exclusive lock.
+ * This will only succeed if this thread holds a single shared lock.
+ * Return 1 if the upgrade succeeds, 0 otherwise.
+ */
+int
+_sx_try_upgrade(struct sx *sx, const char *file, int line)
+{
+ uintptr_t x;
+ int success;
+
+ _sx_assert(sx, SX_SLOCKED, file, line);
+
+ /*
+ * Try to switch from one shared lock to an exclusive lock. We need
+ * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that
+ * we will wake up the exclusive waiters when we drop the lock.
+ */
+ x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS;
+ success = atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x,
+ (uintptr_t)curthread | x);
+ LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line);
+ if (success)
+ WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
+ file, line);
+ return (success);
+}
+
+/*
+ * Downgrade an unrecursed exclusive lock into a single shared lock.
+ */
+void
+_sx_downgrade(struct sx *sx, const char *file, int line)
+{
+ uintptr_t x;
+
+ _sx_assert(sx, SX_XLOCKED | SX_NOTRECURSED, file, line);
+#ifndef INVARIANTS
+ if (sx_recursed(sx))
+ panic("downgrade of a recursed lock");
+#endif
- /* Release. */
- sx->sx_cnt--;
+ WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line);
- if (sx->sx_cnt == 0) {
- lock_profile_release_lock(&sx->lock_object);
+ /*
+ * Try to switch from an exclusive lock with no shared waiters
+ * to one sharer with no shared waiters. If there are
+ * exclusive waiters, we don't need to lock the sleep queue so
+ * long as we preserve the flag. We do one quick try and if
+ * that fails we grab the sleepq lock to keep the flags from
+ * changing and do it the slow way.
+ *
+ * We have to lock the sleep queue if there are shared waiters
+ * so we can wake them up.
+ */
+ x = sx->sx_lock;
+ if (!(x & SX_LOCK_SHARED_WAITERS) &&
+ atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) |
+ (x & SX_LOCK_EXCLUSIVE_WAITERS))) {
+ LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
+ return;
}
/*
- * If we just released the last shared lock, wake any waiters up, giving
- * exclusive lockers precedence. In order to make sure that exclusive
- * lockers won't be blocked forever, don't wake shared lock waiters if
- * there are exclusive lock waiters.
+ * Lock the sleep queue so we can read the waiters bits
+ * without any races and wakeup any shared waiters.
*/
- if (sx->sx_excl_wcnt > 0) {
- if (sx->sx_cnt == 0)
- cv_signal(&sx->sx_excl_cv);
- } else if (sx->sx_shrd_wcnt > 0)
- cv_broadcast(&sx->sx_shrd_cv);
+ sleepq_lock(&sx->lock_object);
- LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
+ /*
+ * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single
+ * shared lock. If there are any shared waiters, wake them up.
+ */
+ x = sx->sx_lock;
+ atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) |
+ (x & SX_LOCK_EXCLUSIVE_WAITERS));
+ if (x & SX_LOCK_SHARED_WAITERS)
+ sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, -1,
+ SQ_SHARED_QUEUE);
+ else
+ sleepq_release(&sx->lock_object);
- mtx_unlock(sx->sx_lock);
+ LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
}
+/*
+ * This function represents the so-called 'hard case' for sx_xlock
+ * operation. All 'easy case' failures are redirected to this. Note
+ * that ideally this would be a static function, but it needs to be
+ * accessible from at least sx.h.
+ */
void
-_sx_xunlock(struct sx *sx, const char *file, int line)
+_sx_xlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line)
{
- _sx_assert(sx, SX_XLOCKED, file, line);
- mtx_lock(sx->sx_lock);
- MPASS(sx->sx_cnt == -1);
+ GIANT_DECLARE;
+#ifdef ADAPTIVE_SX
+ volatile struct thread *owner;
+#endif
+ uintptr_t x;
- curthread->td_locks--;
- WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
+ /* If we already hold an exclusive lock, then recurse. */
+ if (sx_xlocked(sx)) {
+ sx->sx_recurse++;
+ atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx);
+ return;
+ }
- /* Release. */
- sx->sx_cnt++;
- sx->sx_xholder = NULL;
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
+ sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);
- /*
- * Wake up waiters if there are any. Give precedence to slock waiters.
- */
- if (sx->sx_shrd_wcnt > 0)
- cv_broadcast(&sx->sx_shrd_cv);
- else if (sx->sx_excl_wcnt > 0)
- cv_signal(&sx->sx_excl_cv);
+ while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) {
+#ifdef ADAPTIVE_SX
+ /*
+ * If the lock is write locked and the owner is
+ * running on another CPU, spin until the owner stops
+ * running or the state of the lock changes.
+ */
+ x = sx->sx_lock;
+ if (!(x & SX_LOCK_SHARED) &&
+ (sx->lock_object.lo_flags & SX_ADAPTIVESPIN)) {
+ x = SX_OWNER(x);
+ owner = (struct thread *)x;
+ if (TD_IS_RUNNING(owner)) {
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR3(KTR_LOCK,
+ "%s: spinning on %p held by %p",
+ __func__, sx, owner);
+ GIANT_SAVE();
+ while (SX_OWNER(sx->sx_lock) == x &&
+ TD_IS_RUNNING(owner))
+ cpu_spinwait();
+ continue;
+ }
+ }
+#endif
- LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, 0, file, line);
+ sleepq_lock(&sx->lock_object);
+ x = sx->sx_lock;
- lock_profile_release_lock(&sx->lock_object);
- mtx_unlock(sx->sx_lock);
+ /*
+ * If the lock was released while spinning on the
+ * sleep queue chain lock, try again.
+ */
+ if (x == SX_LOCK_UNLOCKED) {
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+
+#ifdef ADAPTIVE_SX
+ /*
+ * The current lock owner might have started executing
+ * on another CPU (or the lock could have changed
+ * owners) while we were waiting on the sleep queue
+ * chain lock. If so, drop the sleep queue lock and try
+ * again.
+ */
+ if (!(x & SX_LOCK_SHARED) &&
+ (sx->lock_object.lo_flags & SX_ADAPTIVESPIN)) {
+ owner = (struct thread *)SX_OWNER(x);
+ if (TD_IS_RUNNING(owner)) {
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+ }
+#endif
+
+ /*
+ * If an exclusive lock was released with both shared
+ * and exclusive waiters and a shared waiter hasn't
+ * woken up and acquired the lock yet, sx_lock will be
+ * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS.
+ * If we see that value, try to acquire it once. Note
+ * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS
+ * as there are other exclusive waiters still. If we
+ * fail, restart the loop.
+ */
+ if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) {
+ if (atomic_cmpset_acq_ptr(&sx->sx_lock,
+ SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS,
+ tid | SX_LOCK_EXCLUSIVE_WAITERS)) {
+ sleepq_release(&sx->lock_object);
+ CTR2(KTR_LOCK, "%s: %p claimed by new writer",
+ __func__, sx);
+ break;
+ }
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+
+ /*
+ * Try to set the SX_LOCK_EXCLUSIVE_WAITERS. If we fail,
+ * then loop back and retry.
+ */
+ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
+ if (!atomic_cmpset_ptr(&sx->sx_lock, x,
+ x | SX_LOCK_EXCLUSIVE_WAITERS)) {
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p set excl waiters flag",
+ __func__, sx);
+ }
+
+ /*
+ * Since we have been unable to acquire the exclusive
+ * lock and the exclusive waiters flag is set, we have
+ * to sleep.
+ */
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
+ __func__, sx);
+
+ GIANT_SAVE();
+ sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
+ SLEEPQ_SX, SQ_EXCLUSIVE_QUEUE);
+ sleepq_wait(&sx->lock_object);
+
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
+ __func__, sx);
+ }
+
+ GIANT_RESTORE();
}
-int
-_sx_try_upgrade(struct sx *sx, const char *file, int line)
+/*
+ * This function represents the so-called 'hard case' for sx_xunlock
+ * operation. All 'easy case' failures are redirected to this. Note
+ * that ideally this would be a static function, but it needs to be
+ * accessible from at least sx.h.
+ */
+void
+_sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line)
{
+ uintptr_t x;
+ int queue;
- _sx_assert(sx, SX_SLOCKED, file, line);
- mtx_lock(sx->sx_lock);
+ MPASS(!(sx->sx_lock & SX_LOCK_SHARED));
- if (sx->sx_cnt == 1) {
- sx->sx_cnt = -1;
- sx->sx_xholder = curthread;
+ /* If the lock is recursed, then unrecurse one level. */
+ if (sx_xlocked(sx) && sx_recursed(sx)) {
+ if ((--sx->sx_recurse) == 0)
+ atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx);
+ return;
+ }
+ MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS |
+ SX_LOCK_EXCLUSIVE_WAITERS));
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p contested", __func__, sx);
- LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, 1, file, line);
- WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
- file, line);
+ sleepq_lock(&sx->lock_object);
+ x = SX_LOCK_UNLOCKED;
- mtx_unlock(sx->sx_lock);
- return (1);
- } else {
- LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, 0, file, line);
- mtx_unlock(sx->sx_lock);
- return (0);
+ /*
+ * The wake up algorithm here is quite simple and probably not
+ * ideal. It gives precedence to shared waiters if they are
+ * present. For this condition, we have to preserve the
+ * state of the exclusive waiters flag.
+ */
+ if (sx->sx_lock & SX_LOCK_SHARED_WAITERS) {
+ queue = SQ_SHARED_QUEUE;
+ x |= (sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS);
+ } else
+ queue = SQ_EXCLUSIVE_QUEUE;
+
+ /* Wake up all the waiters for the specific queue. */
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue",
+ __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" :
+ "exclusive");
+ atomic_store_rel_ptr(&sx->sx_lock, x);
+ sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, -1, queue);
+}
+
+/*
+ * This function represents the so-called 'hard case' for sx_slock
+ * operation. All 'easy case' failures are redirected to this. Note
+ * that ideally this would be a static function, but it needs to be
+ * accessible from at least sx.h.
+ */
+void
+_sx_slock_hard(struct sx *sx, const char *file, int line)
+{
+ GIANT_DECLARE;
+#ifdef ADAPTIVE_SX
+ volatile struct thread *owner;
+#endif
+ uintptr_t x;
+
+ /*
+ * As with rwlocks, we don't make any attempt to try to block
+ * shared locks once there is an exclusive waiter.
+ */
+ for (;;) {
+ x = sx->sx_lock;
+
+ /*
+ * If no other thread has an exclusive lock then try to bump up
+ * the count of sharers. Since we have to preserve the state
+ * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the
+ * shared lock loop back and retry.
+ */
+ if (x & SX_LOCK_SHARED) {
+ MPASS(!(x & SX_LOCK_SHARED_WAITERS));
+ if (atomic_cmpset_acq_ptr(&sx->sx_lock, x,
+ x + SX_ONE_SHARER)) {
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR4(KTR_LOCK,
+ "%s: %p succeed %p -> %p", __func__,
+ sx, (void *)x,
+ (void *)(x + SX_ONE_SHARER));
+ break;
+ }
+ continue;
+ }
+
+#ifdef ADAPTIVE_SX
+ /*
+ * If the owner is running on another CPU, spin until
+ * the owner stops running or the state of the lock
+ * changes.
+ */
+ else if (sx->lock_object.lo_flags & SX_ADAPTIVESPIN) {
+ x = SX_OWNER(x);
+ owner = (struct thread *)x;
+ if (TD_IS_RUNNING(owner)) {
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR3(KTR_LOCK,
+ "%s: spinning on %p held by %p",
+ __func__, sx, owner);
+ GIANT_SAVE();
+ while (SX_OWNER(sx->sx_lock) == x &&
+ TD_IS_RUNNING(owner))
+ cpu_spinwait();
+ continue;
+ }
+ }
+#endif
+
+ /*
+ * Some other thread already has an exclusive lock, so
+ * start the process of blocking.
+ */
+ sleepq_lock(&sx->lock_object);
+ x = sx->sx_lock;
+
+ /*
+ * The lock could have been released while we spun.
+ * In this case loop back and retry.
+ */
+ if (x & SX_LOCK_SHARED) {
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+
+#ifdef ADAPTIVE_SX
+ /*
+ * If the owner is running on another CPU, spin until
+ * the owner stops running or the state of the lock
+ * changes.
+ */
+ if (!(x & SX_LOCK_SHARED) &&
+ (sx->lock_object.lo_flags & SX_ADAPTIVESPIN)) {
+ owner = (struct thread *)SX_OWNER(x);
+ if (TD_IS_RUNNING(owner)) {
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+ }
+#endif
+
+ /*
+ * Try to set the SX_LOCK_SHARED_WAITERS flag. If we
+ * fail to set it drop the sleep queue lock and loop
+ * back.
+ */
+ if (!(x & SX_LOCK_SHARED_WAITERS)) {
+ if (!atomic_cmpset_ptr(&sx->sx_lock, x,
+ x | SX_LOCK_SHARED_WAITERS)) {
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p set shared waiters flag",
+ __func__, sx);
+ }
+
+ /*
+ * Since we have been unable to acquire the shared lock,
+ * we have to sleep.
+ */
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
+ __func__, sx);
+
+ GIANT_SAVE();
+ sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
+ SLEEPQ_SX, SQ_SHARED_QUEUE);
+ sleepq_wait(&sx->lock_object);
+
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
+ __func__, sx);
}
+
+ GIANT_RESTORE();
}
+/*
+ * This function represents the so-called 'hard case' for sx_sunlock
+ * operation. All 'easy case' failures are redirected to this. Note
+ * that ideally this would be a static function, but it needs to be
+ * accessible from at least sx.h.
+ */
void
-_sx_downgrade(struct sx *sx, const char *file, int line)
+_sx_sunlock_hard(struct sx *sx, const char *file, int line)
{
+ uintptr_t x;
- _sx_assert(sx, SX_XLOCKED, file, line);
- mtx_lock(sx->sx_lock);
- MPASS(sx->sx_cnt == -1);
+ for (;;) {
+ x = sx->sx_lock;
- WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line);
+ /*
+ * We should never have shared waiters while at least one thread
+ * holds a shared lock.
+ */
+ KASSERT(!(x & SX_LOCK_SHARED_WAITERS),
+ ("%s: waiting sharers", __func__));
- sx->sx_cnt = 1;
- sx->sx_xholder = NULL;
- if (sx->sx_shrd_wcnt > 0)
- cv_broadcast(&sx->sx_shrd_cv);
+ /*
+ * See if there is more than one shared lock held. If
+ * so, just drop one and return.
+ */
+ if (SX_SHARERS(x) > 1) {
+ if (atomic_cmpset_ptr(&sx->sx_lock, x,
+ x - SX_ONE_SHARER)) {
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR4(KTR_LOCK,
+ "%s: %p succeeded %p -> %p",
+ __func__, sx, (void *)x,
+ (void *)(x - SX_ONE_SHARER));
+ break;
+ }
+ continue;
+ }
- LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
+ /*
+ * If there aren't any waiters for an exclusive lock,
+ * then try to drop it quickly.
+ */
+ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
+ MPASS(x == SX_SHARERS_LOCK(1));
+ if (atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1),
+ SX_LOCK_UNLOCKED)) {
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p last succeeded",
+ __func__, sx);
+ break;
+ }
+ continue;
+ }
+
+ /*
+ * At this point, there should just be one sharer with
+ * exclusive waiters.
+ */
+ MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS));
- mtx_unlock(sx->sx_lock);
+ sleepq_lock(&sx->lock_object);
+
+ /*
+ * Wake up semantic here is quite simple:
+ * Just wake up all the exclusive waiters.
+ * Note that the state of the lock could have changed,
+ * so if it fails loop back and retry.
+ */
+ if (!atomic_cmpset_ptr(&sx->sx_lock,
+ SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS,
+ SX_LOCK_UNLOCKED)) {
+ sleepq_release(&sx->lock_object);
+ continue;
+ }
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR2(KTR_LOCK, "%s: %p waking up all threads on"
+ " exclusive queue", __func__, sx);
+ sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, -1,
+ SQ_EXCLUSIVE_QUEUE);
+ break;
+ }
}
#ifdef INVARIANT_SUPPORT
@@ -375,45 +801,76 @@ _sx_downgrade(struct sx *sx, const char *file, int line)
void
_sx_assert(struct sx *sx, int what, const char *file, int line)
{
+#ifndef WITNESS
+ int slocked = 0;
+#endif
if (panicstr != NULL)
return;
switch (what) {
- case SX_LOCKED:
- case SX_LOCKED | LA_NOTRECURSED:
case SX_SLOCKED:
+ case SX_SLOCKED | SX_NOTRECURSED:
+ case SX_SLOCKED | SX_RECURSED:
+#ifndef WITNESS
+ slocked = 1;
+ /* FALLTHROUGH */
+#endif
+ case SX_LOCKED:
+ case SX_LOCKED | SX_NOTRECURSED:
+ case SX_LOCKED | SX_RECURSED:
#ifdef WITNESS
witness_assert(&sx->lock_object, what, file, line);
#else
- mtx_lock(sx->sx_lock);
- if (sx->sx_cnt <= 0 &&
- (what == SX_SLOCKED || sx->sx_xholder != curthread))
+ /*
+ * If some other thread has an exclusive lock or we
+ * have one and are asserting a shared lock, fail.
+ * Also, if no one has a lock at all, fail.
+ */
+ if (sx->sx_lock == SX_LOCK_UNLOCKED ||
+ (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked ||
+ sx_xholder(sx) != curthread)))
panic("Lock %s not %slocked @ %s:%d\n",
- sx->lock_object.lo_name, (what == SX_SLOCKED) ?
- "share " : "", file, line);
- mtx_unlock(sx->sx_lock);
+ sx->lock_object.lo_name, slocked ? "share " : "",
+ file, line);
+
+ if (!(sx->sx_lock & SX_LOCK_SHARED)) {
+ if (sx_recursed(sx)) {
+ if (what & SX_NOTRECURSED)
+ panic("Lock %s recursed @ %s:%d\n",
+ sx->lock_object.lo_name, file,
+ line);
+ } else if (what & SX_RECURSED)
+ panic("Lock %s not recursed @ %s:%d\n",
+ sx->lock_object.lo_name, file, line);
+ }
#endif
break;
case SX_XLOCKED:
- mtx_lock(sx->sx_lock);
- if (sx->sx_xholder != curthread)
+ case SX_XLOCKED | SX_NOTRECURSED:
+ case SX_XLOCKED | SX_RECURSED:
+ if (sx_xholder(sx) != curthread)
panic("Lock %s not exclusively locked @ %s:%d\n",
sx->lock_object.lo_name, file, line);
- mtx_unlock(sx->sx_lock);
+ if (sx_recursed(sx)) {
+ if (what & SX_NOTRECURSED)
+ panic("Lock %s recursed @ %s:%d\n",
+ sx->lock_object.lo_name, file, line);
+ } else if (what & SX_RECURSED)
+ panic("Lock %s not recursed @ %s:%d\n",
+ sx->lock_object.lo_name, file, line);
break;
case SX_UNLOCKED:
#ifdef WITNESS
witness_assert(&sx->lock_object, what, file, line);
#else
/*
- * We are able to check only exclusive lock here,
- * we cannot assert that *this* thread owns slock.
+ * If we hold an exclusive lock, fail.  We can't
+ * reliably check to see if we hold a shared lock or
+ * not.
*/
- mtx_lock(sx->sx_lock);
- if (sx->sx_xholder == curthread)
+ if (sx_xholder(sx) == curthread)
panic("Lock %s exclusively locked @ %s:%d\n",
sx->lock_object.lo_name, file, line);
- mtx_unlock(sx->sx_lock);
#endif
break;
default:
@@ -424,7 +881,7 @@ _sx_assert(struct sx *sx, int what, const char *file, int line)
#endif /* INVARIANT_SUPPORT */
#ifdef DDB
-void
+static void
db_show_sx(struct lock_object *lock)
{
struct thread *td;
@@ -433,16 +890,33 @@ db_show_sx(struct lock_object *lock)
sx = (struct sx *)lock;
db_printf(" state: ");
- if (sx->sx_cnt < 0) {
- td = sx->sx_xholder;
+ if (sx->sx_lock == SX_LOCK_UNLOCKED)
+ db_printf("UNLOCKED\n");
+ else if (sx->sx_lock & SX_LOCK_SHARED)
+ db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock));
+ else {
+ td = sx_xholder(sx);
db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm);
- } else if (sx->sx_cnt > 0)
- db_printf("SLOCK: %d locks\n", sx->sx_cnt);
- else
- db_printf("UNLOCKED\n");
- db_printf(" waiters: %d shared, %d exclusive\n", sx->sx_shrd_wcnt,
- sx->sx_excl_wcnt);
+ if (sx_recursed(sx))
+ db_printf(" recursed: %d\n", sx->sx_recurse);
+ }
+
+ db_printf(" waiters: ");
+ switch(sx->sx_lock &
+ (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) {
+ case SX_LOCK_SHARED_WAITERS:
+ db_printf("shared\n");
+ break;
+ case SX_LOCK_EXCLUSIVE_WAITERS:
+ db_printf("exclusive\n");
+ break;
+ case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS:
+ db_printf("exclusive and shared\n");
+ break;
+ default:
+ db_printf("none\n");
+ }
}
/*
@@ -454,47 +928,25 @@ int
sx_chain(struct thread *td, struct thread **ownerp)
{
struct sx *sx;
- struct cv *cv;
/*
- * First, see if it looks like td is blocked on a condition
- * variable.
+ * Check to see if this thread is blocked on an sx lock.
+ * First, we check the lock class. If that is ok, then we
+ * compare the lock name against the wait message.
*/
- cv = td->td_wchan;
- if (cv->cv_description != td->td_wmesg)
+ sx = td->td_wchan;
+ if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx ||
+ sx->lock_object.lo_name != td->td_wmesg)
return (0);
- /*
- * Ok, see if it looks like td is blocked on the exclusive
- * condition variable.
- */
- sx = (struct sx *)((char *)cv - offsetof(struct sx, sx_excl_cv));
- if (LOCK_CLASS(&sx->lock_object) == &lock_class_sx &&
- sx->sx_excl_wcnt > 0)
- goto ok;
-
- /*
- * Second, see if it looks like td is blocked on the shared
- * condition variable.
- */
- sx = (struct sx *)((char *)cv - offsetof(struct sx, sx_shrd_cv));
- if (LOCK_CLASS(&sx->lock_object) == &lock_class_sx &&
- sx->sx_shrd_wcnt > 0)
- goto ok;
-
- /* Doesn't seem to be an sx lock. */
- return (0);
-
-ok:
/* We think we have an sx lock, so output some details. */
db_printf("blocked on sx \"%s\" ", td->td_wmesg);
- if (sx->sx_cnt >= 0) {
- db_printf("SLOCK (count %d)\n", sx->sx_cnt);
- *ownerp = NULL;
- } else {
+ *ownerp = sx_xholder(sx);
+ if (sx->sx_lock & SX_LOCK_SHARED)
+ db_printf("SLOCK (count %ju)\n",
+ (uintmax_t)SX_SHARERS(sx->sx_lock));
+ else
db_printf("XLOCK\n");
- *ownerp = sx->sx_xholder;
- }
return (1);
}
#endif
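As a closing illustration of the lock-cookie scheme this diff introduces (a sketch, not the real <sys/sx.h> macros): a single word either holds the owning thread pointer for an exclusive lock or a sharer count for shared locks, with the low bits reserved for flags. The LK_* names below are invented, userland stand-ins modeled on the SX_* macros used above.

/* Userland model of a combined owner/sharer-count lock word (illustrative only). */
#include <stdint.h>
#include <stdio.h>

#define LK_SHARED		0x01UL	/* lock is share-locked */
#define LK_SHARED_WAITERS	0x02UL	/* threads blocked for a shared lock */
#define LK_EXCLUSIVE_WAITERS	0x04UL	/* threads blocked for an exclusive lock */
#define LK_RECURSED		0x08UL	/* exclusive owner has recursed */
#define LK_FLAGMASK		0x0fUL

#define LK_SHARERS_SHIFT	4
#define LK_ONE_SHARER		(1UL << LK_SHARERS_SHIFT)
#define LK_SHARERS(x)		(((x) & ~LK_FLAGMASK) >> LK_SHARERS_SHIFT)
#define LK_SHARERS_LOCK(n)	(((uintptr_t)(n) << LK_SHARERS_SHIFT) | LK_SHARED)
#define LK_UNLOCKED		LK_SHARERS_LOCK(0)

int
main(void)
{
	uintptr_t cookie;

	/* Unlocked state: shared bit set, zero sharers. */
	cookie = LK_UNLOCKED;
	printf("unlocked: %#lx sharers %lu\n", (unsigned long)cookie,
	    (unsigned long)LK_SHARERS(cookie));

	/* Two readers acquire the lock; each acquisition adds one sharer. */
	cookie += LK_ONE_SHARER;
	cookie += LK_ONE_SHARER;
	printf("shared:   %#lx sharers %lu\n", (unsigned long)cookie,
	    (unsigned long)LK_SHARERS(cookie));

	/* A writer arrives and records that it is waiting. */
	cookie |= LK_EXCLUSIVE_WAITERS;
	printf("waiters:  %#lx exclusive waiters %s\n", (unsigned long)cookie,
	    (cookie & LK_EXCLUSIVE_WAITERS) ? "pending" : "none");
	return (0);
}

The kernel code above performs the same kind of arithmetic with atomic_cmpset_*_ptr() so that the uncontended paths never need a sleep queue or a pool mutex.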