author     kib <kib@FreeBSD.org>    2016-05-17 09:56:22 +0000
committer  kib <kib@FreeBSD.org>    2016-05-17 09:56:22 +0000
commit     8da898f26c04f1b12f46ec60020d7d15d03799a9
tree       d626a38beb7329b5a3aa09877b11a5ec03a6eb38  /lib/libthr/thread/thr_cond.c
parent     afc75dd440c351adf17eb82272dd3e3f62f97410
Add implementation of robust mutexes, hopefully close enough to the
intention of POSIX IEEE Std 1003.1™-2008/Cor 1-2013.
A robust mutex is guaranteed to be cleared by the system upon either
thread or process owner termination while the mutex is held. The next
mutex locker is then notified of the inconsistent mutex state and can
execute (or abandon) corrective actions.
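For reference, the userspace-visible protocol being implemented here is
the standard POSIX one; a minimal consumer looks roughly like this
(plain POSIX calls, not code from this commit):

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t m;

static void
init_robust(void)
{
	pthread_mutexattr_t attr;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST);
	pthread_mutex_init(&m, &attr);
	pthread_mutexattr_destroy(&attr);
}

static void
lock_with_recovery(void)
{
	int error;

	error = pthread_mutex_lock(&m);
	if (error == EOWNERDEAD) {
		/*
		 * The previous owner died holding the lock; repair the
		 * protected data, then mark the mutex usable again.
		 */
		pthread_mutex_consistent(&m);
	} else if (error == ENOTRECOVERABLE) {
		/* A previous locker unlocked without recovering. */
		abort();
	} else if (error != 0)
		abort();
}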
The patch mostly consists of small changes here and there, adding
necessary checks for the inconsistent and abandoned conditions into
existing paths. Additionally, the thread exit handler was extended to
iterate over the userspace-maintained list of owned robust mutexes,
unlocking and marking each of them as terminated.
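In outline, that exit-time walk does the following (a hypothetical
sketch only; every identifier below is invented for illustration, and
the real work is done by the kernel and libthr cooperatively):

	/* Hypothetical sketch of the termination-time cleanup. */
	for (m = robust_list_head; m != NULL; m = robust_next(m)) {
		/*
		 * Release the lock word and tag it so that the next
		 * locker gets EOWNERDEAD instead of blocking forever.
		 */
		mark_owner_dead_and_unlock(m);
	}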
The list of owned robust mutexes cannot be maintained atomically in
sync with the mutex lock state (that would be possible in the kernel,
but is too expensive). Instead, for the duration of a lock or unlock
operation, the current mutex is remembered in a special slot that is
also checked by the kernel at thread termination.
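The window this closes is roughly the following (identifiers are
hypothetical, chosen only to illustrate the ordering):

	curthread->inact_slot = m;	/* publish the mutex in flight */
	if (try_lock_word(&m->m_lock) == 0)	/* CAS on the lock word */
		robust_list_link(curthread, m);	/* now on the owned list */
	curthread->inact_slot = NULL;	/* window closed */

If the thread dies between the CAS and the list update, the kernel
finds the mutex through the slot instead of the list.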
The kernel must know the per-thread location of the heads of the
robust mutex lists and of the currently active mutex slot. When a
thread touches a robust mutex for the first time, a new umtx op
syscall is issued to inform the kernel of the location of the list
heads.
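Modulo details, the per-thread registration looks like this (a sketch
of the interface added by this patch, reconstructed from memory; treat
the field and constant names as approximate):

#include <sys/types.h>
#include <sys/umtx.h>

static void
register_robust_lists(struct pthread *curthread)
{
	struct umtx_robust_lists_params rb;

	rb.robust_list_offset = (uintptr_t)&curthread->robust_list;
	rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list;
	rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx;
	/* val carries sizeof(rb) so the ABI can be extended later. */
	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
}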
The umtx sleep queues for PP and PI mutexes are split between
non-robust and robust.
Somewhat unrelated changes in the patch:
1. Style.
2. A fix for the proper use of tdfind() in umtxq_sleep_pi() for shared
PI mutexes.
3. Removal of the userspace struct pthread_mutex m_owner field.
4. The sysctl kern.ipc.umtx_vnode_persistent is added, which controls
the lifetime of the shared mutex associated with a vnode's page (see
the sketch after this list).
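A minimal way to inspect the new knob programmatically, via the
standard sysctlbyname(3) interface (error handling trimmed; setting
the value requires privilege):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int val;
	size_t len;

	len = sizeof(val);
	if (sysctlbyname("kern.ipc.umtx_vnode_persistent", &val, &len,
	    NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("kern.ipc.umtx_vnode_persistent: %d\n", val);
	return (0);
}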
Reviewed by: jilles (previous version, supposedly the objection was fixed)
Discussed with: brooks, Martin Simmons <martin@lispworks.com> (some aspects)
Tested by: pho
Sponsored by: The FreeBSD Foundation
Diffstat (limited to 'lib/libthr/thread/thr_cond.c')
-rw-r--r--  lib/libthr/thread/thr_cond.c  116
1 file changed, 63 insertions, 53 deletions
diff --git a/lib/libthr/thread/thr_cond.c b/lib/libthr/thread/thr_cond.c
index 0e37b70..4d9356a 100644
--- a/lib/libthr/thread/thr_cond.c
+++ b/lib/libthr/thread/thr_cond.c
@@ -188,46 +188,57 @@ _pthread_cond_destroy(pthread_cond_t *cond)
  */
 static int
 cond_wait_kernel(struct pthread_cond *cvp, struct pthread_mutex *mp,
-	const struct timespec *abstime, int cancel)
+    const struct timespec *abstime, int cancel)
 {
-	struct pthread *curthread = _get_curthread();
-	int recurse;
-	int error, error2 = 0;
+	struct pthread *curthread;
+	int error, error2, recurse, robust;
+
+	curthread = _get_curthread();
+	robust = _mutex_enter_robust(curthread, mp);
 
 	error = _mutex_cv_detach(mp, &recurse);
-	if (error != 0)
+	if (error != 0) {
+		if (robust)
+			_mutex_leave_robust(curthread, mp);
 		return (error);
+	}
 
-	if (cancel) {
+	if (cancel)
 		_thr_cancel_enter2(curthread, 0);
-		error = _thr_ucond_wait((struct ucond *)&cvp->__has_kern_waiters,
-		    (struct umutex *)&mp->m_lock, abstime,
-		    CVWAIT_ABSTIME|CVWAIT_CLOCKID);
+	error = _thr_ucond_wait((struct ucond *)&cvp->__has_kern_waiters,
+	    (struct umutex *)&mp->m_lock, abstime, CVWAIT_ABSTIME |
+	    CVWAIT_CLOCKID);
+	if (cancel)
 		_thr_cancel_leave(curthread, 0);
-	} else {
-		error = _thr_ucond_wait((struct ucond *)&cvp->__has_kern_waiters,
-		    (struct umutex *)&mp->m_lock, abstime,
-		    CVWAIT_ABSTIME|CVWAIT_CLOCKID);
-	}
 
 	/*
 	 * Note that PP mutex and ROBUST mutex may return
 	 * interesting error codes.
 	 */
 	if (error == 0) {
-		error2 = _mutex_cv_lock(mp, recurse);
+		error2 = _mutex_cv_lock(mp, recurse, true);
 	} else if (error == EINTR || error == ETIMEDOUT) {
-		error2 = _mutex_cv_lock(mp, recurse);
+		error2 = _mutex_cv_lock(mp, recurse, true);
+		/*
+		 * Do not do cancellation on EOWNERDEAD there.  The
+		 * cancellation cleanup handler will use the protected
+		 * state and unlock the mutex without making the state
+		 * consistent and the state will be unrecoverable.
+		 */
 		if (error2 == 0 && cancel)
 			_thr_testcancel(curthread);
+
 		if (error == EINTR)
 			error = 0;
 	} else {
 		/* We know that it didn't unlock the mutex. */
-		error2 = _mutex_cv_attach(mp, recurse);
-		if (error2 == 0 && cancel)
+		_mutex_cv_attach(mp, recurse);
+		if (cancel)
 			_thr_testcancel(curthread);
+		error2 = 0;
 	}
+	if (robust)
+		_mutex_leave_robust(curthread, mp);
 	return (error2 != 0 ? error2 : error);
 }
 
@@ -240,14 +251,13 @@ cond_wait_kernel(struct pthread_cond *cvp, struct pthread_mutex *mp,
 
 static int
 cond_wait_user(struct pthread_cond *cvp, struct pthread_mutex *mp,
-	const struct timespec *abstime, int cancel)
+    const struct timespec *abstime, int cancel)
 {
-	struct pthread *curthread = _get_curthread();
+	struct pthread *curthread;
 	struct sleepqueue *sq;
-	int recurse;
-	int error;
-	int defered;
+	int deferred, error, error2, recurse;
 
+	curthread = _get_curthread();
 	if (curthread->wchan != NULL)
 		PANIC("thread was already on queue.");
 
@@ -260,32 +270,31 @@ cond_wait_user(struct pthread_cond *cvp, struct pthread_mutex *mp,
 	 * us to check it without locking in pthread_cond_signal().
 	 */
 	cvp->__has_user_waiters = 1;
-	defered = 0;
-	(void)_mutex_cv_unlock(mp, &recurse, &defered);
+	deferred = 0;
+	(void)_mutex_cv_unlock(mp, &recurse, &deferred);
 	curthread->mutex_obj = mp;
 	_sleepq_add(cvp, curthread);
 	for(;;) {
 		_thr_clear_wake(curthread);
 		_sleepq_unlock(cvp);
-		if (defered) {
-			defered = 0;
+		if (deferred) {
+			deferred = 0;
 			if ((mp->m_lock.m_owner & UMUTEX_CONTESTED) == 0)
-				(void)_umtx_op_err(&mp->m_lock, UMTX_OP_MUTEX_WAKE2,
-					 mp->m_lock.m_flags, 0, 0);
+				(void)_umtx_op_err(&mp->m_lock,
+				    UMTX_OP_MUTEX_WAKE2, mp->m_lock.m_flags,
+				    0, 0);
 		}
 		if (curthread->nwaiter_defer > 0) {
 			_thr_wake_all(curthread->defer_waiters,
-				curthread->nwaiter_defer);
+			    curthread->nwaiter_defer);
 			curthread->nwaiter_defer = 0;
 		}
-		if (cancel) {
+		if (cancel)
 			_thr_cancel_enter2(curthread, 0);
-			error = _thr_sleep(curthread, cvp->__clock_id, abstime);
+		error = _thr_sleep(curthread, cvp->__clock_id, abstime);
+		if (cancel)
 			_thr_cancel_leave(curthread, 0);
-		} else {
-			error = _thr_sleep(curthread, cvp->__clock_id, abstime);
-		}
 
 		_sleepq_lock(cvp);
 		if (curthread->wchan == NULL) {
@@ -293,25 +302,26 @@ cond_wait_user(struct pthread_cond *cvp, struct pthread_mutex *mp,
 			break;
 		} else if (cancel && SHOULD_CANCEL(curthread)) {
 			sq = _sleepq_lookup(cvp);
-			cvp->__has_user_waiters =
-				_sleepq_remove(sq, curthread);
+			cvp->__has_user_waiters = _sleepq_remove(sq, curthread);
 			_sleepq_unlock(cvp);
 			curthread->mutex_obj = NULL;
-			_mutex_cv_lock(mp, recurse);
+			error2 = _mutex_cv_lock(mp, recurse, false);
 			if (!THR_IN_CRITICAL(curthread))
 				_pthread_exit(PTHREAD_CANCELED);
 			else /* this should not happen */
-				return (0);
+				return (error2);
 		} else if (error == ETIMEDOUT) {
 			sq = _sleepq_lookup(cvp);
 			cvp->__has_user_waiters =
-				_sleepq_remove(sq, curthread);
+			    _sleepq_remove(sq, curthread);
 			break;
 		}
 	}
 	_sleepq_unlock(cvp);
 	curthread->mutex_obj = NULL;
-	_mutex_cv_lock(mp, recurse);
+	error2 = _mutex_cv_lock(mp, recurse, false);
+	if (error == 0)
+		error = error2;
 	return (error);
 }
 
@@ -338,12 +348,12 @@ cond_wait_common(pthread_cond_t *cond, pthread_mutex_t *mutex,
 		return (error);
 
 	if (curthread->attr.sched_policy != SCHED_OTHER ||
-	    (mp->m_lock.m_flags & (UMUTEX_PRIO_PROTECT|UMUTEX_PRIO_INHERIT|
-	    USYNC_PROCESS_SHARED)) != 0 ||
+	    (mp->m_lock.m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT |
+	    USYNC_PROCESS_SHARED)) != 0 ||
	    (cvp->__flags & USYNC_PROCESS_SHARED) != 0)
-		return cond_wait_kernel(cvp, mp, abstime, cancel);
+		return (cond_wait_kernel(cvp, mp, abstime, cancel));
 	else
-		return cond_wait_user(cvp, mp, abstime, cancel);
+		return (cond_wait_user(cvp, mp, abstime, cancel));
 }
 
 int
@@ -420,15 +430,15 @@ cond_signal_common(pthread_cond_t *cond)
 	td = _sleepq_first(sq);
 	mp = td->mutex_obj;
 	cvp->__has_user_waiters = _sleepq_remove(sq, td);
-	if (mp->m_owner == TID(curthread)) {
+	if (PMUTEX_OWNER_ID(mp) == TID(curthread)) {
 		if (curthread->nwaiter_defer >= MAX_DEFER_WAITERS) {
 			_thr_wake_all(curthread->defer_waiters,
-					curthread->nwaiter_defer);
+			    curthread->nwaiter_defer);
 			curthread->nwaiter_defer = 0;
 		}
 		curthread->defer_waiters[curthread->nwaiter_defer++] =
-			&td->wake_addr->value;
-		mp->m_flags |= PMUTEX_FLAG_DEFERED;
+		    &td->wake_addr->value;
+		mp->m_flags |= PMUTEX_FLAG_DEFERRED;
 	} else {
 		waddr = &td->wake_addr->value;
 	}
@@ -452,15 +462,15 @@ drop_cb(struct pthread *td, void *arg)
 	struct pthread *curthread = ba->curthread;
 
 	mp = td->mutex_obj;
-	if (mp->m_owner == TID(curthread)) {
+	if (PMUTEX_OWNER_ID(mp) == TID(curthread)) {
 		if (curthread->nwaiter_defer >= MAX_DEFER_WAITERS) {
 			_thr_wake_all(curthread->defer_waiters,
-				curthread->nwaiter_defer);
+			    curthread->nwaiter_defer);
 			curthread->nwaiter_defer = 0;
 		}
 		curthread->defer_waiters[curthread->nwaiter_defer++] =
-			&td->wake_addr->value;
-		mp->m_flags |= PMUTEX_FLAG_DEFERED;
+		    &td->wake_addr->value;
+		mp->m_flags |= PMUTEX_FLAG_DEFERRED;
 	} else {
 		if (ba->count >= MAX_DEFER_WAITERS) {
 			_thr_wake_all(ba->waddrs, ba->count);
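The practical consequence of the _mutex_cv_lock() changes in the diff
above: with a robust mutex, the condition-wait functions can now
report owner death, with the mutex reacquired and held by the caller,
which is then expected to recover, e.g. (standard POSIX usage, not
code from this commit):

	error = pthread_cond_wait(&cv, &m);
	if (error == EOWNERDEAD) {
		/*
		 * The owner died; we hold the mutex.  Repair the
		 * shared state, then make the mutex consistent.
		 */
		pthread_mutex_consistent(&m);
	}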