author     mjg <mjg@FreeBSD.org>  2017-03-16 06:35:53 +0000
committer  mjg <mjg@FreeBSD.org>  2017-03-16 06:35:53 +0000
commit     75c730bce9acd14b7ecb04b7be87b14661c92be3 (patch)
tree       a495b1d47bb6dabb7a674823c4cd6dfa1959abc9 /sys/sys
parent     871eda175a84b535a6be9a606ffb7d37a9ac194c (diff)
MFC r313269,r313270,r313271,r313272,r313274,r313278,r313279,r313996,r314474
mtx: switch to fcmpset

The found value is passed to locking routines in order to reduce cacheline accesses.

mtx_unlock grows an explicit check for regular unlock. On ll/sc architectures the routine can fail even if the lock could have been handled by the inline primitive.

==

rwlock: switch to fcmpset

==

sx: switch to fcmpset

==

sx: uninline slock/sunlock

Shared locking routines explicitly read the value and test it. If the change attempt fails, they fall back to a regular function which would retry in a loop.

The problem is that with many concurrent readers the risk of failure is pretty high and even the value returned by fcmpset is very likely going to be stale by the time the loop in the fallback routine is reached.

Uninline said primitives. It gives a throughput increase when doing concurrent slocks/sunlocks with 80 hardware threads from ~50 mln/s to ~56 mln/s.

Interestingly, rwlock primitives are already not inlined.

==

sx: add witness support missed in r313272

==

mtx: fix up _mtx_obtain_lock_fetch usage in thread lock

Since _mtx_obtain_lock_fetch no longer sets the argument to MTX_UNOWNED, callers have to do it on their own.

==

mtx: fixup r313278, the assignment was supposed to go inside the loop

==

mtx: fix spin mutexes interaction with failed fcmpset

While doing so move recursion support down to the fallback routine.

==

locks: ensure proper barriers are used with atomic ops when necessary

Unclear how, but the locking routine for mutexes was using the *release* barrier instead of acquire. This must have been either a copy-pasto or bad completion.

Going through other uses of atomics shows no barriers in:
- upgrade routines (addressed in this patch)
- sections protected with turnstile locks - this should be fine as necessary barriers are in the worst case provided by turnstile unlock

I would like to thank Mark Millard and andreast@ for reporting the problem and testing previous patches before the issue got identified.
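As a rough standalone sketch of the cmpset vs. fcmpset fast-path pattern described above (not part of the commit; LOCK_UNOWNED, lock_try_fcmpset and the other names are invented, and C11 <stdatomic.h> stands in for FreeBSD's machine/atomic.h): the fcmpset variant hands the observed lock word back through a pointer, so on failure the slow path can start from that value instead of re-reading the contended cacheline.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define LOCK_UNOWNED	((uintptr_t)0)

/*
 * Old style: plain compare-and-set.  On failure the slow path has to
 * re-read the lock word itself, costing an extra cacheline access.
 */
static bool
lock_try_cmpset(_Atomic uintptr_t *lockp, uintptr_t tid)
{
	uintptr_t expected = LOCK_UNOWNED;

	return (atomic_compare_exchange_strong_explicit(lockp, &expected,
	    tid, memory_order_acquire, memory_order_relaxed));
}

/*
 * New style: "fetching" compare-and-set.  On failure the value found in
 * the lock word is handed back through *vp, so the slow path can be
 * entered with it instead of issuing another load.
 */
static bool
lock_try_fcmpset(_Atomic uintptr_t *lockp, uintptr_t *vp, uintptr_t tid)
{
	*vp = LOCK_UNOWNED;
	return (atomic_compare_exchange_strong_explicit(lockp, vp, tid,
	    memory_order_acquire, memory_order_relaxed));
}

/*
 * Fast path in the fcmpset style: the observed value v is forwarded to
 * the slow path on failure, mirroring the shape of __mtx_lock() after
 * this change.
 */
static void
lock_acquire(_Atomic uintptr_t *lockp, uintptr_t tid,
    void (*slow_path)(_Atomic uintptr_t *, uintptr_t, uintptr_t))
{
	uintptr_t v;

	if (!lock_try_fcmpset(lockp, &v, tid))
		slow_path(lockp, v, tid);
}

Note the acquire ordering on the successful exchange: that is the barrier the last item in the commit message is about, as opposed to the release ordering used on unlock.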
Diffstat (limited to 'sys/sys')
-rw-r--r--  sys/sys/mutex.h   38
-rw-r--r--  sys/sys/rwlock.h  20
-rw-r--r--  sys/sys/sx.h      63
3 files changed, 44 insertions, 77 deletions
diff --git a/sys/sys/mutex.h b/sys/sys/mutex.h
index 11ed9d7..841bd76 100644
--- a/sys/sys/mutex.h
+++ b/sys/sys/mutex.h
@@ -98,13 +98,13 @@ void mtx_sysinit(void *arg);
int _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file,
int line);
void mutex_init(void);
-void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
- const char *file, int line);
+void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, uintptr_t tid,
+ int opts, const char *file, int line);
void __mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file,
int line);
#ifdef SMP
-void _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts,
- const char *file, int line);
+void _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v, uintptr_t tid,
+ int opts, const char *file, int line);
#endif
void __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file,
int line);
@@ -140,13 +140,13 @@ void thread_lock_flags_(struct thread *, int, const char *, int);
_mtx_destroy(&(m)->mtx_lock)
#define mtx_trylock_flags_(m, o, f, l) \
_mtx_trylock_flags_(&(m)->mtx_lock, o, f, l)
-#define _mtx_lock_sleep(m, t, o, f, l) \
- __mtx_lock_sleep(&(m)->mtx_lock, t, o, f, l)
+#define _mtx_lock_sleep(m, v, t, o, f, l) \
+ __mtx_lock_sleep(&(m)->mtx_lock, v, t, o, f, l)
#define _mtx_unlock_sleep(m, o, f, l) \
__mtx_unlock_sleep(&(m)->mtx_lock, o, f, l)
#ifdef SMP
-#define _mtx_lock_spin(m, t, o, f, l) \
- _mtx_lock_spin_cookie(&(m)->mtx_lock, t, o, f, l)
+#define _mtx_lock_spin(m, v, t, o, f, l) \
+ _mtx_lock_spin_cookie(&(m)->mtx_lock, v, t, o, f, l)
#endif
#define _mtx_lock_flags(m, o, f, l) \
__mtx_lock_flags(&(m)->mtx_lock, o, f, l)
@@ -171,6 +171,11 @@ void thread_lock_flags_(struct thread *, int, const char *, int);
#define _mtx_obtain_lock(mp, tid) \
atomic_cmpset_acq_ptr(&(mp)->mtx_lock, MTX_UNOWNED, (tid))
+#define _mtx_obtain_lock_fetch(mp, vp, tid) ({ \
+ *vp = MTX_UNOWNED; \
+ atomic_fcmpset_acq_ptr(&(mp)->mtx_lock, vp, (tid)); \
+})
+
/* Try to release mtx_lock if it is unrecursed and uncontested. */
#define _mtx_release_lock(mp, tid) \
atomic_cmpset_rel_ptr(&(mp)->mtx_lock, (tid), MTX_UNOWNED)
@@ -188,9 +193,10 @@ void thread_lock_flags_(struct thread *, int, const char *, int);
/* Lock a normal mutex. */
#define __mtx_lock(mp, tid, opts, file, line) do { \
uintptr_t _tid = (uintptr_t)(tid); \
+ uintptr_t _v; \
\
- if (((mp)->mtx_lock != MTX_UNOWNED || !_mtx_obtain_lock((mp), _tid)))\
- _mtx_lock_sleep((mp), _tid, (opts), (file), (line)); \
+ if (!_mtx_obtain_lock_fetch((mp), &_v, _tid)) \
+ _mtx_lock_sleep((mp), _v, _tid, (opts), (file), (line));\
else \
LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, \
mp, 0, 0, file, line); \
@@ -205,14 +211,12 @@ void thread_lock_flags_(struct thread *, int, const char *, int);
#ifdef SMP
#define __mtx_lock_spin(mp, tid, opts, file, line) do { \
uintptr_t _tid = (uintptr_t)(tid); \
+ uintptr_t _v; \
\
spinlock_enter(); \
- if (((mp)->mtx_lock != MTX_UNOWNED || !_mtx_obtain_lock((mp), _tid))) {\
- if ((mp)->mtx_lock == _tid) \
- (mp)->mtx_recurse++; \
- else \
- _mtx_lock_spin((mp), _tid, (opts), (file), (line)); \
- } else \
+ if (!_mtx_obtain_lock_fetch((mp), &_v, _tid)) \
+ _mtx_lock_spin((mp), _v, _tid, (opts), (file), (line)); \
+ else \
LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, \
mp, 0, 0, file, line); \
} while (0)
@@ -265,7 +269,7 @@ void thread_lock_flags_(struct thread *, int, const char *, int);
\
if ((mp)->mtx_recurse == 0) \
LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, mp); \
- if ((mp)->mtx_lock != _tid || !_mtx_release_lock((mp), _tid)) \
+ if (!_mtx_release_lock((mp), _tid)) \
_mtx_unlock_sleep((mp), (opts), (file), (line)); \
} while (0)
diff --git a/sys/sys/rwlock.h b/sys/sys/rwlock.h
index e5b1166..816037e 100644
--- a/sys/sys/rwlock.h
+++ b/sys/sys/rwlock.h
@@ -84,6 +84,11 @@
#define _rw_write_lock(rw, tid) \
atomic_cmpset_acq_ptr(&(rw)->rw_lock, RW_UNLOCKED, (tid))
+#define _rw_write_lock_fetch(rw, vp, tid) ({ \
+ *vp = RW_UNLOCKED; \
+ atomic_fcmpset_acq_ptr(&(rw)->rw_lock, vp, (tid)); \
+})
+
/* Release a write lock quickly if there are no waiters. */
#define _rw_write_unlock(rw, tid) \
atomic_cmpset_rel_ptr(&(rw)->rw_lock, (tid), RW_UNLOCKED)
@@ -97,9 +102,10 @@
/* Acquire a write lock. */
#define __rw_wlock(rw, tid, file, line) do { \
uintptr_t _tid = (uintptr_t)(tid); \
+ uintptr_t _v; \
\
- if ((rw)->rw_lock != RW_UNLOCKED || !_rw_write_lock((rw), _tid))\
- _rw_wlock_hard((rw), _tid, (file), (line)); \
+ if (!_rw_write_lock_fetch((rw), &_v, _tid)) \
+ _rw_wlock_hard((rw), _v, _tid, (file), (line)); \
else \
LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, \
0, 0, file, line, LOCKSTAT_WRITER); \
@@ -114,7 +120,7 @@
else { \
LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, \
LOCKSTAT_WRITER); \
- if ((rw)->rw_lock != _tid || !_rw_write_unlock((rw), _tid))\
+ if (!_rw_write_unlock((rw), _tid)) \
_rw_wunlock_hard((rw), _tid, (file), (line)); \
} \
} while (0)
@@ -135,8 +141,8 @@ void _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line);
void __rw_rlock(volatile uintptr_t *c, const char *file, int line);
int __rw_try_rlock(volatile uintptr_t *c, const char *file, int line);
void _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line);
-void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
- int line);
+void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v, uintptr_t tid,
+ const char *file, int line);
void __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid,
const char *file, int line);
int __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line);
@@ -171,8 +177,8 @@ void __rw_assert(const volatile uintptr_t *c, int what, const char *file,
__rw_try_rlock(&(rw)->rw_lock, f, l)
#define _rw_runlock(rw, f, l) \
_rw_runlock_cookie(&(rw)->rw_lock, f, l)
-#define _rw_wlock_hard(rw, t, f, l) \
- __rw_wlock_hard(&(rw)->rw_lock, t, f, l)
+#define _rw_wlock_hard(rw, v, t, f, l) \
+ __rw_wlock_hard(&(rw)->rw_lock, v, t, f, l)
#define _rw_wunlock_hard(rw, t, f, l) \
__rw_wunlock_hard(&(rw)->rw_lock, t, f, l)
#define _rw_try_upgrade(rw, f, l) \
diff --git a/sys/sys/sx.h b/sys/sys/sx.h
index e8cffaa..a676d2a 100644
--- a/sys/sys/sx.h
+++ b/sys/sys/sx.h
@@ -109,12 +109,10 @@ int _sx_slock(struct sx *sx, int opts, const char *file, int line);
int _sx_xlock(struct sx *sx, int opts, const char *file, int line);
void _sx_sunlock(struct sx *sx, const char *file, int line);
void _sx_xunlock(struct sx *sx, const char *file, int line);
-int _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts,
+int _sx_xlock_hard(struct sx *sx, uintptr_t v, uintptr_t tid, int opts,
const char *file, int line);
-int _sx_slock_hard(struct sx *sx, int opts, const char *file, int line);
void _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int
line);
-void _sx_sunlock_hard(struct sx *sx, const char *file, int line);
#if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
void _sx_assert(const struct sx *sx, int what, const char *file, int line);
#endif
@@ -153,11 +151,12 @@ __sx_xlock(struct sx *sx, struct thread *td, int opts, const char *file,
int line)
{
uintptr_t tid = (uintptr_t)td;
+ uintptr_t v;
int error = 0;
- if (sx->sx_lock != SX_LOCK_UNLOCKED ||
- !atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid))
- error = _sx_xlock_hard(sx, tid, opts, file, line);
+ v = SX_LOCK_UNLOCKED;
+ if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &v, tid))
+ error = _sx_xlock_hard(sx, v, tid, opts, file, line);
else
LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
0, 0, file, line, LOCKSTAT_WRITER);
@@ -174,46 +173,10 @@ __sx_xunlock(struct sx *sx, struct thread *td, const char *file, int line)
if (sx->sx_recurse == 0)
LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx,
LOCKSTAT_WRITER);
- if (sx->sx_lock != tid ||
- !atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
+ if (!atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
_sx_xunlock_hard(sx, tid, file, line);
}
-/* Acquire a shared lock. */
-static __inline int
-__sx_slock(struct sx *sx, int opts, const char *file, int line)
-{
- uintptr_t x = sx->sx_lock;
- int error = 0;
-
- if (!(x & SX_LOCK_SHARED) ||
- !atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER))
- error = _sx_slock_hard(sx, opts, file, line);
- else
- LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
- 0, 0, file, line, LOCKSTAT_READER);
-
- return (error);
-}
-
-/*
- * Release a shared lock. We can just drop a single shared lock so
- * long as we aren't trying to drop the last shared lock when other
- * threads are waiting for an exclusive lock. This takes advantage of
- * the fact that an unlocked lock is encoded as a shared lock with a
- * count of 0.
- */
-static __inline void
-__sx_sunlock(struct sx *sx, const char *file, int line)
-{
- uintptr_t x = sx->sx_lock;
-
- LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_READER);
- if (x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS) ||
- !atomic_cmpset_rel_ptr(&sx->sx_lock, x, x - SX_ONE_SHARER))
- _sx_sunlock_hard(sx, file, line);
-}
-
/*
* Public interface for lock operations.
*/
@@ -227,12 +190,6 @@ __sx_sunlock(struct sx *sx, const char *file, int line)
_sx_xlock((sx), SX_INTERRUPTIBLE, (file), (line))
#define sx_xunlock_(sx, file, line) \
_sx_xunlock((sx), (file), (line))
-#define sx_slock_(sx, file, line) \
- (void)_sx_slock((sx), 0, (file), (line))
-#define sx_slock_sig_(sx, file, line) \
- _sx_slock((sx), SX_INTERRUPTIBLE, (file) , (line))
-#define sx_sunlock_(sx, file, line) \
- _sx_sunlock((sx), (file), (line))
#else
#define sx_xlock_(sx, file, line) \
(void)__sx_xlock((sx), curthread, 0, (file), (line))
@@ -240,13 +197,13 @@ __sx_sunlock(struct sx *sx, const char *file, int line)
__sx_xlock((sx), curthread, SX_INTERRUPTIBLE, (file), (line))
#define sx_xunlock_(sx, file, line) \
__sx_xunlock((sx), curthread, (file), (line))
+#endif /* LOCK_DEBUG > 0 || SX_NOINLINE */
#define sx_slock_(sx, file, line) \
- (void)__sx_slock((sx), 0, (file), (line))
+ (void)_sx_slock((sx), 0, (file), (line))
#define sx_slock_sig_(sx, file, line) \
- __sx_slock((sx), SX_INTERRUPTIBLE, (file), (line))
+ _sx_slock((sx), SX_INTERRUPTIBLE, (file) , (line))
#define sx_sunlock_(sx, file, line) \
- __sx_sunlock((sx), (file), (line))
-#endif /* LOCK_DEBUG > 0 || SX_NOINLINE */
+ _sx_sunlock((sx), (file), (line))
#define sx_try_slock(sx) sx_try_slock_((sx), LOCK_FILE, LOCK_LINE)
#define sx_try_xlock(sx) sx_try_xlock_((sx), LOCK_FILE, LOCK_LINE)
#define sx_try_upgrade(sx) sx_try_upgrade_((sx), LOCK_FILE, LOCK_LINE)