path: root/sys/kern/kern_sx.c
author    mjg <mjg@FreeBSD.org>	2018-05-31 15:58:28 +0000
committer mjg <mjg@FreeBSD.org>	2018-05-31 15:58:28 +0000
commit    8b9af5c67de5a51974b9d4bc7570e0b9700c4fcb (patch)
tree      43c64264959a4d48f66a5d4d4174364f2b38535c /sys/kern/kern_sx.c
parent    2330477597aab408fddf0694f764ba475156d9f5 (diff)
MFC r329276,r329451,r330294,r330414,r330415,r330418,r331109,r332394,r332398,
r333831:

rwlock: diff-reduction of runlock compared to sx sunlock

==

Undo LOCK_PROFILING pessimisation after r313454 and r313455

With the option used to compile the kernel, both sx and rw shared ops would
always go to the slow path, which added avoidable overhead even when the
facility is disabled.

Furthermore, the increased time spent doing uncontested shared lock acquire
would be bogusly added to total wait time, somewhat skewing the results.

Restore the old behaviour of going there only when profiling is enabled.

This change is a no-op for kernels without LOCK_PROFILING (which is the
default).

==

sx: fix adaptive spinning broken in r327397

The condition was flipped. In particular, heavy multithreaded kernel builds
on zfs started suffering due to nested sx locks.

For instance, make -s -j 128 buildkernel:

before: 3326.67s user 1269.62s system 6981% cpu 1:05.84 total
after:  3365.55s user  911.27s system 6871% cpu 1:02.24 total

==

locks: fix a corner case in r327399

If there were exactly rowner_retries/asx_retries (by default: 10) transitions
between read and write state and the waiters still did not get the lock, the
next owner -> reader transition would result in the code correctly falling
back to turnstile/sleepq, where it would incorrectly think it was waiting for
a writer and decide to leave the turnstile/sleepq to loop back. From this
point it would take ts/sq trips until the lock gets released.

The bug sometimes manifested itself in stalls during -j 128 package builds.

Refactor the code to fix the bug; while here, remove some of the gratuitous
differences between rw and sx locks.

==

sx: don't do an atomic op in upgrade if it cannot succeed

The code already pays the cost of reading the lock to obtain the waiters
flag. Checking whether there is more than one reader is not a problem and
avoids dirtying the line.

This also fixes a small corner case: if waiters were to show up between
reading the flag and upgrading the lock, the operation would fail even though
it should not. No correctness change here though.

==

mtx: tidy up recursion handling in thread lock

Normally, after grabbing the lock it has to be verified we got the right one
to begin with. However, if we are recursing, it must not change, thus the
check can be avoided. In particular, this avoids a lock read for the
non-recursing case which found out the lock was changed.

While here, avoid an irq trip if this happens.

==

locks: slightly depessimize lockstat

The slow path is always taken when lockstat is enabled. This induces rdtsc
(or other) calls to get the cycle count even when there was no contention.

Still go to the slow path so as not to mess with the fast path, but avoid the
heavy lifting unless necessary.

This reduces sys and real time during -j 80 buildkernel:

before:   3651.84s user 1105.59s system 5394% cpu 1:28.18 total
after:    3685.99s user  975.74s system 5450% cpu 1:25.53 total
disabled: 3697.96s user  411.13s system 5261% cpu 1:18.10 total

So note this is still a significant hit.

LOCK_PROFILING results are not affected.

==

rw: whack avoidable re-reads in try_upgrade

==

locks: extend speculative spin waiting for readers to drain

Now that 10 years have passed since the original limit of 10000 was
committed, bump it a little bit.

Spinning waiting for writers is semi-informed in the sense that we always
know if the owner is running and base the decision to spin on that. However,
no such information is provided for read-locking. In particular, this means
that it is possible for a write-spinner to completely waste cpu time waiting
for the lock to be released, while the reader holding it was preempted and is
now waiting for the spinner to go off cpu. Nonetheless, in the majority of
cases it is an improvement to spin instead of instantly giving up and going
to sleep.

The current approach is pretty simple: snatch the number of current readers
and perform that many pauses before checking again. The total number of
pauses to execute is limited to 10k. If the lock is still not free by that
time, go to sleep.

Given the previously noted problem of not knowing whether spinning makes any
sense to begin with, the new limit has to remain rather conservative. But at
the very least it should also be related to the machine. Waiting for writers
uses parameters selected based on the number of activated hardware threads.
The upper limit of pause instructions to be executed in-between re-reads of
the lock is typically 16384 or 32768. It was selected as the limit of total
spins. The lower bound is set to the already present 10000 so as not to
change it for smaller machines.

Bumping the limit reduces system time by a few % during benchmarks like
buildworld, buildkernel and others. Tested on 2 and 4 socket machines
(Broadwell, Skylake).

Figuring out how to make a more informed decision while not pessimizing the
fast path is left as an exercise for the reader.

==

fix uninitialized variable warning in reader locks

Approved by:	re (marius)
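To make the reader-drain heuristic above concrete, here is a minimal,
standalone sketch rather than the committed code (the real change is in
_sx_xlock_hard() in the diff below): snapshot the number of current readers,
pause once per observed reader, re-read the lock, and give up once the budget
is spent. The function name spin_for_readers() and the spin_budget parameter
are hypothetical and used only for illustration; SX_LOCK_SHARED, SX_SHARERS()
and the struct sx layout come from sys/sx.h, and cpu_spinwait() stands in for
lock_delay_spin().

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/sx.h>
#include <machine/cpu.h>

/*
 * Spin while the lock is read-held, executing one pause per reader seen in
 * each snapshot.  Returns 1 if the readers drained (caller should retry the
 * acquire) and 0 if the budget ran out (caller should block).
 */
static inline int
spin_for_readers(struct sx *sx, u_int spin_budget)
{
	uintptr_t x;
	u_int i, n;

	for (i = 0; i < spin_budget; i += n) {
		x = sx->sx_lock;
		/* No longer read-held: stop spinning and retry the acquire. */
		if ((x & SX_LOCK_SHARED) == 0 || SX_SHARERS(x) == 0)
			return (1);
		/* Pause once per reader observed in this snapshot. */
		n = SX_SHARERS(x);
		while (n-- > 0)
			cpu_spinwait();
	}
	return (0);
}

In the committed patch the budget corresponds to asx_loops, which is now
initialized at SYSINIT time to max(10000, sx_delay.max) so it scales with the
machine instead of staying at a flat 10000.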
Diffstat (limited to 'sys/kern/kern_sx.c')
-rw-r--r--	sys/kern/kern_sx.c | 161
1 file changed, 96 insertions(+), 65 deletions(-)
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index 0e6d0a6..504636e 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -143,8 +143,8 @@ struct lock_class lock_class_sx = {
#endif
#ifdef ADAPTIVE_SX
-static __read_frequently u_int asx_retries = 10;
-static __read_frequently u_int asx_loops = 10000;
+static __read_frequently u_int asx_retries;
+static __read_frequently u_int asx_loops;
static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging");
SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, "");
SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, "");
@@ -156,7 +156,15 @@ SYSCTL_INT(_debug_sx, OID_AUTO, delay_base, CTLFLAG_RW, &sx_delay.base,
SYSCTL_INT(_debug_sx, OID_AUTO, delay_max, CTLFLAG_RW, &sx_delay.max,
0, "");
-LOCK_DELAY_SYSINIT_DEFAULT(sx_delay);
+static void
+sx_lock_delay_init(void *arg __unused)
+{
+
+ lock_delay_default_init(&sx_delay);
+ asx_retries = 10;
+ asx_loops = max(10000, sx_delay.max);
+}
+LOCK_DELAY_SYSINIT(sx_lock_delay_init);
#endif
void
@@ -411,6 +419,7 @@ int
sx_try_upgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
{
uintptr_t x;
+ uintptr_t waiters;
int success;
if (SCHEDULER_STOPPED())
@@ -425,9 +434,18 @@ sx_try_upgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
* to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that
* we will wake up the exclusive waiters when we drop the lock.
*/
- x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS;
- success = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x,
- (uintptr_t)curthread | x);
+ success = 0;
+ x = SX_READ_VALUE(sx);
+ for (;;) {
+ if (SX_SHARERS(x) > 1)
+ break;
+ waiters = (x & SX_LOCK_EXCLUSIVE_WAITERS);
+ if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x,
+ (uintptr_t)curthread | waiters)) {
+ success = 1;
+ break;
+ }
+ }
LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line);
if (success) {
WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
@@ -531,8 +549,8 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
#ifdef ADAPTIVE_SX
volatile struct thread *owner;
u_int i, n, spintries = 0;
+ enum { READERS, WRITER } sleep_reason = READERS;
bool adaptive;
- int sleep_reason = 0;
#endif
#ifdef LOCK_PROFILING
uint64_t waittime = 0;
@@ -548,11 +566,28 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
int64_t all_time = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
- uintptr_t state;
+ uintptr_t state = 0;
#endif
int extra_work = 0;
tid = (uintptr_t)curthread;
+
+#ifdef KDTRACE_HOOKS
+ if (LOCKSTAT_PROFILE_ENABLED(sx__acquire)) {
+ while (x == SX_LOCK_UNLOCKED) {
+ if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
+ goto out_lockstat;
+ }
+ extra_work = 1;
+ all_time -= lockstat_nsecs(&sx->lock_object);
+ state = x;
+ }
+#endif
+#ifdef LOCK_PROFILING
+ extra_work = 1;
+ state = x;
+#endif
+
if (SCHEDULER_STOPPED())
return (0);
@@ -582,7 +617,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);
#ifdef ADAPTIVE_SX
- adaptive = ((sx->lock_object.lo_flags & SX_NOADAPTIVE) != 0);
+ adaptive = ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0);
#endif
#ifdef HWPMC_HOOKS
@@ -591,16 +626,6 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
&waittime);
-#ifdef LOCK_PROFILING
- extra_work = 1;
- state = x;
-#elif defined(KDTRACE_HOOKS)
- extra_work = lockstat_enabled;
- if (__predict_false(extra_work)) {
- all_time -= lockstat_nsecs(&sx->lock_object);
- state = x;
- }
-#endif
#ifndef INVARIANTS
GIANT_SAVE(extra_work);
#endif
@@ -626,37 +651,33 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
* running or the state of the lock changes.
*/
if ((x & SX_LOCK_SHARED) == 0) {
+ sleep_reason = WRITER;
owner = lv_sx_owner(x);
- if (TD_IS_RUNNING(owner)) {
- if (LOCK_LOG_TEST(&sx->lock_object, 0))
- CTR3(KTR_LOCK,
- "%s: spinning on %p held by %p",
- __func__, sx, owner);
- KTR_STATE1(KTR_SCHED, "thread",
- sched_tdname(curthread), "spinning",
- "lockname:\"%s\"",
- sx->lock_object.lo_name);
- do {
- lock_delay(&lda);
- x = SX_READ_VALUE(sx);
- owner = lv_sx_owner(x);
- } while (owner != NULL &&
- TD_IS_RUNNING(owner));
- KTR_STATE0(KTR_SCHED, "thread",
- sched_tdname(curthread), "running");
- continue;
- }
- sleep_reason = 1;
- } else if (SX_SHARERS(x) && spintries < asx_retries) {
- KTR_STATE1(KTR_SCHED, "thread",
- sched_tdname(curthread), "spinning",
- "lockname:\"%s\"", sx->lock_object.lo_name);
+ if (!TD_IS_RUNNING(owner))
+ goto sleepq;
+ if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
+ __func__, sx, owner);
+ KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
+ "spinning", "lockname:\"%s\"",
+ sx->lock_object.lo_name);
+ do {
+ lock_delay(&lda);
+ x = SX_READ_VALUE(sx);
+ owner = lv_sx_owner(x);
+ } while (owner != NULL && TD_IS_RUNNING(owner));
+ KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
+ "running");
+ continue;
+ } else if (SX_SHARERS(x) > 0) {
+ sleep_reason = READERS;
+ if (spintries == asx_retries)
+ goto sleepq;
spintries++;
+ KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
+ "spinning", "lockname:\"%s\"",
+ sx->lock_object.lo_name);
for (i = 0; i < asx_loops; i += n) {
- if (LOCK_LOG_TEST(&sx->lock_object, 0))
- CTR4(KTR_LOCK,
- "%s: shared spinning on %p with %u and %u",
- __func__, sx, spintries, i);
n = SX_SHARERS(x);
lock_delay_spin(n);
x = SX_READ_VALUE(sx);
@@ -667,11 +688,10 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
#ifdef KDTRACE_HOOKS
lda.spin_cnt += i;
#endif
- KTR_STATE0(KTR_SCHED, "thread",
- sched_tdname(curthread), "running");
+ KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
+ "running");
if (i < asx_loops)
continue;
- sleep_reason = 2;
}
sleepq:
#endif
@@ -703,7 +723,7 @@ retry_sleepq:
sleepq_release(&sx->lock_object);
continue;
}
- } else if (SX_SHARERS(x) > 0 && sleep_reason == 1) {
+ } else if (SX_SHARERS(x) > 0 && sleep_reason == WRITER) {
sleepq_release(&sx->lock_object);
continue;
}
@@ -793,6 +813,7 @@ retry_sleepq:
LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
+out_lockstat:
#endif
if (!error)
LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
@@ -921,10 +942,24 @@ _sx_slock_hard(struct sx *sx, int opts, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
int64_t all_time = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
- uintptr_t state;
+ uintptr_t state = 0;
#endif
int extra_work = 0;
+#ifdef KDTRACE_HOOKS
+ if (LOCKSTAT_PROFILE_ENABLED(sx__acquire)) {
+ if (__sx_slock_try(sx, &x LOCK_FILE_LINE_ARG))
+ goto out_lockstat;
+ extra_work = 1;
+ all_time -= lockstat_nsecs(&sx->lock_object);
+ state = x;
+ }
+#endif
+#ifdef LOCK_PROFILING
+ extra_work = 1;
+ state = x;
+#endif
+
if (SCHEDULER_STOPPED())
return (0);
@@ -935,7 +970,7 @@ _sx_slock_hard(struct sx *sx, int opts, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
#endif
#ifdef ADAPTIVE_SX
- adaptive = ((sx->lock_object.lo_flags & SX_NOADAPTIVE) != 0);
+ adaptive = ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0);
#endif
#ifdef HWPMC_HOOKS
@@ -944,16 +979,6 @@ _sx_slock_hard(struct sx *sx, int opts, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
&waittime);
-#ifdef LOCK_PROFILING
- extra_work = 1;
- state = x;
-#elif defined(KDTRACE_HOOKS)
- extra_work = lockstat_enabled;
- if (__predict_false(extra_work)) {
- all_time -= lockstat_nsecs(&sx->lock_object);
- state = x;
- }
-#endif
#ifndef INVARIANTS
GIANT_SAVE(extra_work);
#endif
@@ -1095,6 +1120,7 @@ retry_sleepq:
LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
+out_lockstat:
#endif
if (error == 0) {
LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
@@ -1120,9 +1146,12 @@ _sx_slock_int(struct sx *sx, int opts LOCK_FILE_LINE_ARG_DEF)
error = 0;
x = SX_READ_VALUE(sx);
- if (__predict_false(LOCKSTAT_OOL_PROFILE_ENABLED(sx__acquire) ||
+ if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__acquire) ||
!__sx_slock_try(sx, &x LOCK_FILE_LINE_ARG)))
error = _sx_slock_hard(sx, opts, x LOCK_FILE_LINE_ARG);
+ else
+ lock_profile_obtain_lock_success(&sx->lock_object, 0, 0,
+ file, line);
if (error == 0) {
LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
WITNESS_LOCK(&sx->lock_object, 0, file, line);
@@ -1250,9 +1279,11 @@ _sx_sunlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
x = SX_READ_VALUE(sx);
- if (__predict_false(LOCKSTAT_OOL_PROFILE_ENABLED(sx__release) ||
+ if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__release) ||
!_sx_sunlock_try(sx, &x)))
_sx_sunlock_hard(sx, x LOCK_FILE_LINE_ARG);
+ else
+ lock_profile_release_lock(&sx->lock_object);
TD_LOCKS_DEC(curthread);
}