path: root/sys/kern
author     Renato Botelho <renato@netgate.com>  2017-01-09 12:11:05 -0200
committer  Renato Botelho <renato@netgate.com>  2017-01-09 12:11:05 -0200
commit     681a482d8fc4bfc14a24f7a9d75cca6337f2a520 (patch)
tree       08368e0c4dcea4baa16f4a34b2cc104c42e1ed27 /sys/kern
parent     cbeab2a9b6b7ac70992175202f35fcc05a5821d5 (diff)
parent     91f6edbb8913d163d5c16fb615e84baf8a16d390 (diff)
Merge remote-tracking branch 'origin/stable/10' into devel
Diffstat (limited to 'sys/kern')
-rw-r--r--  sys/kern/kern_event.c    162
-rw-r--r--  sys/kern/kern_exit.c       5
-rw-r--r--  sys/kern/kern_lock.c      12
-rw-r--r--  sys/kern/kern_mutex.c     68
-rw-r--r--  sys/kern/kern_proc.c      10
-rw-r--r--  sys/kern/kern_rwlock.c   107
-rw-r--r--  sys/kern/kern_sx.c        83
-rw-r--r--  sys/kern/kern_thr.c        7
-rw-r--r--  sys/kern/subr_lock.c      29
-rw-r--r--  sys/kern/vfs_syscalls.c  106
10 files changed, 375 insertions, 214 deletions
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index fba163f..2c99803 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -47,7 +47,6 @@ __FBSDID("$FreeBSD$");
#include <sys/fcntl.h>
#include <sys/kthread.h>
#include <sys/selinfo.h>
-#include <sys/stdatomic.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
@@ -66,6 +65,7 @@ __FBSDID("$FreeBSD$");
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
+#include <machine/atomic.h>
#include <vm/uma.h>
@@ -184,7 +184,7 @@ static struct filterops user_filtops = {
};
static uma_zone_t knote_zone;
-static atomic_uint kq_ncallouts = ATOMIC_VAR_INIT(0);
+static unsigned int kq_ncallouts = 0;
static unsigned int kq_calloutmax = 4 * 1024;
SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
&kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");
@@ -556,53 +556,84 @@ knote_fork(struct knlist *list, int pid)
#define NOTE_TIMER_PRECMASK (NOTE_SECONDS|NOTE_MSECONDS|NOTE_USECONDS| \
NOTE_NSECONDS)
-static __inline sbintime_t
+static sbintime_t
timer2sbintime(intptr_t data, int flags)
{
- sbintime_t modifier;
+ /*
+ * Macros for converting to the fractional second portion of an
+ * sbintime_t using 64bit multiplication to improve precision.
+ */
+#define NS_TO_SBT(ns) (((ns) * (((uint64_t)1 << 63) / 500000000)) >> 32)
+#define US_TO_SBT(us) (((us) * (((uint64_t)1 << 63) / 500000)) >> 32)
+#define MS_TO_SBT(ms) (((ms) * (((uint64_t)1 << 63) / 500)) >> 32)
switch (flags & NOTE_TIMER_PRECMASK) {
case NOTE_SECONDS:
- modifier = SBT_1S;
- break;
+#ifdef __LP64__
+ if (data > (SBT_MAX / SBT_1S))
+ return (SBT_MAX);
+#endif
+ return ((sbintime_t)data << 32);
case NOTE_MSECONDS: /* FALLTHROUGH */
case 0:
- modifier = SBT_1MS;
- break;
+ if (data >= 1000) {
+ int64_t secs = data / 1000;
+#ifdef __LP64__
+ if (secs > (SBT_MAX / SBT_1S))
+ return (SBT_MAX);
+#endif
+ return (secs << 32 | MS_TO_SBT(data % 1000));
+ }
+ return MS_TO_SBT(data);
case NOTE_USECONDS:
- modifier = SBT_1US;
- break;
+ if (data >= 1000000) {
+ int64_t secs = data / 1000000;
+#ifdef __LP64__
+ if (secs > (SBT_MAX / SBT_1S))
+ return (SBT_MAX);
+#endif
+ return (secs << 32 | US_TO_SBT(data % 1000000));
+ }
+ return US_TO_SBT(data);
case NOTE_NSECONDS:
- modifier = SBT_1NS;
- break;
- default:
- return (-1);
- }
-
+ if (data >= 1000000000) {
+ int64_t secs = data / 1000000000;
#ifdef __LP64__
- if (data > SBT_MAX / modifier)
- return (SBT_MAX);
+ if (secs > (SBT_MAX / SBT_1S))
+ return (SBT_MAX);
#endif
- return (modifier * data);
+ return (secs << 32 | US_TO_SBT(data % 1000000000));
+ }
+ return (NS_TO_SBT(data));
+ default:
+ break;
+ }
+ return (-1);
}
+struct kq_timer_cb_data {
+ struct callout c;
+ sbintime_t next; /* next timer event fires at */
+ sbintime_t to; /* precalculated timer period */
+};
+
static void
filt_timerexpire(void *knx)
{
- struct callout *calloutp;
struct knote *kn;
+ struct kq_timer_cb_data *kc;
kn = knx;
kn->kn_data++;
KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */
- if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) {
- calloutp = (struct callout *)kn->kn_hook;
- *kn->kn_ptr.p_nexttime += timer2sbintime(kn->kn_sdata,
- kn->kn_sfflags);
- callout_reset_sbt_on(calloutp, *kn->kn_ptr.p_nexttime, 0,
- filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE);
- }
+ if ((kn->kn_flags & EV_ONESHOT) != 0)
+ return;
+
+ kc = kn->kn_ptr.p_v;
+ kc->next += kc->to;
+ callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn,
+ PCPU_GET(cpuid), C_ABSOLUTE);
}
/*
@@ -611,39 +642,36 @@ filt_timerexpire(void *knx)
static int
filt_timerattach(struct knote *kn)
{
- struct callout *calloutp;
+ struct kq_timer_cb_data *kc;
sbintime_t to;
unsigned int ncallouts;
- if ((intptr_t)kn->kn_sdata < 0)
+ if (kn->kn_sdata < 0)
return (EINVAL);
- if ((intptr_t)kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0)
+ if (kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0)
kn->kn_sdata = 1;
/* Only precision unit are supported in flags so far */
- if (kn->kn_sfflags & ~NOTE_TIMER_PRECMASK)
+ if ((kn->kn_sfflags & ~NOTE_TIMER_PRECMASK) != 0)
return (EINVAL);
to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags);
if (to < 0)
return (EINVAL);
- ncallouts = atomic_load_explicit(&kq_ncallouts, memory_order_relaxed);
do {
+ ncallouts = kq_ncallouts;
if (ncallouts >= kq_calloutmax)
return (ENOMEM);
- } while (!atomic_compare_exchange_weak_explicit(&kq_ncallouts,
- &ncallouts, ncallouts + 1, memory_order_relaxed,
- memory_order_relaxed));
+ } while (!atomic_cmpset_int(&kq_ncallouts, ncallouts, ncallouts + 1));
kn->kn_flags |= EV_CLEAR; /* automatically set */
kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */
- kn->kn_ptr.p_nexttime = malloc(sizeof(sbintime_t), M_KQUEUE, M_WAITOK);
- calloutp = malloc(sizeof(*calloutp), M_KQUEUE, M_WAITOK);
- callout_init(calloutp, CALLOUT_MPSAFE);
- kn->kn_hook = calloutp;
- *kn->kn_ptr.p_nexttime = to + sbinuptime();
- callout_reset_sbt_on(calloutp, *kn->kn_ptr.p_nexttime, 0,
- filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE);
+ kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK);
+ callout_init(&kc->c, 1);
+ kc->next = to + sbinuptime();
+ kc->to = to;
+ callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn,
+ PCPU_GET(cpuid), C_ABSOLUTE);
return (0);
}
@@ -651,14 +679,13 @@ filt_timerattach(struct knote *kn)
static void
filt_timerdetach(struct knote *kn)
{
- struct callout *calloutp;
+ struct kq_timer_cb_data *kc;
unsigned int old;
- calloutp = (struct callout *)kn->kn_hook;
- callout_drain(calloutp);
- free(calloutp, M_KQUEUE);
- free(kn->kn_ptr.p_nexttime, M_KQUEUE);
- old = atomic_fetch_sub_explicit(&kq_ncallouts, 1, memory_order_relaxed);
+ kc = kn->kn_ptr.p_v;
+ callout_drain(&kc->c);
+ free(kc, M_KQUEUE);
+ old = atomic_fetchadd_int(&kq_ncallouts, -1);
KASSERT(old > 0, ("Number of callouts cannot become negative"));
kn->kn_status |= KN_DETACHED; /* knlist_remove sets it */
}
@@ -1912,6 +1939,7 @@ knote(struct knlist *list, long hint, int lockflags)
struct kqueue *kq;
struct knote *kn, *tkn;
int error;
+ bool own_influx;
if (list == NULL)
return;
@@ -1942,11 +1970,14 @@ knote(struct knlist *list, long hint, int lockflags)
*/
KQ_UNLOCK(kq);
} else if ((lockflags & KNF_NOKQLOCK) != 0) {
- kn->kn_status |= KN_INFLUX;
+ own_influx = (kn->kn_status & KN_INFLUX) == 0;
+ if (own_influx)
+ kn->kn_status |= KN_INFLUX;
KQ_UNLOCK(kq);
error = kn->kn_fop->f_event(kn, hint);
KQ_LOCK(kq);
- kn->kn_status &= ~KN_INFLUX;
+ if (own_influx)
+ kn->kn_status &= ~KN_INFLUX;
if (error)
KNOTE_ACTIVATE(kn, 1);
KQ_UNLOCK_FLUX(kq);
@@ -2031,12 +2062,12 @@ knlist_empty(struct knlist *knl)
{
KNL_ASSERT_LOCKED(knl);
- return SLIST_EMPTY(&knl->kl_list);
+ return (SLIST_EMPTY(&knl->kl_list));
}
-static struct mtx knlist_lock;
+static struct mtx knlist_lock;
MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects",
- MTX_DEF);
+ MTX_DEF);
static void knlist_mtx_lock(void *arg);
static void knlist_mtx_unlock(void *arg);
@@ -2146,17 +2177,8 @@ void
knlist_destroy(struct knlist *knl)
{
-#ifdef INVARIANTS
- /*
- * if we run across this error, we need to find the offending
- * driver and have it call knlist_clear or knlist_delete.
- */
- if (!SLIST_EMPTY(&knl->kl_list))
- printf("WARNING: destroying knlist w/ knotes on it!\n");
-#endif
-
- knl->kl_lockarg = knl->kl_lock = knl->kl_unlock = NULL;
- SLIST_INIT(&knl->kl_list);
+ KASSERT(KNLIST_EMPTY(knl),
+ ("destroying knlist %p with knotes on it", knl));
}
/*
@@ -2275,17 +2297,15 @@ knote_attach(struct knote *kn, struct kqueue *kq)
if (kn->kn_fop->f_isfd) {
if (kn->kn_id >= kq->kq_knlistsize)
- return ENOMEM;
+ return (ENOMEM);
list = &kq->kq_knlist[kn->kn_id];
} else {
if (kq->kq_knhash == NULL)
- return ENOMEM;
+ return (ENOMEM);
list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
}
-
SLIST_INSERT_HEAD(list, kn, kn_link);
-
- return 0;
+ return (0);
}
/*
@@ -2394,11 +2414,9 @@ kqfd_register(int fd, struct kevent *kev, struct thread *td, int waitok)
goto noacquire;
error = kqueue_register(kq, kev, td, waitok);
-
kqueue_release(kq, 0);
noacquire:
fdrop(fp, td);
-
- return error;
+ return (error);
}
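The kern_event.c changes above replace timer2sbintime()'s per-unit multiplier with the NS_TO_SBT/US_TO_SBT/MS_TO_SBT macros, which produce the fractional half of an sbintime_t (a 32.32 fixed-point count where one second is 1 << 32). A minimal userland sketch, included only to illustrate why multiplying by a precomputed 2^63 / (units per half second) and shifting right by 32 is equivalent to value * 2^32 / units-per-second without overflowing the 64-bit constant:

#include <stdint.h>
#include <stdio.h>

/* Same definition as in the patch: milliseconds to fractional sbintime_t. */
#define MS_TO_SBT(ms) (((ms) * (((uint64_t)1 << 63) / 500)) >> 32)

int
main(void)
{
	/* 500 ms should be roughly half a second, i.e. about 1 << 31. */
	uint64_t frac = MS_TO_SBT((uint64_t)500);

	printf("500 ms -> %llu (1 << 31 is %llu)\n",
	    (unsigned long long)frac, 1ULL << 31);
	return (0);
}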
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 3602cbb..e746864 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -499,6 +499,11 @@ exit1(struct thread *td, int rv)
if (!(q->p_flag & P_TRACED)) {
proc_reparent(q, q->p_reaper);
+ if (q->p_state == PRS_ZOMBIE) {
+ PROC_LOCK(q->p_reaper);
+ pksignal(q->p_reaper, SIGCHLD, q->p_ksi);
+ PROC_UNLOCK(q->p_reaper);
+ }
} else {
/*
* Traced processes are killed since their existence
diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c
index e3e946e..9681c87 100644
--- a/sys/kern/kern_lock.c
+++ b/sys/kern/kern_lock.c
@@ -792,8 +792,10 @@ __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
break;
}
- while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED,
- tid)) {
+ for (;;) {
+ if (lk->lk_lock == LK_UNLOCKED &&
+ atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
+ break;
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
#endif
@@ -1129,7 +1131,11 @@ __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
__func__, iwmesg, file, line);
}
- while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
+ for (;;) {
+ if (lk->lk_lock == LK_UNLOCKED &&
+ atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
+ break;
+
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
#endif
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 8d19f2e..27038e1 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sbuf.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/turnstile.h>
#include <sys/vmmeter.h>
@@ -140,6 +141,37 @@ struct lock_class lock_class_mtx_spin = {
#endif
};
+#ifdef ADAPTIVE_MUTEXES
+static SYSCTL_NODE(_debug, OID_AUTO, mtx, CTLFLAG_RD, NULL, "mtx debugging");
+
+static struct lock_delay_config mtx_delay = {
+ .initial = 1000,
+ .step = 500,
+ .min = 100,
+ .max = 5000,
+};
+
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_initial, CTLFLAG_RW, &mtx_delay.initial,
+ 0, "");
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_step, CTLFLAG_RW, &mtx_delay.step,
+ 0, "");
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_min, CTLFLAG_RW, &mtx_delay.min,
+ 0, "");
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_delay.max,
+ 0, "");
+
+static void
+mtx_delay_sysinit(void *dummy)
+{
+
+ mtx_delay.initial = mp_ncpus * 25;
+ mtx_delay.step = (mp_ncpus * 25) / 2;
+ mtx_delay.min = mp_ncpus * 5;
+ mtx_delay.max = mp_ncpus * 25 * 10;
+}
+LOCK_DELAY_SYSINIT(mtx_delay_sysinit);
+#endif
+
/*
* System-wide mutexes
*/
@@ -412,9 +444,11 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
int contested = 0;
uint64_t waittime = 0;
#endif
+#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
- uint64_t spin_cnt = 0;
- uint64_t sleep_cnt = 0;
+ u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
#endif
@@ -422,6 +456,11 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
if (SCHEDULER_STOPPED())
return;
+#if defined(ADAPTIVE_MUTEXES)
+ lock_delay_arg_init(&lda, &mtx_delay);
+#elif defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, NULL);
+#endif
m = mtxlock2mtx(c);
if (mtx_owned(m)) {
@@ -451,9 +490,11 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
all_time -= lockstat_nsecs(&m->lock_object);
#endif
- while (!_mtx_obtain_lock(m, tid)) {
+ for (;;) {
+ if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
+ break;
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
#ifdef ADAPTIVE_MUTEXES
/*
@@ -473,12 +514,8 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
"spinning", "lockname:\"%s\"",
m->lock_object.lo_name);
while (mtx_owner(m) == owner &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread",
sched_tdname((struct thread *)tid),
"running");
@@ -572,7 +609,7 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
/*
* Only record the loops spinning and not sleeping.
*/
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD1(LS_MTX_LOCK_SPIN, m, (all_time - sleep_time));
#endif
}
@@ -634,8 +671,9 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts,
#ifdef KDTRACE_HOOKS
spin_time -= lockstat_nsecs(&m->lock_object);
#endif
- while (!_mtx_obtain_lock(m, tid)) {
-
+ for (;;) {
+ if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
+ break;
/* Give interrupts a chance while we spin. */
spinlock_exit();
while (m->mtx_lock != MTX_UNOWNED) {
@@ -714,7 +752,9 @@ retry:
m->lock_object.lo_name, file, line));
WITNESS_CHECKORDER(&m->lock_object,
opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
- while (!_mtx_obtain_lock(m, tid)) {
+ for (;;) {
+ if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
+ break;
if (m->mtx_lock == tid) {
m->mtx_recurse++;
break;
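The lockmgr, mutex, rwlock and sx hunks in this merge all rewrite their acquisition loops from "while (!atomic_cmpset(..., UNLOCKED, tid))" into a loop that first reads the lock word and only issues the atomic compare-and-set when the lock looks free, so contending CPUs spin on a shared cache line instead of bouncing it with failed atomics. A stand-alone sketch of that test-then-test-and-set pattern, using C11 atomics as a userland stand-in for the kernel's atomic_cmpset_acq_ptr() (the names here are illustrative, not from the patch):

#include <stdatomic.h>
#include <stdint.h>

#define UNOWNED	((uintptr_t)0)

static _Atomic uintptr_t lock_word = UNOWNED;

static void
lock_acquire(uintptr_t tid)
{
	uintptr_t expected;

	for (;;) {
		/* Cheap read first: only try the atomic if it can succeed. */
		if (atomic_load_explicit(&lock_word,
		    memory_order_relaxed) == UNOWNED) {
			expected = UNOWNED;
			if (atomic_compare_exchange_weak_explicit(&lock_word,
			    &expected, tid, memory_order_acquire,
			    memory_order_relaxed))
				return;
		}
		/* Held, or the CAS lost a race: spin and re-test. */
	}
}

int
main(void)
{
	lock_acquire((uintptr_t)1);
	return (0);
}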
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 0f60553..475895c 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -959,7 +959,14 @@ fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread)
strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg));
else
bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg));
- strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname));
+ if (strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname)) >=
+ sizeof(kp->ki_tdname)) {
+ strlcpy(kp->ki_moretdname,
+ td->td_name + sizeof(kp->ki_tdname) - 1,
+ sizeof(kp->ki_moretdname));
+ } else {
+ bzero(kp->ki_moretdname, sizeof(kp->ki_moretdname));
+ }
if (TD_ON_LOCK(td)) {
kp->ki_kiflag |= KI_LOCKBLOCK;
strlcpy(kp->ki_lockname, td->td_lockname,
@@ -1180,6 +1187,7 @@ freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32)
bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1);
bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1);
bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1);
+ bcopy(ki->ki_moretdname, ki32->ki_moretdname, MAXCOMLEN - TDNAMLEN + 1);
CP(*ki, *ki32, ki_flag2);
CP(*ki, *ki32, ki_fibnum);
CP(*ki, *ki32, ki_cr_flags);
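The fill_kinfo_thread() change above relies on strlcpy() returning the full length of the source string: a return value >= the destination size means the thread name was truncated, and the remainder is copied into ki_moretdname. A small userland illustration of the same idiom, with made-up buffer names and sizes:

#include <stdio.h>
#include <string.h>	/* strlcpy() is declared here on FreeBSD */

int
main(void)
{
	char head[8], tail[8];
	const char *name = "worker-thread-3";

	if (strlcpy(head, name, sizeof(head)) >= sizeof(head))
		/* Truncated: copy the part that did not fit. */
		strlcpy(tail, name + sizeof(head) - 1, sizeof(tail));
	else
		tail[0] = '\0';
	printf("head=\"%s\" tail=\"%s\"\n", head, tail);
	return (0);
}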
diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c
index 334d83d..8559840 100644
--- a/sys/kern/kern_rwlock.c
+++ b/sys/kern/kern_rwlock.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>
@@ -66,15 +67,6 @@ PMC_SOFT_DECLARE( , , lock, failed);
*/
#define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock))
-#ifdef ADAPTIVE_RWLOCKS
-static int rowner_retries = 10;
-static int rowner_loops = 10000;
-static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
- "rwlock debugging");
-SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
-SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
-#endif
-
#ifdef DDB
#include <ddb/ddb.h>
@@ -101,6 +93,42 @@ struct lock_class lock_class_rw = {
#endif
};
+#ifdef ADAPTIVE_RWLOCKS
+static int rowner_retries = 10;
+static int rowner_loops = 10000;
+static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
+ "rwlock debugging");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
+
+static struct lock_delay_config rw_delay = {
+ .initial = 1000,
+ .step = 500,
+ .min = 100,
+ .max = 5000,
+};
+
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_initial, CTLFLAG_RW, &rw_delay.initial,
+ 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_step, CTLFLAG_RW, &rw_delay.step,
+ 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_min, CTLFLAG_RW, &rw_delay.min,
+ 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
+ 0, "");
+
+static void
+rw_delay_sysinit(void *dummy)
+{
+
+ rw_delay.initial = mp_ncpus * 25;
+ rw_delay.step = (mp_ncpus * 25) / 2;
+ rw_delay.min = mp_ncpus * 5;
+ rw_delay.max = mp_ncpus * 25 * 10;
+}
+LOCK_DELAY_SYSINIT(rw_delay_sysinit);
+#endif
+
/*
* Return a pointer to the owning thread if the lock is write-locked or
* NULL if the lock is unlocked or read-locked.
@@ -355,10 +383,12 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line)
int contested = 0;
#endif
uintptr_t v;
+#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
uintptr_t state;
- uint64_t spin_cnt = 0;
- uint64_t sleep_cnt = 0;
+ u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
#endif
@@ -366,6 +396,11 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line)
if (SCHEDULER_STOPPED())
return;
+#if defined(ADAPTIVE_RWLOCKS)
+ lock_delay_arg_init(&lda, &rw_delay);
+#elif defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, NULL);
+#endif
rw = rwlock2rw(c);
KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
@@ -383,9 +418,6 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line)
state = rw->rw_lock;
#endif
for (;;) {
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
/*
* Handle the easy case. If no other thread has a write
* lock, then try to bump up the count of read locks. Note
@@ -414,6 +446,9 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line)
}
continue;
}
+#ifdef KDTRACE_HOOKS
+ lda.spin_cnt++;
+#endif
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
#endif
@@ -437,12 +472,8 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line)
sched_tdname(curthread), "spinning",
"lockname:\"%s\"", rw->lock_object.lo_name);
while ((struct thread*)RW_OWNER(rw->rw_lock) ==
- owner && TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ owner && TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread",
sched_tdname(curthread), "running");
continue;
@@ -458,6 +489,9 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line)
break;
cpu_spinwait();
}
+#ifdef KDTRACE_HOOKS
+ lda.spin_cnt += rowner_loops - i;
+#endif
KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
"running");
if (i != rowner_loops)
@@ -549,7 +583,7 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line)
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
/* Record only the loops spinning and not sleeping. */
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(LS_RW_RLOCK_SPIN, rw, all_time - sleep_time,
LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
@@ -737,10 +771,12 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
uint64_t waittime = 0;
int contested = 0;
#endif
+#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
uintptr_t state;
- uint64_t spin_cnt = 0;
- uint64_t sleep_cnt = 0;
+ u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
#endif
@@ -748,6 +784,11 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
if (SCHEDULER_STOPPED())
return;
+#if defined(ADAPTIVE_RWLOCKS)
+ lock_delay_arg_init(&lda, &rw_delay);
+#elif defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, NULL);
+#endif
rw = rwlock2rw(c);
if (rw_wlocked(rw)) {
@@ -768,9 +809,11 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
all_time -= lockstat_nsecs(&rw->lock_object);
state = rw->rw_lock;
#endif
- while (!_rw_write_lock(rw, tid)) {
+ for (;;) {
+ if (rw->rw_lock == RW_UNLOCKED && _rw_write_lock(rw, tid))
+ break;
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
@@ -793,12 +836,8 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
"spinning", "lockname:\"%s\"",
rw->lock_object.lo_name);
while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
"running");
continue;
@@ -823,7 +862,7 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
"running");
#ifdef KDTRACE_HOOKS
- spin_cnt += rowner_loops - i;
+ lda.spin_cnt += rowner_loops - i;
#endif
if (i != rowner_loops)
continue;
@@ -913,9 +952,9 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
/* Record only the loops spinning and not sleeping. */
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(LS_RW_WLOCK_SPIN, rw, all_time - sleep_time,
- LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
+ LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
#endif
LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index 952f7d4..5d418c2 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
+#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
@@ -54,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/sx.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#if defined(SMP) && !defined(NO_ADAPTIVE_SX)
@@ -147,6 +149,33 @@ static u_int asx_loops = 10000;
static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging");
SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, "");
SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, "");
+
+static struct lock_delay_config sx_delay = {
+ .initial = 1000,
+ .step = 500,
+ .min = 100,
+ .max = 5000,
+};
+
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_initial, CTLFLAG_RW, &sx_delay.initial,
+ 0, "");
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_step, CTLFLAG_RW, &sx_delay.step,
+ 0, "");
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_min, CTLFLAG_RW, &sx_delay.min,
+ 0, "");
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_max, CTLFLAG_RW, &sx_delay.max,
+ 0, "");
+
+static void
+sx_delay_sysinit(void *dummy)
+{
+
+ sx_delay.initial = mp_ncpus * 25;
+ sx_delay.step = (mp_ncpus * 25) / 2;
+ sx_delay.min = mp_ncpus * 5;
+ sx_delay.max = mp_ncpus * 25 * 10;
+}
+LOCK_DELAY_SYSINIT(sx_delay_sysinit);
#endif
void
@@ -516,10 +545,12 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
int contested = 0;
#endif
int error = 0;
+#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
uintptr_t state;
- uint64_t spin_cnt = 0;
- uint64_t sleep_cnt = 0;
+ u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
#endif
@@ -527,6 +558,12 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
if (SCHEDULER_STOPPED())
return (0);
+#if defined(ADAPTIVE_SX)
+ lock_delay_arg_init(&lda, &sx_delay);
+#elif defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, NULL);
+#endif
+
/* If we already hold an exclusive lock, then recurse. */
if (sx_xlocked(sx)) {
KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0,
@@ -547,9 +584,12 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
all_time -= lockstat_nsecs(&sx->lock_object);
state = sx->sx_lock;
#endif
- while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) {
+ for (;;) {
+ if (sx->sx_lock == SX_LOCK_UNLOCKED &&
+ atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid))
+ break;
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
@@ -578,12 +618,8 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
sx->lock_object.lo_name);
GIANT_SAVE();
while (SX_OWNER(sx->sx_lock) == x &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread",
sched_tdname(curthread), "running");
continue;
@@ -605,7 +641,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
break;
cpu_spinwait();
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
}
KTR_STATE0(KTR_SCHED, "thread",
@@ -725,7 +761,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
LOCKSTAT_RECORD4(LS_SX_XLOCK_BLOCK, sx, sleep_time,
LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(LS_SX_XLOCK_SPIN, sx, all_time - sleep_time,
LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
@@ -818,10 +854,12 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
#endif
uintptr_t x;
int error = 0;
+#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
uintptr_t state;
- uint64_t spin_cnt = 0;
- uint64_t sleep_cnt = 0;
+ u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
#endif
@@ -829,6 +867,11 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
if (SCHEDULER_STOPPED())
return (0);
+#if defined(ADAPTIVE_SX)
+ lock_delay_arg_init(&lda, &sx_delay);
+#elif defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, NULL);
+#endif
#ifdef KDTRACE_HOOKS
state = sx->sx_lock;
all_time -= lockstat_nsecs(&sx->lock_object);
@@ -840,7 +883,7 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
*/
for (;;) {
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
x = sx->sx_lock;
@@ -888,12 +931,8 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
"lockname:\"%s\"", sx->lock_object.lo_name);
GIANT_SAVE();
while (SX_OWNER(sx->sx_lock) == x &&
- TD_IS_RUNNING(owner)) {
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- cpu_spinwait();
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread",
sched_tdname(curthread), "running");
continue;
@@ -989,7 +1028,7 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
LOCKSTAT_RECORD4(LS_SX_SLOCK_BLOCK, sx, sleep_time,
LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(LS_SX_SLOCK_SPIN, sx, all_time - sleep_time,
LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c
index 74050ff..63e4ed0 100644
--- a/sys/kern/kern_thr.c
+++ b/sys/kern/kern_thr.c
@@ -569,8 +569,11 @@ sys_thr_set_name(struct thread *td, struct thr_set_name_args *uap)
error = 0;
name[0] = '\0';
if (uap->name != NULL) {
- error = copyinstr(uap->name, name, sizeof(name),
- NULL);
+ error = copyinstr(uap->name, name, sizeof(name), NULL);
+ if (error == ENAMETOOLONG) {
+ error = copyin(uap->name, name, sizeof(name) - 1);
+ name[sizeof(name) - 1] = '\0';
+ }
if (error)
return (error);
}
diff --git a/sys/kern/subr_lock.c b/sys/kern/subr_lock.c
index 8aec803..cacaf56 100644
--- a/sys/kern/subr_lock.c
+++ b/sys/kern/subr_lock.c
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#endif
#include <machine/cpufunc.h>
+#include <machine/cpu.h>
CTASSERT(LOCK_CLASS_MAX == 15);
@@ -103,6 +104,34 @@ lock_destroy(struct lock_object *lock)
lock->lo_flags &= ~LO_INITIALIZED;
}
+void
+lock_delay(struct lock_delay_arg *la)
+{
+ u_int i, delay, backoff, min, max;
+ struct lock_delay_config *lc = la->config;
+
+ delay = la->delay;
+
+ if (delay == 0)
+ delay = lc->initial;
+ else {
+ delay += lc->step;
+ max = lc->max;
+ if (delay > max)
+ delay = max;
+ }
+
+ backoff = cpu_ticks() % delay;
+ min = lc->min;
+ if (backoff < min)
+ backoff = min;
+ for (i = 0; i < backoff; i++)
+ cpu_spinwait();
+
+ la->delay = delay;
+ la->spin_cnt += backoff;
+}
+
#ifdef DDB
DB_SHOW_COMMAND(lock, db_show_lock)
{
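lock_delay(), added in the subr_lock.c hunk above, backs the adaptive-spin paths that previously called cpu_spinwait() in a tight loop: the caller's delay starts at the configured initial value, grows by step on every subsequent call, is clamped to max, and the number of spin iterations actually performed is cpu_ticks() % delay, raised to at least min. A userland simulation of that schedule, with made-up tuning numbers; rand() stands in for cpu_ticks() and printf for the cpu_spinwait() loop:

#include <stdio.h>
#include <stdlib.h>

/* Mirror of the config layout used by the patch, local to this sketch. */
struct lock_delay_config {
	unsigned int initial, step, min, max;
};

int
main(void)
{
	struct lock_delay_config lc = {
		.initial = 100, .step = 50, .min = 20, .max = 500
	};
	unsigned int delay = 0, backoff;
	int pass;

	for (pass = 0; pass < 10; pass++) {
		if (delay == 0)
			delay = lc.initial;
		else {
			delay += lc.step;
			if (delay > lc.max)
				delay = lc.max;
		}
		backoff = (unsigned int)rand() % delay;
		if (backoff < lc.min)
			backoff = lc.min;
		/* A real caller would execute cpu_spinwait() backoff times. */
		printf("pass %d: delay %u, spins %u\n", pass, delay, backoff);
	}
	return (0);
}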
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index f2ddf66..9f922db 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -256,6 +256,43 @@ statfs_scale_blocks(struct statfs *sf, long max_size)
sf->f_bavail >>= shift;
}
+static int
+kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf)
+{
+ struct statfs *sp;
+ int error;
+
+ if (mp == NULL)
+ return (EBADF);
+ error = vfs_busy(mp, 0);
+ vfs_rel(mp);
+ if (error != 0)
+ return (error);
+#ifdef MAC
+ error = mac_mount_check_stat(td->td_ucred, mp);
+ if (error != 0)
+ goto out;
+#endif
+ /*
+ * Set these in case the underlying filesystem fails to do so.
+ */
+ sp = &mp->mnt_stat;
+ sp->f_version = STATFS_VERSION;
+ sp->f_namemax = NAME_MAX;
+ sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+ error = VFS_STATFS(mp, sp);
+ if (error != 0)
+ goto out;
+ *buf = *sp;
+ if (priv_check(td, PRIV_VFS_GENERATION)) {
+ buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
+ prison_enforce_statfs(td->td_ucred, mp, buf);
+ }
+out:
+ vfs_unbusy(mp);
+ return (error);
+}
+
/*
* Get filesystem statistics.
*/
@@ -287,7 +324,6 @@ kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
struct statfs *buf)
{
struct mount *mp;
- struct statfs *sp, sb;
struct nameidata nd;
int error;
@@ -300,35 +336,7 @@ kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
vfs_ref(mp);
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_vp);
- error = vfs_busy(mp, 0);
- vfs_rel(mp);
- if (error != 0)
- return (error);
-#ifdef MAC
- error = mac_mount_check_stat(td->td_ucred, mp);
- if (error != 0)
- goto out;
-#endif
- /*
- * Set these in case the underlying filesystem fails to do so.
- */
- sp = &mp->mnt_stat;
- sp->f_version = STATFS_VERSION;
- sp->f_namemax = NAME_MAX;
- sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
- error = VFS_STATFS(mp, sp);
- if (error != 0)
- goto out;
- if (priv_check(td, PRIV_VFS_GENERATION)) {
- bcopy(sp, &sb, sizeof(sb));
- sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
- prison_enforce_statfs(td->td_ucred, mp, &sb);
- sp = &sb;
- }
- *buf = *sp;
-out:
- vfs_unbusy(mp);
- return (error);
+ return (kern_do_statfs(td, mp, buf));
}
/*
@@ -362,7 +370,6 @@ kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
{
struct file *fp;
struct mount *mp;
- struct statfs *sp, sb;
struct vnode *vp;
cap_rights_t rights;
int error;
@@ -378,44 +385,11 @@ kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
AUDIT_ARG_VNODE1(vp);
#endif
mp = vp->v_mount;
- if (mp)
+ if (mp != NULL)
vfs_ref(mp);
VOP_UNLOCK(vp, 0);
fdrop(fp, td);
- if (mp == NULL) {
- error = EBADF;
- goto out;
- }
- error = vfs_busy(mp, 0);
- vfs_rel(mp);
- if (error != 0)
- return (error);
-#ifdef MAC
- error = mac_mount_check_stat(td->td_ucred, mp);
- if (error != 0)
- goto out;
-#endif
- /*
- * Set these in case the underlying filesystem fails to do so.
- */
- sp = &mp->mnt_stat;
- sp->f_version = STATFS_VERSION;
- sp->f_namemax = NAME_MAX;
- sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
- error = VFS_STATFS(mp, sp);
- if (error != 0)
- goto out;
- if (priv_check(td, PRIV_VFS_GENERATION)) {
- bcopy(sp, &sb, sizeof(sb));
- sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
- prison_enforce_statfs(td->td_ucred, mp, &sb);
- sp = &sb;
- }
- *buf = *sp;
-out:
- if (mp)
- vfs_unbusy(mp);
- return (error);
+ return (kern_do_statfs(td, mp, buf));
}
/*