author     Renato Botelho <renato@netgate.com>    2017-01-09 12:11:05 -0200
committer  Renato Botelho <renato@netgate.com>    2017-01-09 12:11:05 -0200
commit     681a482d8fc4bfc14a24f7a9d75cca6337f2a520 (patch)
tree       08368e0c4dcea4baa16f4a34b2cc104c42e1ed27 /sys/kern
parent     cbeab2a9b6b7ac70992175202f35fcc05a5821d5 (diff)
parent     91f6edbb8913d163d5c16fb615e84baf8a16d390 (diff)
Merge remote-tracking branch 'origin/stable/10' into devel
Diffstat (limited to 'sys/kern')

 sys/kern/kern_event.c   | 162
 sys/kern/kern_exit.c    |   5
 sys/kern/kern_lock.c    |  12
 sys/kern/kern_mutex.c   |  68
 sys/kern/kern_proc.c    |  10
 sys/kern/kern_rwlock.c  | 107
 sys/kern/kern_sx.c      |  83
 sys/kern/kern_thr.c     |   7
 sys/kern/subr_lock.c    |  29
 sys/kern/vfs_syscalls.c | 106

 10 files changed, 375 insertions(+), 214 deletions(-)
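Most of the churn in kern_mutex.c, kern_rwlock.c, kern_sx.c and subr_lock.c comes from one change: the open-coded cpu_spinwait() loops in the adaptive-spin paths now call a shared lock_delay() primitive that backs off for a randomized, bounded number of spins, with per-lock-class tunables that the SYSINIT handlers scale by mp_ncpus at boot. The sketch below is a minimal user-space rendering of that backoff for reading outside the diff context; lock_delay_sketch(), cpu_spinwait_hint() and the explicit ticks parameter are stand-ins for the kernel's lock_delay(), cpu_spinwait() and cpu_ticks(), and the structures simply mirror the ones added in subr_lock.c.

```c
/*
 * User-space sketch of the lock_delay() backoff introduced by this merge.
 * The config values match the static defaults in the diff before the
 * SYSINIT handlers rescale them by mp_ncpus.
 */
#include <stdint.h>
#include <stdio.h>

struct lock_delay_config {
	unsigned int initial;	/* first delay ceiling when contention is seen */
	unsigned int step;	/* growth per failed acquisition attempt */
	unsigned int min;	/* lower bound on the randomized backoff */
	unsigned int max;	/* upper bound on the delay ceiling */
};

struct lock_delay_arg {
	struct lock_delay_config *config;
	unsigned int delay;	/* current delay ceiling */
	unsigned int spin_cnt;	/* total spins, fed to the LOCKSTAT accounting */
};

static void
cpu_spinwait_hint(void)	/* stand-in for the kernel's cpu_spinwait() */
{
#if defined(__x86_64__) || defined(__i386__)
	__asm__ __volatile__("pause");
#endif
}

static void
lock_delay_sketch(struct lock_delay_arg *la, uint64_t ticks)
{
	struct lock_delay_config *lc = la->config;
	unsigned int i, delay, backoff;

	/* Grow the ceiling on every failed attempt, saturating at max. */
	delay = la->delay;
	if (delay == 0)
		delay = lc->initial;
	else {
		delay += lc->step;
		if (delay > lc->max)
			delay = lc->max;
	}

	/* Randomize within the ceiling so contending CPUs desynchronize. */
	backoff = ticks % delay;
	if (backoff < lc->min)
		backoff = lc->min;
	for (i = 0; i < backoff; i++)
		cpu_spinwait_hint();

	la->delay = delay;
	la->spin_cnt += backoff;
}

int
main(void)
{
	struct lock_delay_config cfg = {
		.initial = 1000, .step = 500, .min = 100, .max = 5000 };
	struct lock_delay_arg la = { .config = &cfg };
	uint64_t fake_ticks = 123456789;
	int round;

	/* Three failed attempts: the ceiling grows 1000 -> 1500 -> 2000. */
	for (round = 0; round < 3; round++) {
		lock_delay_sketch(&la, fake_ticks + round * 7919);
		printf("round %d: ceiling=%u total_spins=%u\n",
		    round, la.delay, la.spin_cnt);
	}
	return (0);
}
```

In the merged code the owner-running spin loops, e.g. `while (mtx_owner(m) == owner && TD_IS_RUNNING(owner)) lock_delay(&lda);`, replace the previous per-iteration cpu_spinwait() calls, and lda.spin_cnt feeds the existing spin-versus-sleep LOCKSTAT records. The full diff follows.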
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index fba163f..2c99803 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -47,7 +47,6 @@ __FBSDID("$FreeBSD$"); #include <sys/fcntl.h> #include <sys/kthread.h> #include <sys/selinfo.h> -#include <sys/stdatomic.h> #include <sys/queue.h> #include <sys/event.h> #include <sys/eventvar.h> @@ -66,6 +65,7 @@ __FBSDID("$FreeBSD$"); #ifdef KTRACE #include <sys/ktrace.h> #endif +#include <machine/atomic.h> #include <vm/uma.h> @@ -184,7 +184,7 @@ static struct filterops user_filtops = { }; static uma_zone_t knote_zone; -static atomic_uint kq_ncallouts = ATOMIC_VAR_INIT(0); +static unsigned int kq_ncallouts = 0; static unsigned int kq_calloutmax = 4 * 1024; SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue"); @@ -556,53 +556,84 @@ knote_fork(struct knlist *list, int pid) #define NOTE_TIMER_PRECMASK (NOTE_SECONDS|NOTE_MSECONDS|NOTE_USECONDS| \ NOTE_NSECONDS) -static __inline sbintime_t +static sbintime_t timer2sbintime(intptr_t data, int flags) { - sbintime_t modifier; + /* + * Macros for converting to the fractional second portion of an + * sbintime_t using 64bit multiplication to improve precision. + */ +#define NS_TO_SBT(ns) (((ns) * (((uint64_t)1 << 63) / 500000000)) >> 32) +#define US_TO_SBT(us) (((us) * (((uint64_t)1 << 63) / 500000)) >> 32) +#define MS_TO_SBT(ms) (((ms) * (((uint64_t)1 << 63) / 500)) >> 32) switch (flags & NOTE_TIMER_PRECMASK) { case NOTE_SECONDS: - modifier = SBT_1S; - break; +#ifdef __LP64__ + if (data > (SBT_MAX / SBT_1S)) + return (SBT_MAX); +#endif + return ((sbintime_t)data << 32); case NOTE_MSECONDS: /* FALLTHROUGH */ case 0: - modifier = SBT_1MS; - break; + if (data >= 1000) { + int64_t secs = data / 1000; +#ifdef __LP64__ + if (secs > (SBT_MAX / SBT_1S)) + return (SBT_MAX); +#endif + return (secs << 32 | MS_TO_SBT(data % 1000)); + } + return MS_TO_SBT(data); case NOTE_USECONDS: - modifier = SBT_1US; - break; + if (data >= 1000000) { + int64_t secs = data / 1000000; +#ifdef __LP64__ + if (secs > (SBT_MAX / SBT_1S)) + return (SBT_MAX); +#endif + return (secs << 32 | US_TO_SBT(data % 1000000)); + } + return US_TO_SBT(data); case NOTE_NSECONDS: - modifier = SBT_1NS; - break; - default: - return (-1); - } - + if (data >= 1000000000) { + int64_t secs = data / 1000000000; #ifdef __LP64__ - if (data > SBT_MAX / modifier) - return (SBT_MAX); + if (secs > (SBT_MAX / SBT_1S)) + return (SBT_MAX); #endif - return (modifier * data); + return (secs << 32 | US_TO_SBT(data % 1000000000)); + } + return (NS_TO_SBT(data)); + default: + break; + } + return (-1); } +struct kq_timer_cb_data { + struct callout c; + sbintime_t next; /* next timer event fires at */ + sbintime_t to; /* precalculated timer period */ +}; + static void filt_timerexpire(void *knx) { - struct callout *calloutp; struct knote *kn; + struct kq_timer_cb_data *kc; kn = knx; kn->kn_data++; KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */ - if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) { - calloutp = (struct callout *)kn->kn_hook; - *kn->kn_ptr.p_nexttime += timer2sbintime(kn->kn_sdata, - kn->kn_sfflags); - callout_reset_sbt_on(calloutp, *kn->kn_ptr.p_nexttime, 0, - filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE); - } + if ((kn->kn_flags & EV_ONESHOT) != 0) + return; + + kc = kn->kn_ptr.p_v; + kc->next += kc->to; + callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn, + PCPU_GET(cpuid), C_ABSOLUTE); } /* @@ -611,39 +642,36 @@ filt_timerexpire(void *knx) static int 
filt_timerattach(struct knote *kn) { - struct callout *calloutp; + struct kq_timer_cb_data *kc; sbintime_t to; unsigned int ncallouts; - if ((intptr_t)kn->kn_sdata < 0) + if (kn->kn_sdata < 0) return (EINVAL); - if ((intptr_t)kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0) + if (kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0) kn->kn_sdata = 1; /* Only precision unit are supported in flags so far */ - if (kn->kn_sfflags & ~NOTE_TIMER_PRECMASK) + if ((kn->kn_sfflags & ~NOTE_TIMER_PRECMASK) != 0) return (EINVAL); to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags); if (to < 0) return (EINVAL); - ncallouts = atomic_load_explicit(&kq_ncallouts, memory_order_relaxed); do { + ncallouts = kq_ncallouts; if (ncallouts >= kq_calloutmax) return (ENOMEM); - } while (!atomic_compare_exchange_weak_explicit(&kq_ncallouts, - &ncallouts, ncallouts + 1, memory_order_relaxed, - memory_order_relaxed)); + } while (!atomic_cmpset_int(&kq_ncallouts, ncallouts, ncallouts + 1)); kn->kn_flags |= EV_CLEAR; /* automatically set */ kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */ - kn->kn_ptr.p_nexttime = malloc(sizeof(sbintime_t), M_KQUEUE, M_WAITOK); - calloutp = malloc(sizeof(*calloutp), M_KQUEUE, M_WAITOK); - callout_init(calloutp, CALLOUT_MPSAFE); - kn->kn_hook = calloutp; - *kn->kn_ptr.p_nexttime = to + sbinuptime(); - callout_reset_sbt_on(calloutp, *kn->kn_ptr.p_nexttime, 0, - filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE); + kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK); + callout_init(&kc->c, 1); + kc->next = to + sbinuptime(); + kc->to = to; + callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn, + PCPU_GET(cpuid), C_ABSOLUTE); return (0); } @@ -651,14 +679,13 @@ filt_timerattach(struct knote *kn) static void filt_timerdetach(struct knote *kn) { - struct callout *calloutp; + struct kq_timer_cb_data *kc; unsigned int old; - calloutp = (struct callout *)kn->kn_hook; - callout_drain(calloutp); - free(calloutp, M_KQUEUE); - free(kn->kn_ptr.p_nexttime, M_KQUEUE); - old = atomic_fetch_sub_explicit(&kq_ncallouts, 1, memory_order_relaxed); + kc = kn->kn_ptr.p_v; + callout_drain(&kc->c); + free(kc, M_KQUEUE); + old = atomic_fetchadd_int(&kq_ncallouts, -1); KASSERT(old > 0, ("Number of callouts cannot become negative")); kn->kn_status |= KN_DETACHED; /* knlist_remove sets it */ } @@ -1912,6 +1939,7 @@ knote(struct knlist *list, long hint, int lockflags) struct kqueue *kq; struct knote *kn, *tkn; int error; + bool own_influx; if (list == NULL) return; @@ -1942,11 +1970,14 @@ knote(struct knlist *list, long hint, int lockflags) */ KQ_UNLOCK(kq); } else if ((lockflags & KNF_NOKQLOCK) != 0) { - kn->kn_status |= KN_INFLUX; + own_influx = (kn->kn_status & KN_INFLUX) == 0; + if (own_influx) + kn->kn_status |= KN_INFLUX; KQ_UNLOCK(kq); error = kn->kn_fop->f_event(kn, hint); KQ_LOCK(kq); - kn->kn_status &= ~KN_INFLUX; + if (own_influx) + kn->kn_status &= ~KN_INFLUX; if (error) KNOTE_ACTIVATE(kn, 1); KQ_UNLOCK_FLUX(kq); @@ -2031,12 +2062,12 @@ knlist_empty(struct knlist *knl) { KNL_ASSERT_LOCKED(knl); - return SLIST_EMPTY(&knl->kl_list); + return (SLIST_EMPTY(&knl->kl_list)); } -static struct mtx knlist_lock; +static struct mtx knlist_lock; MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects", - MTX_DEF); + MTX_DEF); static void knlist_mtx_lock(void *arg); static void knlist_mtx_unlock(void *arg); @@ -2146,17 +2177,8 @@ void knlist_destroy(struct knlist *knl) { -#ifdef INVARIANTS - /* - * if we run across this error, we need to find the offending - * driver 
and have it call knlist_clear or knlist_delete. - */ - if (!SLIST_EMPTY(&knl->kl_list)) - printf("WARNING: destroying knlist w/ knotes on it!\n"); -#endif - - knl->kl_lockarg = knl->kl_lock = knl->kl_unlock = NULL; - SLIST_INIT(&knl->kl_list); + KASSERT(KNLIST_EMPTY(knl), + ("destroying knlist %p with knotes on it", knl)); } /* @@ -2275,17 +2297,15 @@ knote_attach(struct knote *kn, struct kqueue *kq) if (kn->kn_fop->f_isfd) { if (kn->kn_id >= kq->kq_knlistsize) - return ENOMEM; + return (ENOMEM); list = &kq->kq_knlist[kn->kn_id]; } else { if (kq->kq_knhash == NULL) - return ENOMEM; + return (ENOMEM); list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; } - SLIST_INSERT_HEAD(list, kn, kn_link); - - return 0; + return (0); } /* @@ -2394,11 +2414,9 @@ kqfd_register(int fd, struct kevent *kev, struct thread *td, int waitok) goto noacquire; error = kqueue_register(kq, kev, td, waitok); - kqueue_release(kq, 0); noacquire: fdrop(fp, td); - - return error; + return (error); } diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 3602cbb..e746864 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -499,6 +499,11 @@ exit1(struct thread *td, int rv) if (!(q->p_flag & P_TRACED)) { proc_reparent(q, q->p_reaper); + if (q->p_state == PRS_ZOMBIE) { + PROC_LOCK(q->p_reaper); + pksignal(q->p_reaper, SIGCHLD, q->p_ksi); + PROC_UNLOCK(q->p_reaper); + } } else { /* * Traced processes are killed since their existence diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c index e3e946e..9681c87 100644 --- a/sys/kern/kern_lock.c +++ b/sys/kern/kern_lock.c @@ -792,8 +792,10 @@ __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk, break; } - while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, - tid)) { + for (;;) { + if (lk->lk_lock == LK_UNLOCKED && + atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) + break; #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif @@ -1129,7 +1131,11 @@ __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk, __func__, iwmesg, file, line); } - while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) { + for (;;) { + if (lk->lk_lock == LK_UNLOCKED && + atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) + break; + #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c index 8d19f2e..27038e1 100644 --- a/sys/kern/kern_mutex.c +++ b/sys/kern/kern_mutex.c @@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$"); #include <sys/resourcevar.h> #include <sys/sched.h> #include <sys/sbuf.h> +#include <sys/smp.h> #include <sys/sysctl.h> #include <sys/turnstile.h> #include <sys/vmmeter.h> @@ -140,6 +141,37 @@ struct lock_class lock_class_mtx_spin = { #endif }; +#ifdef ADAPTIVE_MUTEXES +static SYSCTL_NODE(_debug, OID_AUTO, mtx, CTLFLAG_RD, NULL, "mtx debugging"); + +static struct lock_delay_config mtx_delay = { + .initial = 1000, + .step = 500, + .min = 100, + .max = 5000, +}; + +SYSCTL_INT(_debug_mtx, OID_AUTO, delay_initial, CTLFLAG_RW, &mtx_delay.initial, + 0, ""); +SYSCTL_INT(_debug_mtx, OID_AUTO, delay_step, CTLFLAG_RW, &mtx_delay.step, + 0, ""); +SYSCTL_INT(_debug_mtx, OID_AUTO, delay_min, CTLFLAG_RW, &mtx_delay.min, + 0, ""); +SYSCTL_INT(_debug_mtx, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_delay.max, + 0, ""); + +static void +mtx_delay_sysinit(void *dummy) +{ + + mtx_delay.initial = mp_ncpus * 25; + mtx_delay.step = (mp_ncpus * 25) / 2; + mtx_delay.min = mp_ncpus * 5; + mtx_delay.max = mp_ncpus * 25 * 10; +} +LOCK_DELAY_SYSINIT(mtx_delay_sysinit); +#endif + /* 
* System-wide mutexes */ @@ -412,9 +444,11 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, int contested = 0; uint64_t waittime = 0; #endif +#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS) + struct lock_delay_arg lda; +#endif #ifdef KDTRACE_HOOKS - uint64_t spin_cnt = 0; - uint64_t sleep_cnt = 0; + u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif @@ -422,6 +456,11 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, if (SCHEDULER_STOPPED()) return; +#if defined(ADAPTIVE_MUTEXES) + lock_delay_arg_init(&lda, &mtx_delay); +#elif defined(KDTRACE_HOOKS) + lock_delay_arg_init(&lda, NULL); +#endif m = mtxlock2mtx(c); if (mtx_owned(m)) { @@ -451,9 +490,11 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, all_time -= lockstat_nsecs(&m->lock_object); #endif - while (!_mtx_obtain_lock(m, tid)) { + for (;;) { + if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid)) + break; #ifdef KDTRACE_HOOKS - spin_cnt++; + lda.spin_cnt++; #endif #ifdef ADAPTIVE_MUTEXES /* @@ -473,12 +514,8 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, "spinning", "lockname:\"%s\"", m->lock_object.lo_name); while (mtx_owner(m) == owner && - TD_IS_RUNNING(owner)) { - cpu_spinwait(); -#ifdef KDTRACE_HOOKS - spin_cnt++; -#endif - } + TD_IS_RUNNING(owner)) + lock_delay(&lda); KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "running"); @@ -572,7 +609,7 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, /* * Only record the loops spinning and not sleeping. */ - if (spin_cnt > sleep_cnt) + if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD1(LS_MTX_LOCK_SPIN, m, (all_time - sleep_time)); #endif } @@ -634,8 +671,9 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts, #ifdef KDTRACE_HOOKS spin_time -= lockstat_nsecs(&m->lock_object); #endif - while (!_mtx_obtain_lock(m, tid)) { - + for (;;) { + if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid)) + break; /* Give interrupts a chance while we spin. 
*/ spinlock_exit(); while (m->mtx_lock != MTX_UNOWNED) { @@ -714,7 +752,9 @@ retry: m->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); - while (!_mtx_obtain_lock(m, tid)) { + for (;;) { + if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid)) + break; if (m->mtx_lock == tid) { m->mtx_recurse++; break; diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 0f60553..475895c 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -959,7 +959,14 @@ fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread) strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg)); else bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg)); - strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname)); + if (strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname)) >= + sizeof(kp->ki_tdname)) { + strlcpy(kp->ki_moretdname, + td->td_name + sizeof(kp->ki_tdname) - 1, + sizeof(kp->ki_moretdname)); + } else { + bzero(kp->ki_moretdname, sizeof(kp->ki_moretdname)); + } if (TD_ON_LOCK(td)) { kp->ki_kiflag |= KI_LOCKBLOCK; strlcpy(kp->ki_lockname, td->td_lockname, @@ -1180,6 +1187,7 @@ freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32) bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1); bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1); bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1); + bcopy(ki->ki_moretdname, ki32->ki_moretdname, MAXCOMLEN - TDNAMLEN + 1); CP(*ki, *ki32, ki_flag2); CP(*ki, *ki32, ki_fibnum); CP(*ki, *ki32, ki_cr_flags); diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c index 334d83d..8559840 100644 --- a/sys/kern/kern_rwlock.c +++ b/sys/kern/kern_rwlock.c @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/rwlock.h> #include <sys/sched.h> +#include <sys/smp.h> #include <sys/sysctl.h> #include <sys/systm.h> #include <sys/turnstile.h> @@ -66,15 +67,6 @@ PMC_SOFT_DECLARE( , , lock, failed); */ #define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock)) -#ifdef ADAPTIVE_RWLOCKS -static int rowner_retries = 10; -static int rowner_loops = 10000; -static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL, - "rwlock debugging"); -SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, ""); -SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, ""); -#endif - #ifdef DDB #include <ddb/ddb.h> @@ -101,6 +93,42 @@ struct lock_class lock_class_rw = { #endif }; +#ifdef ADAPTIVE_RWLOCKS +static int rowner_retries = 10; +static int rowner_loops = 10000; +static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL, + "rwlock debugging"); +SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, ""); +SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, ""); + +static struct lock_delay_config rw_delay = { + .initial = 1000, + .step = 500, + .min = 100, + .max = 5000, +}; + +SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_initial, CTLFLAG_RW, &rw_delay.initial, + 0, ""); +SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_step, CTLFLAG_RW, &rw_delay.step, + 0, ""); +SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_min, CTLFLAG_RW, &rw_delay.min, + 0, ""); +SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max, + 0, ""); + +static void +rw_delay_sysinit(void *dummy) +{ + + rw_delay.initial = mp_ncpus * 25; + rw_delay.step = (mp_ncpus * 25) / 2; + rw_delay.min = mp_ncpus * 5; + rw_delay.max = mp_ncpus * 25 * 10; +} +LOCK_DELAY_SYSINIT(rw_delay_sysinit); 
+#endif + /* * Return a pointer to the owning thread if the lock is write-locked or * NULL if the lock is unlocked or read-locked. @@ -355,10 +383,12 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) int contested = 0; #endif uintptr_t v; +#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS) + struct lock_delay_arg lda; +#endif #ifdef KDTRACE_HOOKS uintptr_t state; - uint64_t spin_cnt = 0; - uint64_t sleep_cnt = 0; + u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif @@ -366,6 +396,11 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) if (SCHEDULER_STOPPED()) return; +#if defined(ADAPTIVE_RWLOCKS) + lock_delay_arg_init(&lda, &rw_delay); +#elif defined(KDTRACE_HOOKS) + lock_delay_arg_init(&lda, NULL); +#endif rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), @@ -383,9 +418,6 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) state = rw->rw_lock; #endif for (;;) { -#ifdef KDTRACE_HOOKS - spin_cnt++; -#endif /* * Handle the easy case. If no other thread has a write * lock, then try to bump up the count of read locks. Note @@ -414,6 +446,9 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) } continue; } +#ifdef KDTRACE_HOOKS + lda.spin_cnt++; +#endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif @@ -437,12 +472,8 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); while ((struct thread*)RW_OWNER(rw->rw_lock) == - owner && TD_IS_RUNNING(owner)) { - cpu_spinwait(); -#ifdef KDTRACE_HOOKS - spin_cnt++; -#endif - } + owner && TD_IS_RUNNING(owner)) + lock_delay(&lda); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; @@ -458,6 +489,9 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) break; cpu_spinwait(); } +#ifdef KDTRACE_HOOKS + lda.spin_cnt += rowner_loops - i; +#endif KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); if (i != rowner_loops) @@ -549,7 +583,7 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. */ - if (spin_cnt > sleep_cnt) + if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(LS_RW_RLOCK_SPIN, rw, all_time - sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 
0 : RW_READERS(state)); @@ -737,10 +771,12 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, uint64_t waittime = 0; int contested = 0; #endif +#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS) + struct lock_delay_arg lda; +#endif #ifdef KDTRACE_HOOKS uintptr_t state; - uint64_t spin_cnt = 0; - uint64_t sleep_cnt = 0; + u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif @@ -748,6 +784,11 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, if (SCHEDULER_STOPPED()) return; +#if defined(ADAPTIVE_RWLOCKS) + lock_delay_arg_init(&lda, &rw_delay); +#elif defined(KDTRACE_HOOKS) + lock_delay_arg_init(&lda, NULL); +#endif rw = rwlock2rw(c); if (rw_wlocked(rw)) { @@ -768,9 +809,11 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, all_time -= lockstat_nsecs(&rw->lock_object); state = rw->rw_lock; #endif - while (!_rw_write_lock(rw, tid)) { + for (;;) { + if (rw->rw_lock == RW_UNLOCKED && _rw_write_lock(rw, tid)) + break; #ifdef KDTRACE_HOOKS - spin_cnt++; + lda.spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); @@ -793,12 +836,8 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && - TD_IS_RUNNING(owner)) { - cpu_spinwait(); -#ifdef KDTRACE_HOOKS - spin_cnt++; -#endif - } + TD_IS_RUNNING(owner)) + lock_delay(&lda); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; @@ -823,7 +862,7 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); #ifdef KDTRACE_HOOKS - spin_cnt += rowner_loops - i; + lda.spin_cnt += rowner_loops - i; #endif if (i != rowner_loops) continue; @@ -913,9 +952,9 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. */ - if (spin_cnt > sleep_cnt) + if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(LS_RW_WLOCK_SPIN, rw, all_time - sleep_time, - LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, + LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 
0 : RW_READERS(state)); #endif LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested, diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c index 952f7d4..5d418c2 100644 --- a/sys/kern/kern_sx.c +++ b/sys/kern/kern_sx.c @@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> #include <sys/kdb.h> +#include <sys/kernel.h> #include <sys/ktr.h> #include <sys/lock.h> #include <sys/mutex.h> @@ -54,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sched.h> #include <sys/sleepqueue.h> #include <sys/sx.h> +#include <sys/smp.h> #include <sys/sysctl.h> #if defined(SMP) && !defined(NO_ADAPTIVE_SX) @@ -147,6 +149,33 @@ static u_int asx_loops = 10000; static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging"); SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, ""); SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, ""); + +static struct lock_delay_config sx_delay = { + .initial = 1000, + .step = 500, + .min = 100, + .max = 5000, +}; + +SYSCTL_INT(_debug_sx, OID_AUTO, delay_initial, CTLFLAG_RW, &sx_delay.initial, + 0, ""); +SYSCTL_INT(_debug_sx, OID_AUTO, delay_step, CTLFLAG_RW, &sx_delay.step, + 0, ""); +SYSCTL_INT(_debug_sx, OID_AUTO, delay_min, CTLFLAG_RW, &sx_delay.min, + 0, ""); +SYSCTL_INT(_debug_sx, OID_AUTO, delay_max, CTLFLAG_RW, &sx_delay.max, + 0, ""); + +static void +sx_delay_sysinit(void *dummy) +{ + + sx_delay.initial = mp_ncpus * 25; + sx_delay.step = (mp_ncpus * 25) / 2; + sx_delay.min = mp_ncpus * 5; + sx_delay.max = mp_ncpus * 25 * 10; +} +LOCK_DELAY_SYSINIT(sx_delay_sysinit); #endif void @@ -516,10 +545,12 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, int contested = 0; #endif int error = 0; +#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS) + struct lock_delay_arg lda; +#endif #ifdef KDTRACE_HOOKS uintptr_t state; - uint64_t spin_cnt = 0; - uint64_t sleep_cnt = 0; + u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif @@ -527,6 +558,12 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, if (SCHEDULER_STOPPED()) return (0); +#if defined(ADAPTIVE_SX) + lock_delay_arg_init(&lda, &sx_delay); +#elif defined(KDTRACE_HOOKS) + lock_delay_arg_init(&lda, NULL); +#endif + /* If we already hold an exclusive lock, then recurse. 
*/ if (sx_xlocked(sx)) { KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0, @@ -547,9 +584,12 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, all_time -= lockstat_nsecs(&sx->lock_object); state = sx->sx_lock; #endif - while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) { + for (;;) { + if (sx->sx_lock == SX_LOCK_UNLOCKED && + atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) + break; #ifdef KDTRACE_HOOKS - spin_cnt++; + lda.spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); @@ -578,12 +618,8 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, sx->lock_object.lo_name); GIANT_SAVE(); while (SX_OWNER(sx->sx_lock) == x && - TD_IS_RUNNING(owner)) { - cpu_spinwait(); -#ifdef KDTRACE_HOOKS - spin_cnt++; -#endif - } + TD_IS_RUNNING(owner)) + lock_delay(&lda); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; @@ -605,7 +641,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, break; cpu_spinwait(); #ifdef KDTRACE_HOOKS - spin_cnt++; + lda.spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", @@ -725,7 +761,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, LOCKSTAT_RECORD4(LS_SX_XLOCK_BLOCK, sx, sleep_time, LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); - if (spin_cnt > sleep_cnt) + if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(LS_SX_XLOCK_SPIN, sx, all_time - sleep_time, LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); @@ -818,10 +854,12 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) #endif uintptr_t x; int error = 0; +#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS) + struct lock_delay_arg lda; +#endif #ifdef KDTRACE_HOOKS uintptr_t state; - uint64_t spin_cnt = 0; - uint64_t sleep_cnt = 0; + u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif @@ -829,6 +867,11 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) if (SCHEDULER_STOPPED()) return (0); +#if defined(ADAPTIVE_SX) + lock_delay_arg_init(&lda, &sx_delay); +#elif defined(KDTRACE_HOOKS) + lock_delay_arg_init(&lda, NULL); +#endif #ifdef KDTRACE_HOOKS state = sx->sx_lock; all_time -= lockstat_nsecs(&sx->lock_object); @@ -840,7 +883,7 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) */ for (;;) { #ifdef KDTRACE_HOOKS - spin_cnt++; + lda.spin_cnt++; #endif x = sx->sx_lock; @@ -888,12 +931,8 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); while (SX_OWNER(sx->sx_lock) == x && - TD_IS_RUNNING(owner)) { -#ifdef KDTRACE_HOOKS - spin_cnt++; -#endif - cpu_spinwait(); - } + TD_IS_RUNNING(owner)) + lock_delay(&lda); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; @@ -989,7 +1028,7 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) LOCKSTAT_RECORD4(LS_SX_SLOCK_BLOCK, sx, sleep_time, LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); - if (spin_cnt > sleep_cnt) + if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(LS_SX_SLOCK_SPIN, sx, all_time - sleep_time, LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 
0 : SX_SHARERS(state)); diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c index 74050ff..63e4ed0 100644 --- a/sys/kern/kern_thr.c +++ b/sys/kern/kern_thr.c @@ -569,8 +569,11 @@ sys_thr_set_name(struct thread *td, struct thr_set_name_args *uap) error = 0; name[0] = '\0'; if (uap->name != NULL) { - error = copyinstr(uap->name, name, sizeof(name), - NULL); + error = copyinstr(uap->name, name, sizeof(name), NULL); + if (error == ENAMETOOLONG) { + error = copyin(uap->name, name, sizeof(name) - 1); + name[sizeof(name) - 1] = '\0'; + } if (error) return (error); } diff --git a/sys/kern/subr_lock.c b/sys/kern/subr_lock.c index 8aec803..cacaf56 100644 --- a/sys/kern/subr_lock.c +++ b/sys/kern/subr_lock.c @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$"); #endif #include <machine/cpufunc.h> +#include <machine/cpu.h> CTASSERT(LOCK_CLASS_MAX == 15); @@ -103,6 +104,34 @@ lock_destroy(struct lock_object *lock) lock->lo_flags &= ~LO_INITIALIZED; } +void +lock_delay(struct lock_delay_arg *la) +{ + u_int i, delay, backoff, min, max; + struct lock_delay_config *lc = la->config; + + delay = la->delay; + + if (delay == 0) + delay = lc->initial; + else { + delay += lc->step; + max = lc->max; + if (delay > max) + delay = max; + } + + backoff = cpu_ticks() % delay; + min = lc->min; + if (backoff < min) + backoff = min; + for (i = 0; i < backoff; i++) + cpu_spinwait(); + + la->delay = delay; + la->spin_cnt += backoff; +} + #ifdef DDB DB_SHOW_COMMAND(lock, db_show_lock) { diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index f2ddf66..9f922db 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -256,6 +256,43 @@ statfs_scale_blocks(struct statfs *sf, long max_size) sf->f_bavail >>= shift; } +static int +kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) +{ + struct statfs *sp; + int error; + + if (mp == NULL) + return (EBADF); + error = vfs_busy(mp, 0); + vfs_rel(mp); + if (error != 0) + return (error); +#ifdef MAC + error = mac_mount_check_stat(td->td_ucred, mp); + if (error != 0) + goto out; +#endif + /* + * Set these in case the underlying filesystem fails to do so. + */ + sp = &mp->mnt_stat; + sp->f_version = STATFS_VERSION; + sp->f_namemax = NAME_MAX; + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + error = VFS_STATFS(mp, sp); + if (error != 0) + goto out; + *buf = *sp; + if (priv_check(td, PRIV_VFS_GENERATION)) { + buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; + prison_enforce_statfs(td->td_ucred, mp, buf); + } +out: + vfs_unbusy(mp); + return (error); +} + /* * Get filesystem statistics. */ @@ -287,7 +324,6 @@ kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, struct statfs *buf) { struct mount *mp; - struct statfs *sp, sb; struct nameidata nd; int error; @@ -300,35 +336,7 @@ kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, vfs_ref(mp); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_vp); - error = vfs_busy(mp, 0); - vfs_rel(mp); - if (error != 0) - return (error); -#ifdef MAC - error = mac_mount_check_stat(td->td_ucred, mp); - if (error != 0) - goto out; -#endif - /* - * Set these in case the underlying filesystem fails to do so. 
- */ - sp = &mp->mnt_stat; - sp->f_version = STATFS_VERSION; - sp->f_namemax = NAME_MAX; - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - error = VFS_STATFS(mp, sp); - if (error != 0) - goto out; - if (priv_check(td, PRIV_VFS_GENERATION)) { - bcopy(sp, &sb, sizeof(sb)); - sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; - prison_enforce_statfs(td->td_ucred, mp, &sb); - sp = &sb; - } - *buf = *sp; -out: - vfs_unbusy(mp); - return (error); + return (kern_do_statfs(td, mp, buf)); } /* @@ -362,7 +370,6 @@ kern_fstatfs(struct thread *td, int fd, struct statfs *buf) { struct file *fp; struct mount *mp; - struct statfs *sp, sb; struct vnode *vp; cap_rights_t rights; int error; @@ -378,44 +385,11 @@ kern_fstatfs(struct thread *td, int fd, struct statfs *buf) AUDIT_ARG_VNODE1(vp); #endif mp = vp->v_mount; - if (mp) + if (mp != NULL) vfs_ref(mp); VOP_UNLOCK(vp, 0); fdrop(fp, td); - if (mp == NULL) { - error = EBADF; - goto out; - } - error = vfs_busy(mp, 0); - vfs_rel(mp); - if (error != 0) - return (error); -#ifdef MAC - error = mac_mount_check_stat(td->td_ucred, mp); - if (error != 0) - goto out; -#endif - /* - * Set these in case the underlying filesystem fails to do so. - */ - sp = &mp->mnt_stat; - sp->f_version = STATFS_VERSION; - sp->f_namemax = NAME_MAX; - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - error = VFS_STATFS(mp, sp); - if (error != 0) - goto out; - if (priv_check(td, PRIV_VFS_GENERATION)) { - bcopy(sp, &sb, sizeof(sb)); - sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; - prison_enforce_statfs(td->td_ucred, mp, &sb); - sp = &sb; - } - *buf = *sp; -out: - if (mp) - vfs_unbusy(mp); - return (error); + return (kern_do_statfs(td, mp, buf)); } /* |
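The kern_event.c hunks near the top also rework kqueue timer arithmetic: timer2sbintime() now splits whole seconds from the sub-second remainder and converts the remainder with 64-bit fixed-point macros (NS_TO_SBT/US_TO_SBT/MS_TO_SBT) instead of multiplying the whole count by a truncated SBT_1MS/SBT_1US/SBT_1NS constant, and the per-knote callout state moves into the new kq_timer_cb_data structure. The snippet below is a user-space re-derivation of the millisecond case only: MS_TO_SBT() is copied from the macro added in the diff, while the sbintime_t typedef, ms_to_sbintime() helper and main() harness are illustration, not kernel code.

```c
/*
 * sbintime_t is a 32.32 fixed-point value: seconds in the high 32 bits,
 * a binary fraction of a second in the low 32 bits.
 */
#include <stdint.h>
#include <stdio.h>

typedef int64_t sbintime_t;	/* illustration-only typedef */

/* (ms * 2^63/500) >> 32 == ms * 2^32/1000, with no intermediate overflow for ms < 1000. */
#define	MS_TO_SBT(ms)	(((ms) * (((uint64_t)1 << 63) / 500)) >> 32)

static sbintime_t
ms_to_sbintime(int64_t ms)
{
	/* Split out whole seconds, as the new timer2sbintime() does. */
	if (ms >= 1000) {
		int64_t secs = ms / 1000;
		return ((secs << 32) | MS_TO_SBT(ms % 1000));
	}
	return (MS_TO_SBT(ms));
}

int
main(void)
{
	sbintime_t sbt = ms_to_sbintime(1500);

	/* 1500 ms -> 1 in the seconds half, ~0x80000000 (one half) in the fraction. */
	printf("seconds=%lld frac=0x%08llx\n",
	    (long long)(sbt >> 32), (unsigned long long)(sbt & 0xffffffff));
	return (0);
}
```

Splitting whole seconds out first keeps the remainder below one second, so the fixed-point multiply cannot overflow and only the final right shift truncates, which is the precision improvement the new macros' comment describes.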