From 12adc443d67286deeee69e764d979c963403497d Mon Sep 17 00:00:00 2001
From: jeff
Date: Sat, 15 Dec 2007 23:13:31 +0000
Subject: - Re-implement lock profiling in such a way that it no longer breaks
 the ABI when enabled.  There is no longer an embedded lock_profile_object
 in each lock.  Instead a list of lock_profile_objects is kept per-thread
 for each lock it may own.  The cnt_hold statistic is now always 0 to
 facilitate this.
 - Support shared locking by tracking individual lock instances and
   statistics in the per-thread per-instance lock_profile_object.
 - Make the lock profiling hash table a per-cpu singly linked list with a
   per-cpu static lock_prof allocator.  This removes the need for an array
   of spinlocks and reduces cache contention between cores.
 - Use a separate hash for spinlocks and other locks so that only a
   critical_enter() is required and not a spinlock_enter() to modify the
   per-cpu tables.
 - Count time spent spinning in the lock statistics.
 - Remove the LOCK_PROFILING_SHARED option as it is always supported now.
 - Specifically drop and release the scheduler locks in both schedulers
   since we track owners now.

In collaboration with: Kip Macy
Sponsored by: Nokia
---
 sys/conf/options       |   1 -
 sys/kern/kern_mutex.c  |  26 +-
 sys/kern/kern_rwlock.c |  31 +--
 sys/kern/kern_sx.c     |  34 +--
 sys/kern/kern_thread.c |   2 +
 sys/kern/sched_4bsd.c  |   7 +-
 sys/kern/sched_ule.c   |   6 +
 sys/kern/subr_lock.c   | 656 ++++++++++++++++++++++++++++++++-----------------
 sys/sys/_lock.h        |  25 --
 sys/sys/lock_profile.h | 139 ++---------
 sys/sys/proc.h         |   2 +
 sys/sys/sx.h           |   4 +-
 12 files changed, 500 insertions(+), 433 deletions(-)

diff --git a/sys/conf/options b/sys/conf/options
index 6a00e35..e76c61e 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -540,7 +540,6 @@ MUTEX_DEBUG	opt_global.h
 MUTEX_NOINLINE	opt_global.h
 LOCK_PROFILING	opt_global.h
 LOCK_PROFILING_FAST	opt_global.h
-LOCK_PROFILING_SHARED	opt_global.h
 MSIZE	opt_global.h
 REGRESSION	opt_global.h
 RESTARTABLE_PANICS	opt_global.h
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 727871d..81be32d 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -123,20 +123,6 @@ struct lock_class lock_class_mtx_spin = {
 struct mtx blocked_lock;
 struct mtx Giant;
 
-#ifdef LOCK_PROFILING
-static inline void lock_profile_init(void)
-{
-	int i;
-	/* Initialize the mutex profiling locks */
-	for (i = 0; i < LPROF_LOCK_SIZE; i++) {
-		mtx_init(&lprof_locks[i], "mprof lock",
-		    NULL, MTX_SPIN|MTX_QUIET|MTX_NOPROFILE);
-	}
-}
-#else
-static inline void lock_profile_init(void) {;}
-#endif
-
 void
 assert_mtx(struct lock_object *lock, int what)
 {
@@ -425,7 +411,7 @@ _mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file,
 	}
 #endif
 	lock_profile_obtain_lock_success(&m->lock_object, contested,
-	    waittime, (file), (line));
+	    waittime, file, line);
 }
 
 static void
@@ -514,7 +500,8 @@ retry:
 			m->mtx_recurse++;
 			break;
 		}
-		lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime);
+		lock_profile_obtain_lock_failed(&m->lock_object,
+		    &contested, &waittime);
 		/* Give interrupts a chance while we spin.
*/ spinlock_exit(); while (m->mtx_lock != MTX_UNOWNED) { @@ -535,8 +522,9 @@ retry: break; _rel_spin_lock(m); /* does spinlock_exit() */ } - lock_profile_obtain_lock_success(&m->lock_object, contested, - waittime, (file), (line)); + if (m->mtx_recurse == 0) + lock_profile_obtain_lock_success(&m->lock_object, contested, + waittime, (file), (line)); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); } @@ -794,8 +782,6 @@ mutex_init(void) mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN | MTX_RECURSE); mtx_init(&devmtx, "cdev", NULL, MTX_DEF); mtx_lock(&Giant); - - lock_profile_init(); } #ifdef DDB diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c index 4b8b4ca..43c1d3c 100644 --- a/sys/kern/kern_rwlock.c +++ b/sys/kern/kern_rwlock.c @@ -227,10 +227,8 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) #ifdef ADAPTIVE_RWLOCKS volatile struct thread *owner; #endif -#ifdef LOCK_PROFILING_SHARED uint64_t waittime = 0; int contested = 0; -#endif uintptr_t x; KASSERT(rw->rw_lock != RW_DESTROYED, @@ -273,12 +271,6 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) MPASS((x & RW_LOCK_READ_WAITERS) == 0); if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) { -#ifdef LOCK_PROFILING_SHARED - if (RW_READERS(x) == 0) - lock_profile_obtain_lock_success( - &rw->lock_object, contested, - waittime, file, line); -#endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, @@ -289,6 +281,8 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) cpu_spinwait(); continue; } + lock_profile_obtain_lock_failed(&rw->lock_object, + &contested, &waittime); #ifdef ADAPTIVE_RWLOCKS /* @@ -301,10 +295,6 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); -#ifdef LOCK_PROFILING_SHARED - lock_profile_obtain_lock_failed(&rw->lock_object, - &contested, &waittime); -#endif while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && TD_IS_RUNNING(owner)) cpu_spinwait(); @@ -369,10 +359,6 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); -#ifdef LOCK_PROFILING_SHARED - lock_profile_obtain_lock_failed(&rw->lock_object, &contested, - &waittime); -#endif turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", @@ -384,7 +370,8 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) * however. turnstiles don't like owners changing between calls to * turnstile_wait() currently. 
*/ - + lock_profile_obtain_lock_success( &rw->lock_object, contested, + waittime, file, line); LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line); WITNESS_LOCK(&rw->lock_object, 0, file, line); curthread->td_locks++; @@ -431,9 +418,6 @@ _rw_runlock(struct rwlock *rw, const char *file, int line) */ KASSERT(!(x & RW_LOCK_READ_WAITERS), ("%s: waiting readers", __func__)); -#ifdef LOCK_PROFILING_SHARED - lock_profile_release_lock(&rw->lock_object); -#endif /* * If there aren't any waiters for a write lock, then try @@ -510,6 +494,7 @@ _rw_runlock(struct rwlock *rw, const char *file, int line) turnstile_chain_unlock(&rw->lock_object); break; } + lock_profile_release_lock(&rw->lock_object); } /* @@ -544,6 +529,8 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) rw->lock_object.lo_name, (void *)rw->rw_lock, file, line); while (!_rw_write_lock(rw, tid)) { + lock_profile_obtain_lock_failed(&rw->lock_object, + &contested, &waittime); #ifdef ADAPTIVE_RWLOCKS /* * If the lock is write locked and the owner is @@ -556,8 +543,6 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); - lock_profile_obtain_lock_failed(&rw->lock_object, - &contested, &waittime); while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && TD_IS_RUNNING(owner)) cpu_spinwait(); @@ -641,8 +626,6 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); - lock_profile_obtain_lock_failed(&rw->lock_object, &contested, - &waittime); turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c index 1e3f135..bc172e5 100644 --- a/sys/kern/kern_sx.c +++ b/sys/kern/kern_sx.c @@ -302,11 +302,8 @@ _sx_sunlock(struct sx *sx, const char *file, int line) curthread->td_locks--; WITNESS_UNLOCK(&sx->lock_object, 0, file, line); LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line); -#ifdef LOCK_PROFILING_SHARED - if (SX_SHARERS(sx->sx_lock) == 1) - lock_profile_release_lock(&sx->lock_object); -#endif __sx_sunlock(sx, file, line); + lock_profile_release_lock(&sx->lock_object); } void @@ -450,6 +447,8 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, sx->lock_object.lo_name, (void *)sx->sx_lock, file, line); while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) { + lock_profile_obtain_lock_failed(&sx->lock_object, &contested, + &waittime); #ifdef ADAPTIVE_SX /* * If the lock is write locked and the owner is @@ -467,8 +466,6 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, "%s: spinning on %p held by %p", __func__, sx, owner); GIANT_SAVE(); - lock_profile_obtain_lock_failed( - &sx->lock_object, &contested, &waittime); while (SX_OWNER(sx->sx_lock) == x && TD_IS_RUNNING(owner)) cpu_spinwait(); @@ -555,8 +552,6 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, __func__, sx); GIANT_SAVE(); - lock_profile_obtain_lock_failed(&sx->lock_object, &contested, - &waittime); sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? 
SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE); @@ -648,10 +643,8 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) #ifdef ADAPTIVE_SX volatile struct thread *owner; #endif -#ifdef LOCK_PROFILING_SHARED uint64_t waittime = 0; int contested = 0; -#endif uintptr_t x; int error = 0; @@ -672,12 +665,6 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) MPASS(!(x & SX_LOCK_SHARED_WAITERS)); if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) { -#ifdef LOCK_PROFILING_SHARED - if (SX_SHARERS(x) == 0) - lock_profile_obtain_lock_success( - &sx->lock_object, contested, - waittime, file, line); -#endif if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, @@ -687,6 +674,8 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) } continue; } + lock_profile_obtain_lock_failed(&sx->lock_object, &contested, + &waittime); #ifdef ADAPTIVE_SX /* @@ -694,7 +683,7 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) * the owner stops running or the state of the lock * changes. */ - else if (sx->lock_object.lo_flags & SX_ADAPTIVESPIN) { + if (sx->lock_object.lo_flags & SX_ADAPTIVESPIN) { x = SX_OWNER(x); owner = (struct thread *)x; if (TD_IS_RUNNING(owner)) { @@ -703,10 +692,6 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) "%s: spinning on %p held by %p", __func__, sx, owner); GIANT_SAVE(); -#ifdef LOCK_PROFILING_SHARED - lock_profile_obtain_lock_failed( - &sx->lock_object, &contested, &waittime); -#endif while (SX_OWNER(sx->sx_lock) == x && TD_IS_RUNNING(owner)) cpu_spinwait(); @@ -772,10 +757,6 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) __func__, sx); GIANT_SAVE(); -#ifdef LOCK_PROFILING_SHARED - lock_profile_obtain_lock_failed(&sx->lock_object, &contested, - &waittime); -#endif sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE); @@ -795,6 +776,9 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); } + if (error == 0) + lock_profile_obtain_lock_success(&sx->lock_object, contested, + waittime, file, line); GIANT_RESTORE(); return (error); diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index e176b87..93ff5a7 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -555,6 +555,8 @@ thread_link(struct thread *td, struct proc *p) td->td_flags = TDF_INMEM; LIST_INIT(&td->td_contested); + LIST_INIT(&td->td_lprof[0]); + LIST_INIT(&td->td_lprof[1]); sigqueue_init(&td->td_sigqueue, p); callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist); diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c index f6e702e..e1e5c91 100644 --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -878,9 +878,11 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); #endif - /* I feel sleepy */ + lock_profile_release_lock(&sched_lock.lock_object); cpu_switch(td, newtd, td->td_lock); + lock_profile_obtain_lock_success(&sched_lock.lock_object, + 0, 0, __FILE__, __LINE__); /* * Where am I? What year is it? 
* We are in the same thread that went to sleep above, @@ -1375,6 +1377,7 @@ sched_throw(struct thread *td) mtx_lock_spin(&sched_lock); spinlock_exit(); } else { + lock_profile_release_lock(&sched_lock.lock_object); MPASS(td->td_lock == &sched_lock); } mtx_assert(&sched_lock, MA_OWNED); @@ -1394,6 +1397,8 @@ sched_fork_exit(struct thread *td) */ td->td_oncpu = PCPU_GET(cpuid); sched_lock.mtx_lock = (uintptr_t)td; + lock_profile_obtain_lock_success(&sched_lock.lock_object, + 0, 0, __FILE__, __LINE__); THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED); } diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index e9d9468..6841bab 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -1894,6 +1894,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); #endif + lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object); TDQ_LOCKPTR(tdq)->mtx_lock = (uintptr_t)newtd; cpu_switch(td, newtd, mtx); /* @@ -1903,6 +1904,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) */ cpuid = PCPU_GET(cpuid); tdq = TDQ_CPU(cpuid); + lock_profile_obtain_lock_success( + &TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__); #ifdef HWPMC_HOOKS if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); @@ -2618,6 +2621,7 @@ sched_throw(struct thread *td) } else { MPASS(td->td_lock == TDQ_LOCKPTR(tdq)); tdq_load_rem(tdq, td->td_sched); + lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object); } KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count")); newtd = choosethread(); @@ -2650,6 +2654,8 @@ sched_fork_exit(struct thread *td) MPASS(td->td_lock == TDQ_LOCKPTR(tdq)); td->td_oncpu = cpuid; TDQ_LOCK_ASSERT(tdq, MA_OWNED | MA_NOTRECURSED); + lock_profile_obtain_lock_success( + &TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__); } static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, diff --git a/sys/kern/subr_lock.c b/sys/kern/subr_lock.c index ebb3c35..19f3639 100644 --- a/sys/kern/subr_lock.c +++ b/sys/kern/subr_lock.c @@ -40,17 +40,24 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include +#include +#include +#include +#include #include +#include #include -#include #ifdef DDB #include #endif +#include + CTASSERT(LOCK_CLASS_MAX == 15); struct lock_class *lock_classes[LOCK_CLASS_MAX + 1] = { @@ -62,136 +69,6 @@ struct lock_class *lock_classes[LOCK_CLASS_MAX + 1] = { &lock_class_lockmgr, }; -#ifdef LOCK_PROFILING -#include - -SYSCTL_NODE(_debug, OID_AUTO, lock, CTLFLAG_RD, NULL, "lock debugging"); -SYSCTL_NODE(_debug_lock, OID_AUTO, prof, CTLFLAG_RD, NULL, "lock profiling"); -int lock_prof_enable = 0; -SYSCTL_INT(_debug_lock_prof, OID_AUTO, enable, CTLFLAG_RW, - &lock_prof_enable, 0, "Enable lock profiling"); - -/* - * lprof_buf is a static pool of profiling records to avoid possible - * reentrance of the memory allocation functions. - * - * Note: NUM_LPROF_BUFFERS must be smaller than LPROF_HASH_SIZE. - */ -struct lock_prof lprof_buf[LPROF_HASH_SIZE]; -static int allocated_lprof_buf; -struct mtx lprof_locks[LPROF_LOCK_SIZE]; - - -/* SWAG: sbuf size = avg stat. 
line size * number of locks */ -#define LPROF_SBUF_SIZE 256 * 400 - -static int lock_prof_acquisitions; -SYSCTL_INT(_debug_lock_prof, OID_AUTO, acquisitions, CTLFLAG_RD, - &lock_prof_acquisitions, 0, "Number of lock acquistions recorded"); -static int lock_prof_records; -SYSCTL_INT(_debug_lock_prof, OID_AUTO, records, CTLFLAG_RD, - &lock_prof_records, 0, "Number of profiling records"); -static int lock_prof_maxrecords = LPROF_HASH_SIZE; -SYSCTL_INT(_debug_lock_prof, OID_AUTO, maxrecords, CTLFLAG_RD, - &lock_prof_maxrecords, 0, "Maximum number of profiling records"); -static int lock_prof_rejected; -SYSCTL_INT(_debug_lock_prof, OID_AUTO, rejected, CTLFLAG_RD, - &lock_prof_rejected, 0, "Number of rejected profiling records"); -static int lock_prof_hashsize = LPROF_HASH_SIZE; -SYSCTL_INT(_debug_lock_prof, OID_AUTO, hashsize, CTLFLAG_RD, - &lock_prof_hashsize, 0, "Hash size"); -static int lock_prof_collisions = 0; -SYSCTL_INT(_debug_lock_prof, OID_AUTO, collisions, CTLFLAG_RD, - &lock_prof_collisions, 0, "Number of hash collisions"); - -#ifndef USE_CPU_NANOSECONDS -u_int64_t -nanoseconds(void) -{ - struct timespec tv; - - nanotime(&tv); - return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec); -} -#endif - -static int -dump_lock_prof_stats(SYSCTL_HANDLER_ARGS) -{ - struct sbuf *sb; - int error, i; - static int multiplier = 1; - const char *p; - - if (allocated_lprof_buf == 0) - return (SYSCTL_OUT(req, "No locking recorded", - sizeof("No locking recorded"))); - -retry_sbufops: - sb = sbuf_new(NULL, NULL, LPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN); - sbuf_printf(sb, "\n%6s %12s %12s %11s %5s %5s %12s %12s %s\n", - "max", "total", "wait_total", "count", "avg", "wait_avg", "cnt_hold", "cnt_lock", "name"); - for (i = 0; i < LPROF_HASH_SIZE; ++i) { - if (lprof_buf[i].name == NULL) - continue; - for (p = lprof_buf[i].file; - p != NULL && strncmp(p, "../", 3) == 0; p += 3) - /* nothing */ ; - sbuf_printf(sb, "%6ju %12ju %12ju %11ju %5ju %5ju %12ju %12ju %s:%d (%s:%s)\n", - lprof_buf[i].cnt_max / 1000, - lprof_buf[i].cnt_tot / 1000, - lprof_buf[i].cnt_wait / 1000, - lprof_buf[i].cnt_cur, - lprof_buf[i].cnt_cur == 0 ? (uintmax_t)0 : - lprof_buf[i].cnt_tot / (lprof_buf[i].cnt_cur * 1000), - lprof_buf[i].cnt_cur == 0 ? 
(uintmax_t)0 : - lprof_buf[i].cnt_wait / (lprof_buf[i].cnt_cur * 1000), - lprof_buf[i].cnt_contest_holding, - lprof_buf[i].cnt_contest_locking, - p, lprof_buf[i].line, - lprof_buf[i].type, - lprof_buf[i].name); - if (sbuf_overflowed(sb)) { - sbuf_delete(sb); - multiplier++; - goto retry_sbufops; - } - } - - sbuf_finish(sb); - error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); - sbuf_delete(sb); - return (error); -} -static int -reset_lock_prof_stats(SYSCTL_HANDLER_ARGS) -{ - int error, v; - - if (allocated_lprof_buf == 0) - return (0); - - v = 0; - error = sysctl_handle_int(oidp, &v, 0, req); - if (error) - return (error); - if (req->newptr == NULL) - return (error); - if (v == 0) - return (0); - - bzero(lprof_buf, LPROF_HASH_SIZE*sizeof(*lprof_buf)); - allocated_lprof_buf = 0; - return (0); -} - -SYSCTL_PROC(_debug_lock_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD, - NULL, 0, dump_lock_prof_stats, "A", "Lock profiling statistics"); - -SYSCTL_PROC(_debug_lock_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW, - NULL, 0, reset_lock_prof_stats, "I", "Reset lock profiling statistics"); -#endif - void lock_init(struct lock_object *lock, struct lock_class *class, const char *name, const char *type, int flags) @@ -216,7 +93,6 @@ lock_init(struct lock_object *lock, struct lock_class *class, const char *name, lock->lo_flags |= flags | LO_INITIALIZED; LOCK_LOG_INIT(lock, 0); WITNESS_INIT(lock); - lock_profile_object_init(lock, class, name); } void @@ -224,7 +100,6 @@ lock_destroy(struct lock_object *lock) { KASSERT(lock_initalized(lock), ("lock %p is not initialized", lock)); - lock_profile_object_destroy(lock); WITNESS_DESTROY(lock); LOCK_LOG_DESTROY(lock, 0); lock->lo_flags &= ~LO_INITIALIZED; @@ -253,17 +128,376 @@ DB_SHOW_COMMAND(lock, db_show_lock) #endif #ifdef LOCK_PROFILING -void _lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, const char *file, int line) + +/* + * One object per-thread for each lock the thread owns. Tracks individual + * lock instances. + */ +struct lock_profile_object { + LIST_ENTRY(lock_profile_object) lpo_link; + struct lock_object *lpo_obj; + const char *lpo_file; + int lpo_line; + uint16_t lpo_ref; + uint16_t lpo_cnt; + u_int64_t lpo_acqtime; + u_int64_t lpo_waittime; + u_int lpo_contest_locking; +}; + +/* + * One lock_prof for each (file, line, lock object) triple. + */ +struct lock_prof { + SLIST_ENTRY(lock_prof) link; + const char *file; + const char *name; + int line; + int ticks; + const char *type; + uintmax_t cnt_max; + uintmax_t cnt_tot; + uintmax_t cnt_wait; + uintmax_t cnt_cur; + uintmax_t cnt_contest_locking; +}; + +SLIST_HEAD(lphead, lock_prof); + +#define LPROF_HASH_SIZE 4096 +#define LPROF_HASH_MASK (LPROF_HASH_SIZE - 1) +#define LPROF_CACHE_SIZE 4096 + +/* + * Array of objects and profs for each type of object for each cpu. Spinlocks + * are handled seperately because a thread may be preempted and acquire a + * spinlock while in the lock profiling code of a non-spinlock. In this way + * we only need a critical section to protect the per-cpu lists. + */ +struct lock_prof_type { + struct lphead lpt_lpalloc; + struct lpohead lpt_lpoalloc; + struct lphead lpt_hash[LPROF_HASH_SIZE]; + struct lock_prof lpt_prof[LPROF_CACHE_SIZE]; + struct lock_profile_object lpt_objs[LPROF_CACHE_SIZE]; +}; + +struct lock_prof_cpu { + struct lock_prof_type lpc_types[2]; /* One for spin one for other. */ +}; + +struct lock_prof_cpu *lp_cpu[MAXCPU]; + +int lock_prof_enable = 0; + +/* SWAG: sbuf size = avg stat. 
line size * number of locks */ +#define LPROF_SBUF_SIZE 256 * 400 + +static int lock_prof_rejected; +static int lock_prof_skipspin; +static int lock_prof_skipcount; + +#ifndef USE_CPU_NANOSECONDS +u_int64_t +nanoseconds(void) { - struct lock_profile_object *l = &lo->lo_profile_obj; + struct bintime bt; + u_int64_t ns; + + binuptime(&bt); + /* From bintime2timespec */ + ns = bt.sec * (u_int64_t)1000000000; + ns += ((uint64_t)1000000000 * (uint32_t)(bt.frac >> 32)) >> 32; + return (ns); +} +#endif - lo->lo_profile_obj.lpo_contest_holding = 0; - - if (contested) - lo->lo_profile_obj.lpo_contest_locking++; +static void +lock_prof_init_type(struct lock_prof_type *type) +{ + int i; + + SLIST_INIT(&type->lpt_lpalloc); + LIST_INIT(&type->lpt_lpoalloc); + for (i = 0; i < LPROF_CACHE_SIZE; i++) { + SLIST_INSERT_HEAD(&type->lpt_lpalloc, &type->lpt_prof[i], + link); + LIST_INSERT_HEAD(&type->lpt_lpoalloc, &type->lpt_objs[i], + lpo_link); + } +} + +static void +lock_prof_init(void *arg) +{ + int cpu; + + for (cpu = 0; cpu <= mp_maxid; cpu++) { + lp_cpu[cpu] = malloc(sizeof(*lp_cpu[cpu]), M_DEVBUF, + M_WAITOK | M_ZERO); + lock_prof_init_type(&lp_cpu[cpu]->lpc_types[0]); + lock_prof_init_type(&lp_cpu[cpu]->lpc_types[1]); + } +} +SYSINIT(lockprof, SI_SUB_SMP, SI_ORDER_ANY, lock_prof_init, NULL); + +static void +lock_prof_reset(void) +{ + struct lock_prof_cpu *lpc; + int enabled, i, cpu; + + enabled = lock_prof_enable; + lock_prof_enable = 0; + for (cpu = 0; cpu <= mp_maxid; cpu++) { + lpc = lp_cpu[cpu]; + for (i = 0; i < LPROF_CACHE_SIZE; i++) { + LIST_REMOVE(&lpc->lpc_types[0].lpt_objs[i], lpo_link); + LIST_REMOVE(&lpc->lpc_types[1].lpt_objs[i], lpo_link); + } + bzero(lpc, sizeof(*lpc)); + lock_prof_init_type(&lpc->lpc_types[0]); + lock_prof_init_type(&lpc->lpc_types[1]); + } + lock_prof_enable = enabled; +} + +static void +lock_prof_output(struct lock_prof *lp, struct sbuf *sb) +{ + const char *p; + + for (p = lp->file; p != NULL && strncmp(p, "../", 3) == 0; p += 3); + sbuf_printf(sb, + "%6ju %12ju %12ju %11ju %5ju %5ju %12ju %12ju %s:%d (%s:%s)\n", + lp->cnt_max / 1000, lp->cnt_tot / 1000, + lp->cnt_wait / 1000, lp->cnt_cur, + lp->cnt_cur == 0 ? (uintmax_t)0 : + lp->cnt_tot / (lp->cnt_cur * 1000), + lp->cnt_cur == 0 ? 
(uintmax_t)0 : + lp->cnt_wait / (lp->cnt_cur * 1000), + (uintmax_t)0, lp->cnt_contest_locking, + p, lp->line, lp->type, lp->name); +} + +static void +lock_prof_sum(struct lock_prof *match, struct lock_prof *dst, int hash, + int spin, int t) +{ + struct lock_prof_type *type; + struct lock_prof *l; + int cpu; + + dst->file = match->file; + dst->line = match->line; + dst->type = match->type; + dst->name = match->name; + + for (cpu = 0; cpu <= mp_maxid; cpu++) { + if (lp_cpu[cpu] == NULL) + continue; + type = &lp_cpu[cpu]->lpc_types[spin]; + SLIST_FOREACH(l, &type->lpt_hash[hash], link) { + if (l->ticks == t) + continue; + if (l->file != match->file || l->line != match->line || + l->name != match->name || l->type != match->type) + continue; + l->ticks = t; + if (l->cnt_max > dst->cnt_max) + dst->cnt_max = l->cnt_max; + dst->cnt_tot += l->cnt_tot; + dst->cnt_wait += l->cnt_wait; + dst->cnt_cur += l->cnt_cur; + dst->cnt_contest_locking += l->cnt_contest_locking; + } + } - l->lpo_filename = file; - l->lpo_lineno = line; +} + +static void +lock_prof_type_stats(struct lock_prof_type *type, struct sbuf *sb, int spin, + int t) +{ + struct lock_prof *l; + int i; + + for (i = 0; i < LPROF_HASH_SIZE; ++i) { + SLIST_FOREACH(l, &type->lpt_hash[i], link) { + struct lock_prof lp = {}; + + if (l->ticks == t) + continue; + lock_prof_sum(l, &lp, i, spin, t); + lock_prof_output(&lp, sb); + if (sbuf_overflowed(sb)) + return; + } + } +} + +static int +dump_lock_prof_stats(SYSCTL_HANDLER_ARGS) +{ + static int multiplier = 1; + struct sbuf *sb; + int error, cpu, t; + +retry_sbufops: + sb = sbuf_new(NULL, NULL, LPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN); + sbuf_printf(sb, "\n%6s %12s %12s %11s %5s %5s %12s %12s %s\n", + "max", "total", "wait_total", "count", "avg", "wait_avg", "cnt_hold", "cnt_lock", "name"); + t = ticks; + for (cpu = 0; cpu <= mp_maxid; cpu++) { + if (lp_cpu[cpu] == NULL) + continue; + lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[0], sb, 0, t); + lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[1], sb, 1, t); + if (sbuf_overflowed(sb)) { + sbuf_delete(sb); + multiplier++; + goto retry_sbufops; + } + } + + sbuf_finish(sb); + error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); + sbuf_delete(sb); + return (error); +} + +static int +enable_lock_prof(SYSCTL_HANDLER_ARGS) +{ + int error, v; + + v = lock_prof_enable; + error = sysctl_handle_int(oidp, &v, v, req); + if (error) + return (error); + if (req->newptr == NULL) + return (error); + if (v == lock_prof_enable) + return (0); + if (v == 1) + lock_prof_reset(); + lock_prof_enable = !!v; + + return (0); +} + +static int +reset_lock_prof_stats(SYSCTL_HANDLER_ARGS) +{ + int error, v; + + v = 0; + error = sysctl_handle_int(oidp, &v, 0, req); + if (error) + return (error); + if (req->newptr == NULL) + return (error); + if (v == 0) + return (0); + lock_prof_reset(); + + return (0); +} + +static struct lock_prof * +lock_profile_lookup(struct lock_object *lo, int spin, const char *file, + int line) +{ + const char *unknown = "(unknown)"; + struct lock_prof_type *type; + struct lock_prof *lp; + struct lphead *head; + const char *p; + u_int hash; + + p = file; + if (p == NULL || *p == '\0') + p = unknown; + hash = (uintptr_t)lo->lo_name * 31 + (uintptr_t)p * 31 + line; + hash &= LPROF_HASH_MASK; + type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin]; + head = &type->lpt_hash[hash]; + SLIST_FOREACH(lp, head, link) { + if (lp->line == line && lp->file == p && + lp->name == lo->lo_name) + return (lp); + + } + lp = SLIST_FIRST(&type->lpt_lpalloc); + if (lp == NULL) 
{ + lock_prof_rejected++; + return (lp); + } + SLIST_REMOVE_HEAD(&type->lpt_lpalloc, link); + lp->file = p; + lp->line = line; + lp->type = lo->lo_type; + lp->name = lo->lo_name; + SLIST_INSERT_HEAD(&type->lpt_hash[hash], lp, link); + return (lp); +} + +static struct lock_profile_object * +lock_profile_object_lookup(struct lock_object *lo, int spin, const char *file, + int line) +{ + struct lock_profile_object *l; + struct lock_prof_type *type; + struct lpohead *head; + + head = &curthread->td_lprof[spin]; + LIST_FOREACH(l, head, lpo_link) + if (l->lpo_obj == lo && l->lpo_file == file && + l->lpo_line == line) + return (l); + critical_enter(); + type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin]; + l = LIST_FIRST(&type->lpt_lpoalloc); + if (l == NULL) { + lock_prof_rejected++; + critical_exit(); + return (NULL); + } + LIST_REMOVE(l, lpo_link); + critical_exit(); + l->lpo_obj = lo; + l->lpo_file = file; + l->lpo_line = line; + l->lpo_cnt = 0; + LIST_INSERT_HEAD(head, l, lpo_link); + + return (l); +} + +void +lock_profile_obtain_lock_success(struct lock_object *lo, int contested, + uint64_t waittime, const char *file, int line) +{ + static int lock_prof_count; + struct lock_profile_object *l; + int spin; + + /* don't reset the timer when/if recursing */ + if (!lock_prof_enable || (lo->lo_flags & LO_NOPROFILE)) + return; + if (lock_prof_skipcount && + (++lock_prof_count % lock_prof_skipcount) == 0) + return; + spin = LOCK_CLASS(lo) == &lock_class_mtx_spin; + if (spin && lock_prof_skipspin == 1) + return; + l = lock_profile_object_lookup(lo, spin, file, line); + if (l == NULL) + return; + l->lpo_cnt++; + if (++l->lpo_ref > 1) + return; + l->lpo_contest_locking = contested; l->lpo_acqtime = nanoseconds(); if (waittime && (l->lpo_acqtime > waittime)) l->lpo_waittime = l->lpo_acqtime - waittime; @@ -271,87 +505,65 @@ void _lock_profile_obtain_lock_success(struct lock_object *lo, int contested, ui l->lpo_waittime = 0; } -void _lock_profile_release_lock(struct lock_object *lo) +void +lock_profile_release_lock(struct lock_object *lo) { - struct lock_profile_object *l = &lo->lo_profile_obj; - - if (l->lpo_acqtime) { - const char *unknown = "(unknown)"; - u_int64_t acqtime, now, waittime; - struct lock_prof *mpp; - u_int hash; - const char *p = l->lpo_filename; - int collision = 0; - - now = nanoseconds(); - acqtime = l->lpo_acqtime; - waittime = l->lpo_waittime; - if (now <= acqtime) - return; - if (p == NULL || *p == '\0') - p = unknown; - hash = (l->lpo_namehash * 31 * 31 + (uintptr_t)p * 31 + l->lpo_lineno) & LPROF_HASH_MASK; - mpp = &lprof_buf[hash]; - while (mpp->name != NULL) { - if (mpp->line == l->lpo_lineno && - mpp->file == p && - mpp->namehash == l->lpo_namehash) - break; - /* If the lprof_hash entry is allocated to someone - * else, try the next one - */ - collision = 1; - hash = (hash + 1) & LPROF_HASH_MASK; - mpp = &lprof_buf[hash]; - } - if (mpp->name == NULL) { - int buf; - - buf = atomic_fetchadd_int(&allocated_lprof_buf, 1); - /* Just exit if we cannot get a trace buffer */ - if (buf >= LPROF_HASH_SIZE) { - ++lock_prof_rejected; - return; - } - mpp->file = p; - mpp->line = l->lpo_lineno; - mpp->namehash = l->lpo_namehash; - mpp->type = l->lpo_type; - mpp->name = lo->lo_name; - - if (collision) - ++lock_prof_collisions; - - /* - * We might have raced someone else but who cares, - * they'll try again next time - */ - ++lock_prof_records; - } - LPROF_LOCK(hash); - /* - * Record if the lock has been held longer now than ever - * before. 
- */ - if (now - acqtime > mpp->cnt_max) - mpp->cnt_max = now - acqtime; - mpp->cnt_tot += now - acqtime; - mpp->cnt_wait += waittime; - mpp->cnt_cur++; - /* - * There's a small race, really we should cmpxchg - * 0 with the current value, but that would bill - * the contention to the wrong lock instance if - * it followed this also. - */ - mpp->cnt_contest_holding += l->lpo_contest_holding; - mpp->cnt_contest_locking += l->lpo_contest_locking; - LPROF_UNLOCK(hash); - - } - l->lpo_acqtime = 0; - l->lpo_waittime = 0; - l->lpo_contest_locking = 0; - l->lpo_contest_holding = 0; + struct lock_profile_object *l; + struct lock_prof_type *type; + struct lock_prof *lp; + u_int64_t holdtime; + struct lpohead *head; + int spin; + + if (!lock_prof_enable || (lo->lo_flags & LO_NOPROFILE)) + return; + spin = LOCK_CLASS(lo) == &lock_class_mtx_spin; + head = &curthread->td_lprof[spin]; + critical_enter(); + LIST_FOREACH(l, head, lpo_link) + if (l->lpo_obj == lo) + break; + if (l == NULL) + goto out; + if (--l->lpo_ref > 0) + goto out; + lp = lock_profile_lookup(lo, spin, l->lpo_file, l->lpo_line); + if (lp == NULL) + goto release; + holdtime = nanoseconds() - l->lpo_acqtime; + if (holdtime < 0) + goto release; + /* + * Record if the lock has been held longer now than ever + * before. + */ + if (holdtime > lp->cnt_max) + lp->cnt_max = holdtime; + lp->cnt_tot += holdtime; + lp->cnt_wait += l->lpo_waittime; + lp->cnt_contest_locking += l->lpo_contest_locking; + lp->cnt_cur += l->lpo_cnt; +release: + LIST_REMOVE(l, lpo_link); + type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin]; + LIST_INSERT_HEAD(&type->lpt_lpoalloc, l, lpo_link); +out: + critical_exit(); } + +SYSCTL_NODE(_debug, OID_AUTO, lock, CTLFLAG_RD, NULL, "lock debugging"); +SYSCTL_NODE(_debug_lock, OID_AUTO, prof, CTLFLAG_RD, NULL, "lock profiling"); +SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipspin, CTLFLAG_RW, + &lock_prof_skipspin, 0, "Skip profiling on spinlocks."); +SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipcount, CTLFLAG_RW, + &lock_prof_skipcount, 0, "Sample approximately every N lock acquisitions."); +SYSCTL_INT(_debug_lock_prof, OID_AUTO, rejected, CTLFLAG_RD, + &lock_prof_rejected, 0, "Number of rejected profiling records"); +SYSCTL_PROC(_debug_lock_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD, + NULL, 0, dump_lock_prof_stats, "A", "Lock profiling statistics"); +SYSCTL_PROC(_debug_lock_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW, + NULL, 0, reset_lock_prof_stats, "I", "Reset lock profiling statistics"); +SYSCTL_PROC(_debug_lock_prof, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW, + NULL, 0, enable_lock_prof, "I", "Enable lock profiling"); + #endif diff --git a/sys/sys/_lock.h b/sys/sys/_lock.h index cc1ea0a..dd0b071 100644 --- a/sys/sys/_lock.h +++ b/sys/sys/_lock.h @@ -31,35 +31,10 @@ #ifndef _SYS__LOCK_H_ #define _SYS__LOCK_H_ -struct lock_profile_object { - /* - * This does not result in variant structure sizes because - * MUTEX_PROFILING is in opt_global.h - */ - u_int64_t lpo_acqtime; - u_int64_t lpo_waittime; - const char *lpo_filename; - u_int lpo_namehash; - int lpo_lineno; - const char *lpo_type; - /* - * Fields relating to measuring contention on mutexes. - * holding must be accessed atomically since it's - * modified by threads that don't yet hold the mutex. - * locking is only modified and referenced while - * the mutex is held. - */ - u_int lpo_contest_holding; - u_int lpo_contest_locking; -}; - struct lock_object { const char *lo_name; /* Individual lock name. */ const char *lo_type; /* General lock type. 
*/ u_int lo_flags; -#ifdef LOCK_PROFILING - struct lock_profile_object lo_profile_obj; -#endif union { /* Data for witness. */ STAILQ_ENTRY(lock_object) lod_list; struct witness *lod_witness; diff --git a/sys/sys/lock_profile.h b/sys/sys/lock_profile.h index f2861ac..f1e2659 100644 --- a/sys/sys/lock_profile.h +++ b/sys/sys/lock_profile.h @@ -31,139 +31,54 @@ #ifndef _SYS_LOCK_PROFILE_H_ #define _SYS_LOCK_PROFILE_H_ +#ifdef _KERNEL + +struct lock_profile_object; +LIST_HEAD(lpohead, lock_profile_object); + #ifdef LOCK_PROFILING -#include -#include -#include -#include -#include - -#ifndef LPROF_HASH_SIZE -#define LPROF_HASH_SIZE 4096 -#define LPROF_HASH_MASK (LPROF_HASH_SIZE - 1) -#endif +#include #ifndef USE_CPU_NANOSECONDS u_int64_t nanoseconds(void); #endif -struct lock_prof { - const char *name; - const char *type; - const char *file; - u_int namehash; - int line; - uintmax_t cnt_max; - uintmax_t cnt_tot; - uintmax_t cnt_wait; - uintmax_t cnt_cur; - uintmax_t cnt_contest_holding; - uintmax_t cnt_contest_locking; -}; - -extern struct lock_prof lprof_buf[LPROF_HASH_SIZE]; -#define LPROF_SBUF_SIZE 256 * 400 - -/* We keep a smaller pool of spin mutexes for protecting the lprof hash entries */ -#define LPROF_LOCK_SIZE 16 -#define LPROF_LOCK_MASK (LPROF_LOCK_SIZE - 1) -#define LPROF_LHASH(hash) ((hash) & LPROF_LOCK_MASK) - -#define LPROF_LOCK(hash) mtx_lock_spin(&lprof_locks[LPROF_LHASH(hash)]) -#define LPROF_UNLOCK(hash) mtx_unlock_spin(&lprof_locks[LPROF_LHASH(hash)]) - -#ifdef _KERNEL -extern struct mtx lprof_locks[LPROF_LOCK_SIZE]; extern int lock_prof_enable; -void _lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, const char *file, int line); -void _lock_profile_update_wait(struct lock_object *lo, uint64_t waitstart); -void _lock_profile_release_lock(struct lock_object *lo); - -static inline void lock_profile_object_init(struct lock_object *lo, struct lock_class *class, const char *name) { - const char *p; - u_int hash = 0; - struct lock_profile_object *l = &lo->lo_profile_obj; - - l->lpo_acqtime = 0; - l->lpo_waittime = 0; - l->lpo_filename = NULL; - l->lpo_lineno = 0; - l->lpo_contest_holding = 0; - l->lpo_contest_locking = 0; - l->lpo_type = class->lc_name; - - /* Hash the mutex name to an int so we don't have to strcmp() it repeatedly */ - for (p = name; *p != '\0'; p++) - hash = 31 * hash + *p; - l->lpo_namehash = hash; -#if 0 - if (opts & MTX_PROFILE) - l->lpo_stack = stack_create(); -#endif -} - +void lock_profile_obtain_lock_success(struct lock_object *lo, int contested, + uint64_t waittime, const char *file, int line); +void lock_profile_release_lock(struct lock_object *lo); -static inline void -lock_profile_object_destroy(struct lock_object *lo) +static inline void +lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested, + uint64_t *waittime) { -#if 0 - struct lock_profile_object *l = &lo->lo_profile_obj; - if (lo->lo_flags & LO_PROFILE) - stack_destroy(l->lpo_stack); -#endif + if (!lock_prof_enable || (lo->lo_flags & LO_NOPROFILE) || *contested) + return; + *waittime = nanoseconds(); + *contested = 1; } -static inline void lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested, - uint64_t *waittime) +#else /* !LOCK_PROFILING */ + +static inline void +lock_profile_release_lock(struct lock_object *lo) { - struct lock_profile_object *l = &lo->lo_profile_obj; - - if (!(lo->lo_flags & LO_NOPROFILE) && lock_prof_enable && - *contested == 0) { - *waittime = nanoseconds(); - 
atomic_add_int(&l->lpo_contest_holding, 1); - *contested = 1; - } } -static inline void lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, const char *file, int line) +static inline void +lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested, uint64_t *waittime) { - - /* don't reset the timer when/if recursing */ - if (!(lo->lo_flags & LO_NOPROFILE) && lock_prof_enable && - lo->lo_profile_obj.lpo_acqtime == 0) { -#ifdef LOCK_PROFILING_FAST - if (contested == 0) - return; -#endif - _lock_profile_obtain_lock_success(lo, contested, waittime, file, line); - } } -static inline void lock_profile_release_lock(struct lock_object *lo) -{ - struct lock_profile_object *l = &lo->lo_profile_obj; - if (!(lo->lo_flags & LO_NOPROFILE) && l->lpo_acqtime) - _lock_profile_release_lock(lo); +static inline void +lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, + const char *file, int line) +{ } -#endif /* _KERNEL */ - -#else /* !LOCK_PROFILING */ - -#ifdef _KERNEL -static inline void lock_profile_update_wait(struct lock_object *lo, uint64_t waitstart) {;} -static inline void lock_profile_update_contest_locking(struct lock_object *lo, int contested) {;} -static inline void lock_profile_release_lock(struct lock_object *lo) {;} -static inline void lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested, uint64_t *waittime) {;} -static inline void lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, - const char *file, int line) {;} -static inline void lock_profile_object_destroy(struct lock_object *lo) {;} -static inline void lock_profile_object_init(struct lock_object *lo, struct lock_class *class, const char *name) {;} +#endif /* !LOCK_PROFILING */ #endif /* _KERNEL */ -#endif /* !LOCK_PROFILING */ - #endif /* _SYS_LOCK_PROFILE_H_ */ diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 7d0aca4..7ebb8c2 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -45,6 +45,7 @@ #endif #include #include +#include #include #include #include /* XXX. */ @@ -298,6 +299,7 @@ struct thread { struct td_sched *td_sched; /* (*) Scheduler-specific data. */ struct kaudit_record *td_ar; /* (k) Active audit record, if any. */ int td_syscalls; /* per-thread syscall count (used by NFS :)) */ + struct lpohead td_lprof[2]; /* (a) lock profiling objects. */ }; struct mtx *thread_lock_block(struct thread *); diff --git a/sys/sys/sx.h b/sys/sys/sx.h index 47fdae6..5df5f36 100644 --- a/sys/sys/sx.h +++ b/sys/sys/sx.h @@ -178,11 +178,9 @@ __sx_slock(struct sx *sx, int opts, const char *file, int line) if (!(x & SX_LOCK_SHARED) || !atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) error = _sx_slock_hard(sx, opts, file, line); -#ifdef LOCK_PROFILING_SHARED - else if (SX_SHARERS(x) == 0) + else lock_profile_obtain_lock_success(&sx->lock_object, 0, 0, file, line); -#endif return (error); } -- cgit v1.1
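
Usage note (not part of the commit): with this change applied, the profiler is driven entirely through the sysctl tree created in subr_lock.c -- debug.lock.prof.enable, debug.lock.prof.stats, debug.lock.prof.reset, and the debug.lock.prof.skipspin / debug.lock.prof.skipcount tunables. The sketch below is a hypothetical userland consumer of those OIDs; the program itself, its ten-second sampling window, and the choice of sysctlbyname(3) over sysctl(8) are illustrative assumptions, not something this patch ships.

/*
 * Hypothetical example: enable the new lock profiler, sample for a
 * while, then dump the formatted statistics.  The OID names are the
 * ones registered by subr_lock.c in this patch; everything else is
 * an assumption made for illustration.
 */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	int zero = 0, one = 1;
	size_t len;
	char *buf;

	/* Writing 1 also resets the per-cpu tables (see enable_lock_prof()). */
	if (sysctlbyname("debug.lock.prof.enable", NULL, NULL, &one,
	    sizeof(one)) != 0)
		err(1, "debug.lock.prof.enable");

	sleep(10);	/* run the workload of interest here */

	if (sysctlbyname("debug.lock.prof.enable", NULL, NULL, &zero,
	    sizeof(zero)) != 0)
		err(1, "debug.lock.prof.enable");

	/*
	 * debug.lock.prof.stats is a string OID: size it, then fetch it.
	 * If records are added between the two calls the buffer could be
	 * short; ignored here for brevity.
	 */
	if (sysctlbyname("debug.lock.prof.stats", NULL, &len, NULL, 0) != 0)
		err(1, "debug.lock.prof.stats");
	if ((buf = malloc(len)) == NULL)
		err(1, "malloc");
	if (sysctlbyname("debug.lock.prof.stats", buf, &len, NULL, 0) != 0)
		err(1, "debug.lock.prof.stats");
	fputs(buf, stdout);
	free(buf);

	return (0);
}

Interactively, sysctl debug.lock.prof.enable=1 followed later by sysctl debug.lock.prof.stats does the same job. The max/total/wait_total/count/avg/wait_avg columns in the output correspond to the cnt_* fields summed per (file, line, lock name) in lock_prof_sum() above, and cnt_hold is always reported as 0 by design, as noted in the commit message.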