author    jeff <jeff@FreeBSD.org>  2007-12-15 23:13:31 +0000
committer jeff <jeff@FreeBSD.org>  2007-12-15 23:13:31 +0000
commit    12adc443d67286deeee69e764d979c963403497d
tree      5aa1ecb0fadd118191701a2b1c611fcee7216753
parent    96bf4f52953dddc12e3490919ba932dcfb5bc76d
- Re-implement lock profiling in such a way that it no longer breaks
  the ABI when enabled.  There is no longer an embedded lock_profile_object
  in each lock.  Instead a list of lock_profile_objects is kept per-thread
  for each lock it may own.  The cnt_hold statistic is now always 0 to
  facilitate this.
- Support shared locking by tracking individual lock instances and
  statistics in the per-thread per-instance lock_profile_object.
- Make the lock profiling hash table a per-cpu singly linked list with a
  per-cpu static lock_prof allocator.  This removes the need for an array
  of spinlocks and reduces cache contention between cores.
- Use a separate hash for spinlocks and other locks so that only a
  critical_enter() is required and not a spinlock_enter() to modify the
  per-cpu tables.
- Count time spent spinning in the lock statistics.
- Remove the LOCK_PROFILING_SHARED option as it is always supported now.
- Specifically drop and release the scheduler locks in both schedulers
  since we track owners now.

In collaboration with:	Kip Macy
Sponsored by:		Nokia
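(Reader's note, not part of the commit: the converted code paths in
kern_mutex.c, kern_rwlock.c, and kern_sx.c below all follow the same
profiling call pattern.  A minimal sketch of that pattern is shown here
as an aid to reading the diff; try_acquire() and wait_for_lock() are
hypothetical placeholders for the lock-specific fast path and the
spin/sleep logic, not real kernel APIs.)

	uint64_t waittime = 0;
	int contested = 0;

	while (!try_acquire(lock)) {
		/*
		 * Records the wait start via nanoseconds() on the
		 * first failed attempt only; later failures are a no-op
		 * because *contested is already set.
		 */
		lock_profile_obtain_lock_failed(&lock->lock_object,
		    &contested, &waittime);
		wait_for_lock(lock);		/* spin or sleep */
	}
	/*
	 * Charges the accumulated wait time and marks the acquisition
	 * in this thread's per-instance lock_profile_object.
	 */
	lock_profile_obtain_lock_success(&lock->lock_object, contested,
	    waittime, file, line);

	/* ... critical section ... */

	/*
	 * On unlock, folds the hold and wait times into the per-cpu
	 * lock_prof hash under a critical section.
	 */
	lock_profile_release_lock(&lock->lock_object);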
-rw-r--r--  sys/conf/options        |   1
-rw-r--r--  sys/kern/kern_mutex.c   |  26
-rw-r--r--  sys/kern/kern_rwlock.c  |  31
-rw-r--r--  sys/kern/kern_sx.c      |  34
-rw-r--r--  sys/kern/kern_thread.c  |   2
-rw-r--r--  sys/kern/sched_4bsd.c   |   7
-rw-r--r--  sys/kern/sched_ule.c    |   6
-rw-r--r--  sys/kern/subr_lock.c    | 656
-rw-r--r--  sys/sys/_lock.h         |  25
-rw-r--r--  sys/sys/lock_profile.h  | 139
-rw-r--r--  sys/sys/proc.h          |   2
-rw-r--r--  sys/sys/sx.h            |   4

12 files changed, 500 insertions(+), 433 deletions(-)
diff --git a/sys/conf/options b/sys/conf/options
index 6a00e35..e76c61e 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -540,7 +540,6 @@ MUTEX_DEBUG opt_global.h
MUTEX_NOINLINE opt_global.h
LOCK_PROFILING opt_global.h
LOCK_PROFILING_FAST opt_global.h
-LOCK_PROFILING_SHARED opt_global.h
MSIZE opt_global.h
REGRESSION opt_global.h
RESTARTABLE_PANICS opt_global.h
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 727871d..81be32d 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -123,20 +123,6 @@ struct lock_class lock_class_mtx_spin = {
struct mtx blocked_lock;
struct mtx Giant;
-#ifdef LOCK_PROFILING
-static inline void lock_profile_init(void)
-{
- int i;
- /* Initialize the mutex profiling locks */
- for (i = 0; i < LPROF_LOCK_SIZE; i++) {
- mtx_init(&lprof_locks[i], "mprof lock",
- NULL, MTX_SPIN|MTX_QUIET|MTX_NOPROFILE);
- }
-}
-#else
-static inline void lock_profile_init(void) {;}
-#endif
-
void
assert_mtx(struct lock_object *lock, int what)
{
@@ -425,7 +411,7 @@ _mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file,
}
#endif
lock_profile_obtain_lock_success(&m->lock_object, contested,
- waittime, (file), (line));
+ waittime, file, line);
}
static void
@@ -514,7 +500,8 @@ retry:
m->mtx_recurse++;
break;
}
- lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime);
+ lock_profile_obtain_lock_failed(&m->lock_object,
+ &contested, &waittime);
/* Give interrupts a chance while we spin. */
spinlock_exit();
while (m->mtx_lock != MTX_UNOWNED) {
@@ -535,8 +522,9 @@ retry:
break;
_rel_spin_lock(m); /* does spinlock_exit() */
}
- lock_profile_obtain_lock_success(&m->lock_object, contested,
- waittime, (file), (line));
+ if (m->mtx_recurse == 0)
+ lock_profile_obtain_lock_success(&m->lock_object, contested,
+ waittime, (file), (line));
WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
}
@@ -794,8 +782,6 @@ mutex_init(void)
mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN | MTX_RECURSE);
mtx_init(&devmtx, "cdev", NULL, MTX_DEF);
mtx_lock(&Giant);
-
- lock_profile_init();
}
#ifdef DDB
diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c
index 4b8b4ca..43c1d3c 100644
--- a/sys/kern/kern_rwlock.c
+++ b/sys/kern/kern_rwlock.c
@@ -227,10 +227,8 @@ _rw_rlock(struct rwlock *rw, const char *file, int line)
#ifdef ADAPTIVE_RWLOCKS
volatile struct thread *owner;
#endif
-#ifdef LOCK_PROFILING_SHARED
uint64_t waittime = 0;
int contested = 0;
-#endif
uintptr_t x;
KASSERT(rw->rw_lock != RW_DESTROYED,
@@ -273,12 +271,6 @@ _rw_rlock(struct rwlock *rw, const char *file, int line)
MPASS((x & RW_LOCK_READ_WAITERS) == 0);
if (atomic_cmpset_acq_ptr(&rw->rw_lock, x,
x + RW_ONE_READER)) {
-#ifdef LOCK_PROFILING_SHARED
- if (RW_READERS(x) == 0)
- lock_profile_obtain_lock_success(
- &rw->lock_object, contested,
- waittime, file, line);
-#endif
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR4(KTR_LOCK,
"%s: %p succeed %p -> %p", __func__,
@@ -289,6 +281,8 @@ _rw_rlock(struct rwlock *rw, const char *file, int line)
cpu_spinwait();
continue;
}
+ lock_profile_obtain_lock_failed(&rw->lock_object,
+ &contested, &waittime);
#ifdef ADAPTIVE_RWLOCKS
/*
@@ -301,10 +295,6 @@ _rw_rlock(struct rwlock *rw, const char *file, int line)
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
__func__, rw, owner);
-#ifdef LOCK_PROFILING_SHARED
- lock_profile_obtain_lock_failed(&rw->lock_object,
- &contested, &waittime);
-#endif
while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
TD_IS_RUNNING(owner))
cpu_spinwait();
@@ -369,10 +359,6 @@ _rw_rlock(struct rwlock *rw, const char *file, int line)
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
rw);
-#ifdef LOCK_PROFILING_SHARED
- lock_profile_obtain_lock_failed(&rw->lock_object, &contested,
- &waittime);
-#endif
turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
@@ -384,7 +370,8 @@ _rw_rlock(struct rwlock *rw, const char *file, int line)
* however. turnstiles don't like owners changing between calls to
* turnstile_wait() currently.
*/
-
+ lock_profile_obtain_lock_success( &rw->lock_object, contested,
+ waittime, file, line);
LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
WITNESS_LOCK(&rw->lock_object, 0, file, line);
curthread->td_locks++;
@@ -431,9 +418,6 @@ _rw_runlock(struct rwlock *rw, const char *file, int line)
*/
KASSERT(!(x & RW_LOCK_READ_WAITERS),
("%s: waiting readers", __func__));
-#ifdef LOCK_PROFILING_SHARED
- lock_profile_release_lock(&rw->lock_object);
-#endif
/*
* If there aren't any waiters for a write lock, then try
@@ -510,6 +494,7 @@ _rw_runlock(struct rwlock *rw, const char *file, int line)
turnstile_chain_unlock(&rw->lock_object);
break;
}
+ lock_profile_release_lock(&rw->lock_object);
}
/*
@@ -544,6 +529,8 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
while (!_rw_write_lock(rw, tid)) {
+ lock_profile_obtain_lock_failed(&rw->lock_object,
+ &contested, &waittime);
#ifdef ADAPTIVE_RWLOCKS
/*
* If the lock is write locked and the owner is
@@ -556,8 +543,6 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
__func__, rw, owner);
- lock_profile_obtain_lock_failed(&rw->lock_object,
- &contested, &waittime);
while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
TD_IS_RUNNING(owner))
cpu_spinwait();
@@ -641,8 +626,6 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
rw);
- lock_profile_obtain_lock_failed(&rw->lock_object, &contested,
- &waittime);
turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index 1e3f135..bc172e5 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -302,11 +302,8 @@ _sx_sunlock(struct sx *sx, const char *file, int line)
curthread->td_locks--;
WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
-#ifdef LOCK_PROFILING_SHARED
- if (SX_SHARERS(sx->sx_lock) == 1)
- lock_profile_release_lock(&sx->lock_object);
-#endif
__sx_sunlock(sx, file, line);
+ lock_profile_release_lock(&sx->lock_object);
}
void
@@ -450,6 +447,8 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);
while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) {
+ lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
+ &waittime);
#ifdef ADAPTIVE_SX
/*
* If the lock is write locked and the owner is
@@ -467,8 +466,6 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
"%s: spinning on %p held by %p",
__func__, sx, owner);
GIANT_SAVE();
- lock_profile_obtain_lock_failed(
- &sx->lock_object, &contested, &waittime);
while (SX_OWNER(sx->sx_lock) == x &&
TD_IS_RUNNING(owner))
cpu_spinwait();
@@ -555,8 +552,6 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
__func__, sx);
GIANT_SAVE();
- lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
- &waittime);
sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE);
@@ -648,10 +643,8 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
#ifdef ADAPTIVE_SX
volatile struct thread *owner;
#endif
-#ifdef LOCK_PROFILING_SHARED
uint64_t waittime = 0;
int contested = 0;
-#endif
uintptr_t x;
int error = 0;
@@ -672,12 +665,6 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
MPASS(!(x & SX_LOCK_SHARED_WAITERS));
if (atomic_cmpset_acq_ptr(&sx->sx_lock, x,
x + SX_ONE_SHARER)) {
-#ifdef LOCK_PROFILING_SHARED
- if (SX_SHARERS(x) == 0)
- lock_profile_obtain_lock_success(
- &sx->lock_object, contested,
- waittime, file, line);
-#endif
if (LOCK_LOG_TEST(&sx->lock_object, 0))
CTR4(KTR_LOCK,
"%s: %p succeed %p -> %p", __func__,
@@ -687,6 +674,8 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
}
continue;
}
+ lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
+ &waittime);
#ifdef ADAPTIVE_SX
/*
@@ -694,7 +683,7 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
* the owner stops running or the state of the lock
* changes.
*/
- else if (sx->lock_object.lo_flags & SX_ADAPTIVESPIN) {
+ if (sx->lock_object.lo_flags & SX_ADAPTIVESPIN) {
x = SX_OWNER(x);
owner = (struct thread *)x;
if (TD_IS_RUNNING(owner)) {
@@ -703,10 +692,6 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
"%s: spinning on %p held by %p",
__func__, sx, owner);
GIANT_SAVE();
-#ifdef LOCK_PROFILING_SHARED
- lock_profile_obtain_lock_failed(
- &sx->lock_object, &contested, &waittime);
-#endif
while (SX_OWNER(sx->sx_lock) == x &&
TD_IS_RUNNING(owner))
cpu_spinwait();
@@ -772,10 +757,6 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
__func__, sx);
GIANT_SAVE();
-#ifdef LOCK_PROFILING_SHARED
- lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
- &waittime);
-#endif
sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE);
@@ -795,6 +776,9 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
__func__, sx);
}
+ if (error == 0)
+ lock_profile_obtain_lock_success(&sx->lock_object, contested,
+ waittime, file, line);
GIANT_RESTORE();
return (error);
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index e176b87..93ff5a7 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -555,6 +555,8 @@ thread_link(struct thread *td, struct proc *p)
td->td_flags = TDF_INMEM;
LIST_INIT(&td->td_contested);
+ LIST_INIT(&td->td_lprof[0]);
+ LIST_INIT(&td->td_lprof[1]);
sigqueue_init(&td->td_sigqueue, p);
callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index f6e702e..e1e5c91 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -878,9 +878,11 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
-
/* I feel sleepy */
+ lock_profile_release_lock(&sched_lock.lock_object);
cpu_switch(td, newtd, td->td_lock);
+ lock_profile_obtain_lock_success(&sched_lock.lock_object,
+ 0, 0, __FILE__, __LINE__);
/*
* Where am I? What year is it?
* We are in the same thread that went to sleep above,
@@ -1375,6 +1377,7 @@ sched_throw(struct thread *td)
mtx_lock_spin(&sched_lock);
spinlock_exit();
} else {
+ lock_profile_release_lock(&sched_lock.lock_object);
MPASS(td->td_lock == &sched_lock);
}
mtx_assert(&sched_lock, MA_OWNED);
@@ -1394,6 +1397,8 @@ sched_fork_exit(struct thread *td)
*/
td->td_oncpu = PCPU_GET(cpuid);
sched_lock.mtx_lock = (uintptr_t)td;
+ lock_profile_obtain_lock_success(&sched_lock.lock_object,
+ 0, 0, __FILE__, __LINE__);
THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);
}
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index e9d9468..6841bab 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -1894,6 +1894,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
+ lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object);
TDQ_LOCKPTR(tdq)->mtx_lock = (uintptr_t)newtd;
cpu_switch(td, newtd, mtx);
/*
@@ -1903,6 +1904,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
*/
cpuid = PCPU_GET(cpuid);
tdq = TDQ_CPU(cpuid);
+ lock_profile_obtain_lock_success(
+ &TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__);
#ifdef HWPMC_HOOKS
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
@@ -2618,6 +2621,7 @@ sched_throw(struct thread *td)
} else {
MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
tdq_load_rem(tdq, td->td_sched);
+ lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object);
}
KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
newtd = choosethread();
@@ -2650,6 +2654,8 @@ sched_fork_exit(struct thread *td)
MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
td->td_oncpu = cpuid;
TDQ_LOCK_ASSERT(tdq, MA_OWNED | MA_NOTRECURSED);
+ lock_profile_obtain_lock_success(
+ &TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__);
}
static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0,
diff --git a/sys/kern/subr_lock.c b/sys/kern/subr_lock.c
index ebb3c35..19f3639 100644
--- a/sys/kern/subr_lock.c
+++ b/sys/kern/subr_lock.c
@@ -40,17 +40,24 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/linker_set.h>
#include <sys/lock.h>
+#include <sys/lock_profile.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
#include <sys/sbuf.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
-#include <sys/lock_profile.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
+#include <machine/cpufunc.h>
+
CTASSERT(LOCK_CLASS_MAX == 15);
struct lock_class *lock_classes[LOCK_CLASS_MAX + 1] = {
@@ -62,136 +69,6 @@ struct lock_class *lock_classes[LOCK_CLASS_MAX + 1] = {
&lock_class_lockmgr,
};
-#ifdef LOCK_PROFILING
-#include <machine/cpufunc.h>
-
-SYSCTL_NODE(_debug, OID_AUTO, lock, CTLFLAG_RD, NULL, "lock debugging");
-SYSCTL_NODE(_debug_lock, OID_AUTO, prof, CTLFLAG_RD, NULL, "lock profiling");
-int lock_prof_enable = 0;
-SYSCTL_INT(_debug_lock_prof, OID_AUTO, enable, CTLFLAG_RW,
- &lock_prof_enable, 0, "Enable lock profiling");
-
-/*
- * lprof_buf is a static pool of profiling records to avoid possible
- * reentrance of the memory allocation functions.
- *
- * Note: NUM_LPROF_BUFFERS must be smaller than LPROF_HASH_SIZE.
- */
-struct lock_prof lprof_buf[LPROF_HASH_SIZE];
-static int allocated_lprof_buf;
-struct mtx lprof_locks[LPROF_LOCK_SIZE];
-
-
-/* SWAG: sbuf size = avg stat. line size * number of locks */
-#define LPROF_SBUF_SIZE 256 * 400
-
-static int lock_prof_acquisitions;
-SYSCTL_INT(_debug_lock_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
- &lock_prof_acquisitions, 0, "Number of lock acquistions recorded");
-static int lock_prof_records;
-SYSCTL_INT(_debug_lock_prof, OID_AUTO, records, CTLFLAG_RD,
- &lock_prof_records, 0, "Number of profiling records");
-static int lock_prof_maxrecords = LPROF_HASH_SIZE;
-SYSCTL_INT(_debug_lock_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
- &lock_prof_maxrecords, 0, "Maximum number of profiling records");
-static int lock_prof_rejected;
-SYSCTL_INT(_debug_lock_prof, OID_AUTO, rejected, CTLFLAG_RD,
- &lock_prof_rejected, 0, "Number of rejected profiling records");
-static int lock_prof_hashsize = LPROF_HASH_SIZE;
-SYSCTL_INT(_debug_lock_prof, OID_AUTO, hashsize, CTLFLAG_RD,
- &lock_prof_hashsize, 0, "Hash size");
-static int lock_prof_collisions = 0;
-SYSCTL_INT(_debug_lock_prof, OID_AUTO, collisions, CTLFLAG_RD,
- &lock_prof_collisions, 0, "Number of hash collisions");
-
-#ifndef USE_CPU_NANOSECONDS
-u_int64_t
-nanoseconds(void)
-{
- struct timespec tv;
-
- nanotime(&tv);
- return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec);
-}
-#endif
-
-static int
-dump_lock_prof_stats(SYSCTL_HANDLER_ARGS)
-{
- struct sbuf *sb;
- int error, i;
- static int multiplier = 1;
- const char *p;
-
- if (allocated_lprof_buf == 0)
- return (SYSCTL_OUT(req, "No locking recorded",
- sizeof("No locking recorded")));
-
-retry_sbufops:
- sb = sbuf_new(NULL, NULL, LPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN);
- sbuf_printf(sb, "\n%6s %12s %12s %11s %5s %5s %12s %12s %s\n",
- "max", "total", "wait_total", "count", "avg", "wait_avg", "cnt_hold", "cnt_lock", "name");
- for (i = 0; i < LPROF_HASH_SIZE; ++i) {
- if (lprof_buf[i].name == NULL)
- continue;
- for (p = lprof_buf[i].file;
- p != NULL && strncmp(p, "../", 3) == 0; p += 3)
- /* nothing */ ;
- sbuf_printf(sb, "%6ju %12ju %12ju %11ju %5ju %5ju %12ju %12ju %s:%d (%s:%s)\n",
- lprof_buf[i].cnt_max / 1000,
- lprof_buf[i].cnt_tot / 1000,
- lprof_buf[i].cnt_wait / 1000,
- lprof_buf[i].cnt_cur,
- lprof_buf[i].cnt_cur == 0 ? (uintmax_t)0 :
- lprof_buf[i].cnt_tot / (lprof_buf[i].cnt_cur * 1000),
- lprof_buf[i].cnt_cur == 0 ? (uintmax_t)0 :
- lprof_buf[i].cnt_wait / (lprof_buf[i].cnt_cur * 1000),
- lprof_buf[i].cnt_contest_holding,
- lprof_buf[i].cnt_contest_locking,
- p, lprof_buf[i].line,
- lprof_buf[i].type,
- lprof_buf[i].name);
- if (sbuf_overflowed(sb)) {
- sbuf_delete(sb);
- multiplier++;
- goto retry_sbufops;
- }
- }
-
- sbuf_finish(sb);
- error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
- sbuf_delete(sb);
- return (error);
-}
-static int
-reset_lock_prof_stats(SYSCTL_HANDLER_ARGS)
-{
- int error, v;
-
- if (allocated_lprof_buf == 0)
- return (0);
-
- v = 0;
- error = sysctl_handle_int(oidp, &v, 0, req);
- if (error)
- return (error);
- if (req->newptr == NULL)
- return (error);
- if (v == 0)
- return (0);
-
- bzero(lprof_buf, LPROF_HASH_SIZE*sizeof(*lprof_buf));
- allocated_lprof_buf = 0;
- return (0);
-}
-
-SYSCTL_PROC(_debug_lock_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
- NULL, 0, dump_lock_prof_stats, "A", "Lock profiling statistics");
-
-SYSCTL_PROC(_debug_lock_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
- NULL, 0, reset_lock_prof_stats, "I", "Reset lock profiling statistics");
-#endif
-
void
lock_init(struct lock_object *lock, struct lock_class *class, const char *name,
const char *type, int flags)
@@ -216,7 +93,6 @@ lock_init(struct lock_object *lock, struct lock_class *class, const char *name,
lock->lo_flags |= flags | LO_INITIALIZED;
LOCK_LOG_INIT(lock, 0);
WITNESS_INIT(lock);
- lock_profile_object_init(lock, class, name);
}
void
@@ -224,7 +100,6 @@ lock_destroy(struct lock_object *lock)
{
KASSERT(lock_initalized(lock), ("lock %p is not initialized", lock));
- lock_profile_object_destroy(lock);
WITNESS_DESTROY(lock);
LOCK_LOG_DESTROY(lock, 0);
lock->lo_flags &= ~LO_INITIALIZED;
@@ -253,17 +128,376 @@ DB_SHOW_COMMAND(lock, db_show_lock)
#endif
#ifdef LOCK_PROFILING
-void _lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, const char *file, int line)
+
+/*
+ * One object per-thread for each lock the thread owns. Tracks individual
+ * lock instances.
+ */
+struct lock_profile_object {
+ LIST_ENTRY(lock_profile_object) lpo_link;
+ struct lock_object *lpo_obj;
+ const char *lpo_file;
+ int lpo_line;
+ uint16_t lpo_ref;
+ uint16_t lpo_cnt;
+ u_int64_t lpo_acqtime;
+ u_int64_t lpo_waittime;
+ u_int lpo_contest_locking;
+};
+
+/*
+ * One lock_prof for each (file, line, lock object) triple.
+ */
+struct lock_prof {
+ SLIST_ENTRY(lock_prof) link;
+ const char *file;
+ const char *name;
+ int line;
+ int ticks;
+ const char *type;
+ uintmax_t cnt_max;
+ uintmax_t cnt_tot;
+ uintmax_t cnt_wait;
+ uintmax_t cnt_cur;
+ uintmax_t cnt_contest_locking;
+};
+
+SLIST_HEAD(lphead, lock_prof);
+
+#define LPROF_HASH_SIZE 4096
+#define LPROF_HASH_MASK (LPROF_HASH_SIZE - 1)
+#define LPROF_CACHE_SIZE 4096
+
+/*
+ * Array of objects and profs for each type of object for each cpu. Spinlocks
+ * are handled seperately because a thread may be preempted and acquire a
+ * spinlock while in the lock profiling code of a non-spinlock. In this way
+ * we only need a critical section to protect the per-cpu lists.
+ */
+struct lock_prof_type {
+ struct lphead lpt_lpalloc;
+ struct lpohead lpt_lpoalloc;
+ struct lphead lpt_hash[LPROF_HASH_SIZE];
+ struct lock_prof lpt_prof[LPROF_CACHE_SIZE];
+ struct lock_profile_object lpt_objs[LPROF_CACHE_SIZE];
+};
+
+struct lock_prof_cpu {
+ struct lock_prof_type lpc_types[2]; /* One for spin one for other. */
+};
+
+struct lock_prof_cpu *lp_cpu[MAXCPU];
+
+int lock_prof_enable = 0;
+
+/* SWAG: sbuf size = avg stat. line size * number of locks */
+#define LPROF_SBUF_SIZE 256 * 400
+
+static int lock_prof_rejected;
+static int lock_prof_skipspin;
+static int lock_prof_skipcount;
+
+#ifndef USE_CPU_NANOSECONDS
+u_int64_t
+nanoseconds(void)
{
- struct lock_profile_object *l = &lo->lo_profile_obj;
+ struct bintime bt;
+ u_int64_t ns;
+
+ binuptime(&bt);
+ /* From bintime2timespec */
+ ns = bt.sec * (u_int64_t)1000000000;
+ ns += ((uint64_t)1000000000 * (uint32_t)(bt.frac >> 32)) >> 32;
+ return (ns);
+}
+#endif
- lo->lo_profile_obj.lpo_contest_holding = 0;
-
- if (contested)
- lo->lo_profile_obj.lpo_contest_locking++;
+static void
+lock_prof_init_type(struct lock_prof_type *type)
+{
+ int i;
+
+ SLIST_INIT(&type->lpt_lpalloc);
+ LIST_INIT(&type->lpt_lpoalloc);
+ for (i = 0; i < LPROF_CACHE_SIZE; i++) {
+ SLIST_INSERT_HEAD(&type->lpt_lpalloc, &type->lpt_prof[i],
+ link);
+ LIST_INSERT_HEAD(&type->lpt_lpoalloc, &type->lpt_objs[i],
+ lpo_link);
+ }
+}
+
+static void
+lock_prof_init(void *arg)
+{
+ int cpu;
+
+ for (cpu = 0; cpu <= mp_maxid; cpu++) {
+ lp_cpu[cpu] = malloc(sizeof(*lp_cpu[cpu]), M_DEVBUF,
+ M_WAITOK | M_ZERO);
+ lock_prof_init_type(&lp_cpu[cpu]->lpc_types[0]);
+ lock_prof_init_type(&lp_cpu[cpu]->lpc_types[1]);
+ }
+}
+SYSINIT(lockprof, SI_SUB_SMP, SI_ORDER_ANY, lock_prof_init, NULL);
+
+static void
+lock_prof_reset(void)
+{
+ struct lock_prof_cpu *lpc;
+ int enabled, i, cpu;
+
+ enabled = lock_prof_enable;
+ lock_prof_enable = 0;
+ for (cpu = 0; cpu <= mp_maxid; cpu++) {
+ lpc = lp_cpu[cpu];
+ for (i = 0; i < LPROF_CACHE_SIZE; i++) {
+ LIST_REMOVE(&lpc->lpc_types[0].lpt_objs[i], lpo_link);
+ LIST_REMOVE(&lpc->lpc_types[1].lpt_objs[i], lpo_link);
+ }
+ bzero(lpc, sizeof(*lpc));
+ lock_prof_init_type(&lpc->lpc_types[0]);
+ lock_prof_init_type(&lpc->lpc_types[1]);
+ }
+ lock_prof_enable = enabled;
+}
+
+static void
+lock_prof_output(struct lock_prof *lp, struct sbuf *sb)
+{
+ const char *p;
+
+ for (p = lp->file; p != NULL && strncmp(p, "../", 3) == 0; p += 3);
+ sbuf_printf(sb,
+ "%6ju %12ju %12ju %11ju %5ju %5ju %12ju %12ju %s:%d (%s:%s)\n",
+ lp->cnt_max / 1000, lp->cnt_tot / 1000,
+ lp->cnt_wait / 1000, lp->cnt_cur,
+ lp->cnt_cur == 0 ? (uintmax_t)0 :
+ lp->cnt_tot / (lp->cnt_cur * 1000),
+ lp->cnt_cur == 0 ? (uintmax_t)0 :
+ lp->cnt_wait / (lp->cnt_cur * 1000),
+ (uintmax_t)0, lp->cnt_contest_locking,
+ p, lp->line, lp->type, lp->name);
+}
+
+static void
+lock_prof_sum(struct lock_prof *match, struct lock_prof *dst, int hash,
+ int spin, int t)
+{
+ struct lock_prof_type *type;
+ struct lock_prof *l;
+ int cpu;
+
+ dst->file = match->file;
+ dst->line = match->line;
+ dst->type = match->type;
+ dst->name = match->name;
+
+ for (cpu = 0; cpu <= mp_maxid; cpu++) {
+ if (lp_cpu[cpu] == NULL)
+ continue;
+ type = &lp_cpu[cpu]->lpc_types[spin];
+ SLIST_FOREACH(l, &type->lpt_hash[hash], link) {
+ if (l->ticks == t)
+ continue;
+ if (l->file != match->file || l->line != match->line ||
+ l->name != match->name || l->type != match->type)
+ continue;
+ l->ticks = t;
+ if (l->cnt_max > dst->cnt_max)
+ dst->cnt_max = l->cnt_max;
+ dst->cnt_tot += l->cnt_tot;
+ dst->cnt_wait += l->cnt_wait;
+ dst->cnt_cur += l->cnt_cur;
+ dst->cnt_contest_locking += l->cnt_contest_locking;
+ }
+ }
- l->lpo_filename = file;
- l->lpo_lineno = line;
+}
+
+static void
+lock_prof_type_stats(struct lock_prof_type *type, struct sbuf *sb, int spin,
+ int t)
+{
+ struct lock_prof *l;
+ int i;
+
+ for (i = 0; i < LPROF_HASH_SIZE; ++i) {
+ SLIST_FOREACH(l, &type->lpt_hash[i], link) {
+ struct lock_prof lp = {};
+
+ if (l->ticks == t)
+ continue;
+ lock_prof_sum(l, &lp, i, spin, t);
+ lock_prof_output(&lp, sb);
+ if (sbuf_overflowed(sb))
+ return;
+ }
+ }
+}
+
+static int
+dump_lock_prof_stats(SYSCTL_HANDLER_ARGS)
+{
+ static int multiplier = 1;
+ struct sbuf *sb;
+ int error, cpu, t;
+
+retry_sbufops:
+ sb = sbuf_new(NULL, NULL, LPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN);
+ sbuf_printf(sb, "\n%6s %12s %12s %11s %5s %5s %12s %12s %s\n",
+ "max", "total", "wait_total", "count", "avg", "wait_avg", "cnt_hold", "cnt_lock", "name");
+ t = ticks;
+ for (cpu = 0; cpu <= mp_maxid; cpu++) {
+ if (lp_cpu[cpu] == NULL)
+ continue;
+ lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[0], sb, 0, t);
+ lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[1], sb, 1, t);
+ if (sbuf_overflowed(sb)) {
+ sbuf_delete(sb);
+ multiplier++;
+ goto retry_sbufops;
+ }
+ }
+
+ sbuf_finish(sb);
+ error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
+ sbuf_delete(sb);
+ return (error);
+}
+
+static int
+enable_lock_prof(SYSCTL_HANDLER_ARGS)
+{
+ int error, v;
+
+ v = lock_prof_enable;
+ error = sysctl_handle_int(oidp, &v, v, req);
+ if (error)
+ return (error);
+ if (req->newptr == NULL)
+ return (error);
+ if (v == lock_prof_enable)
+ return (0);
+ if (v == 1)
+ lock_prof_reset();
+ lock_prof_enable = !!v;
+
+ return (0);
+}
+
+static int
+reset_lock_prof_stats(SYSCTL_HANDLER_ARGS)
+{
+ int error, v;
+
+ v = 0;
+ error = sysctl_handle_int(oidp, &v, 0, req);
+ if (error)
+ return (error);
+ if (req->newptr == NULL)
+ return (error);
+ if (v == 0)
+ return (0);
+ lock_prof_reset();
+
+ return (0);
+}
+
+static struct lock_prof *
+lock_profile_lookup(struct lock_object *lo, int spin, const char *file,
+ int line)
+{
+ const char *unknown = "(unknown)";
+ struct lock_prof_type *type;
+ struct lock_prof *lp;
+ struct lphead *head;
+ const char *p;
+ u_int hash;
+
+ p = file;
+ if (p == NULL || *p == '\0')
+ p = unknown;
+ hash = (uintptr_t)lo->lo_name * 31 + (uintptr_t)p * 31 + line;
+ hash &= LPROF_HASH_MASK;
+ type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin];
+ head = &type->lpt_hash[hash];
+ SLIST_FOREACH(lp, head, link) {
+ if (lp->line == line && lp->file == p &&
+ lp->name == lo->lo_name)
+ return (lp);
+
+ }
+ lp = SLIST_FIRST(&type->lpt_lpalloc);
+ if (lp == NULL) {
+ lock_prof_rejected++;
+ return (lp);
+ }
+ SLIST_REMOVE_HEAD(&type->lpt_lpalloc, link);
+ lp->file = p;
+ lp->line = line;
+ lp->type = lo->lo_type;
+ lp->name = lo->lo_name;
+ SLIST_INSERT_HEAD(&type->lpt_hash[hash], lp, link);
+ return (lp);
+}
+
+static struct lock_profile_object *
+lock_profile_object_lookup(struct lock_object *lo, int spin, const char *file,
+ int line)
+{
+ struct lock_profile_object *l;
+ struct lock_prof_type *type;
+ struct lpohead *head;
+
+ head = &curthread->td_lprof[spin];
+ LIST_FOREACH(l, head, lpo_link)
+ if (l->lpo_obj == lo && l->lpo_file == file &&
+ l->lpo_line == line)
+ return (l);
+ critical_enter();
+ type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin];
+ l = LIST_FIRST(&type->lpt_lpoalloc);
+ if (l == NULL) {
+ lock_prof_rejected++;
+ critical_exit();
+ return (NULL);
+ }
+ LIST_REMOVE(l, lpo_link);
+ critical_exit();
+ l->lpo_obj = lo;
+ l->lpo_file = file;
+ l->lpo_line = line;
+ l->lpo_cnt = 0;
+ LIST_INSERT_HEAD(head, l, lpo_link);
+
+ return (l);
+}
+
+void
+lock_profile_obtain_lock_success(struct lock_object *lo, int contested,
+ uint64_t waittime, const char *file, int line)
+{
+ static int lock_prof_count;
+ struct lock_profile_object *l;
+ int spin;
+
+ /* don't reset the timer when/if recursing */
+ if (!lock_prof_enable || (lo->lo_flags & LO_NOPROFILE))
+ return;
+ if (lock_prof_skipcount &&
+ (++lock_prof_count % lock_prof_skipcount) == 0)
+ return;
+ spin = LOCK_CLASS(lo) == &lock_class_mtx_spin;
+ if (spin && lock_prof_skipspin == 1)
+ return;
+ l = lock_profile_object_lookup(lo, spin, file, line);
+ if (l == NULL)
+ return;
+ l->lpo_cnt++;
+ if (++l->lpo_ref > 1)
+ return;
+ l->lpo_contest_locking = contested;
l->lpo_acqtime = nanoseconds();
if (waittime && (l->lpo_acqtime > waittime))
l->lpo_waittime = l->lpo_acqtime - waittime;
@@ -271,87 +505,65 @@ void _lock_profile_obtain_lock_success(struct lock_object *lo, int contested, ui
l->lpo_waittime = 0;
}
-void _lock_profile_release_lock(struct lock_object *lo)
+void
+lock_profile_release_lock(struct lock_object *lo)
{
- struct lock_profile_object *l = &lo->lo_profile_obj;
-
- if (l->lpo_acqtime) {
- const char *unknown = "(unknown)";
- u_int64_t acqtime, now, waittime;
- struct lock_prof *mpp;
- u_int hash;
- const char *p = l->lpo_filename;
- int collision = 0;
-
- now = nanoseconds();
- acqtime = l->lpo_acqtime;
- waittime = l->lpo_waittime;
- if (now <= acqtime)
- return;
- if (p == NULL || *p == '\0')
- p = unknown;
- hash = (l->lpo_namehash * 31 * 31 + (uintptr_t)p * 31 + l->lpo_lineno) & LPROF_HASH_MASK;
- mpp = &lprof_buf[hash];
- while (mpp->name != NULL) {
- if (mpp->line == l->lpo_lineno &&
- mpp->file == p &&
- mpp->namehash == l->lpo_namehash)
- break;
- /* If the lprof_hash entry is allocated to someone
- * else, try the next one
- */
- collision = 1;
- hash = (hash + 1) & LPROF_HASH_MASK;
- mpp = &lprof_buf[hash];
- }
- if (mpp->name == NULL) {
- int buf;
-
- buf = atomic_fetchadd_int(&allocated_lprof_buf, 1);
- /* Just exit if we cannot get a trace buffer */
- if (buf >= LPROF_HASH_SIZE) {
- ++lock_prof_rejected;
- return;
- }
- mpp->file = p;
- mpp->line = l->lpo_lineno;
- mpp->namehash = l->lpo_namehash;
- mpp->type = l->lpo_type;
- mpp->name = lo->lo_name;
-
- if (collision)
- ++lock_prof_collisions;
-
- /*
- * We might have raced someone else but who cares,
- * they'll try again next time
- */
- ++lock_prof_records;
- }
- LPROF_LOCK(hash);
- /*
- * Record if the lock has been held longer now than ever
- * before.
- */
- if (now - acqtime > mpp->cnt_max)
- mpp->cnt_max = now - acqtime;
- mpp->cnt_tot += now - acqtime;
- mpp->cnt_wait += waittime;
- mpp->cnt_cur++;
- /*
- * There's a small race, really we should cmpxchg
- * 0 with the current value, but that would bill
- * the contention to the wrong lock instance if
- * it followed this also.
- */
- mpp->cnt_contest_holding += l->lpo_contest_holding;
- mpp->cnt_contest_locking += l->lpo_contest_locking;
- LPROF_UNLOCK(hash);
-
- }
- l->lpo_acqtime = 0;
- l->lpo_waittime = 0;
- l->lpo_contest_locking = 0;
- l->lpo_contest_holding = 0;
+ struct lock_profile_object *l;
+ struct lock_prof_type *type;
+ struct lock_prof *lp;
+ u_int64_t holdtime;
+ struct lpohead *head;
+ int spin;
+
+ if (!lock_prof_enable || (lo->lo_flags & LO_NOPROFILE))
+ return;
+ spin = LOCK_CLASS(lo) == &lock_class_mtx_spin;
+ head = &curthread->td_lprof[spin];
+ critical_enter();
+ LIST_FOREACH(l, head, lpo_link)
+ if (l->lpo_obj == lo)
+ break;
+ if (l == NULL)
+ goto out;
+ if (--l->lpo_ref > 0)
+ goto out;
+ lp = lock_profile_lookup(lo, spin, l->lpo_file, l->lpo_line);
+ if (lp == NULL)
+ goto release;
+ holdtime = nanoseconds() - l->lpo_acqtime;
+ if (holdtime < 0)
+ goto release;
+ /*
+ * Record if the lock has been held longer now than ever
+ * before.
+ */
+ if (holdtime > lp->cnt_max)
+ lp->cnt_max = holdtime;
+ lp->cnt_tot += holdtime;
+ lp->cnt_wait += l->lpo_waittime;
+ lp->cnt_contest_locking += l->lpo_contest_locking;
+ lp->cnt_cur += l->lpo_cnt;
+release:
+ LIST_REMOVE(l, lpo_link);
+ type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin];
+ LIST_INSERT_HEAD(&type->lpt_lpoalloc, l, lpo_link);
+out:
+ critical_exit();
}
+
+SYSCTL_NODE(_debug, OID_AUTO, lock, CTLFLAG_RD, NULL, "lock debugging");
+SYSCTL_NODE(_debug_lock, OID_AUTO, prof, CTLFLAG_RD, NULL, "lock profiling");
+SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipspin, CTLFLAG_RW,
+ &lock_prof_skipspin, 0, "Skip profiling on spinlocks.");
+SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipcount, CTLFLAG_RW,
+ &lock_prof_skipcount, 0, "Sample approximately every N lock acquisitions.");
+SYSCTL_INT(_debug_lock_prof, OID_AUTO, rejected, CTLFLAG_RD,
+ &lock_prof_rejected, 0, "Number of rejected profiling records");
+SYSCTL_PROC(_debug_lock_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
+ NULL, 0, dump_lock_prof_stats, "A", "Lock profiling statistics");
+SYSCTL_PROC(_debug_lock_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
+ NULL, 0, reset_lock_prof_stats, "I", "Reset lock profiling statistics");
+SYSCTL_PROC(_debug_lock_prof, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
+ NULL, 0, enable_lock_prof, "I", "Enable lock profiling");
+
#endif
diff --git a/sys/sys/_lock.h b/sys/sys/_lock.h
index cc1ea0a..dd0b071 100644
--- a/sys/sys/_lock.h
+++ b/sys/sys/_lock.h
@@ -31,35 +31,10 @@
#ifndef _SYS__LOCK_H_
#define _SYS__LOCK_H_
-struct lock_profile_object {
- /*
- * This does not result in variant structure sizes because
- * MUTEX_PROFILING is in opt_global.h
- */
- u_int64_t lpo_acqtime;
- u_int64_t lpo_waittime;
- const char *lpo_filename;
- u_int lpo_namehash;
- int lpo_lineno;
- const char *lpo_type;
- /*
- * Fields relating to measuring contention on mutexes.
- * holding must be accessed atomically since it's
- * modified by threads that don't yet hold the mutex.
- * locking is only modified and referenced while
- * the mutex is held.
- */
- u_int lpo_contest_holding;
- u_int lpo_contest_locking;
-};
-
struct lock_object {
const char *lo_name; /* Individual lock name. */
const char *lo_type; /* General lock type. */
u_int lo_flags;
-#ifdef LOCK_PROFILING
- struct lock_profile_object lo_profile_obj;
-#endif
union { /* Data for witness. */
STAILQ_ENTRY(lock_object) lod_list;
struct witness *lod_witness;
diff --git a/sys/sys/lock_profile.h b/sys/sys/lock_profile.h
index f2861ac..f1e2659 100644
--- a/sys/sys/lock_profile.h
+++ b/sys/sys/lock_profile.h
@@ -31,139 +31,54 @@
#ifndef _SYS_LOCK_PROFILE_H_
#define _SYS_LOCK_PROFILE_H_
+#ifdef _KERNEL
+
+struct lock_profile_object;
+LIST_HEAD(lpohead, lock_profile_object);
+
#ifdef LOCK_PROFILING
-#include <sys/stdint.h>
-#include <sys/ktr.h>
-#include <sys/mutex.h>
-#include <machine/atomic.h>
-#include <machine/cpufunc.h>
-
-#ifndef LPROF_HASH_SIZE
-#define LPROF_HASH_SIZE 4096
-#define LPROF_HASH_MASK (LPROF_HASH_SIZE - 1)
-#endif
+#include <sys/lock.h>
#ifndef USE_CPU_NANOSECONDS
u_int64_t nanoseconds(void);
#endif
-struct lock_prof {
- const char *name;
- const char *type;
- const char *file;
- u_int namehash;
- int line;
- uintmax_t cnt_max;
- uintmax_t cnt_tot;
- uintmax_t cnt_wait;
- uintmax_t cnt_cur;
- uintmax_t cnt_contest_holding;
- uintmax_t cnt_contest_locking;
-};
-
-extern struct lock_prof lprof_buf[LPROF_HASH_SIZE];
-#define LPROF_SBUF_SIZE 256 * 400
-
-/* We keep a smaller pool of spin mutexes for protecting the lprof hash entries */
-#define LPROF_LOCK_SIZE 16
-#define LPROF_LOCK_MASK (LPROF_LOCK_SIZE - 1)
-#define LPROF_LHASH(hash) ((hash) & LPROF_LOCK_MASK)
-
-#define LPROF_LOCK(hash) mtx_lock_spin(&lprof_locks[LPROF_LHASH(hash)])
-#define LPROF_UNLOCK(hash) mtx_unlock_spin(&lprof_locks[LPROF_LHASH(hash)])
-
-#ifdef _KERNEL
-extern struct mtx lprof_locks[LPROF_LOCK_SIZE];
extern int lock_prof_enable;
-void _lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, const char *file, int line);
-void _lock_profile_update_wait(struct lock_object *lo, uint64_t waitstart);
-void _lock_profile_release_lock(struct lock_object *lo);
-
-static inline void lock_profile_object_init(struct lock_object *lo, struct lock_class *class, const char *name) {
- const char *p;
- u_int hash = 0;
- struct lock_profile_object *l = &lo->lo_profile_obj;
-
- l->lpo_acqtime = 0;
- l->lpo_waittime = 0;
- l->lpo_filename = NULL;
- l->lpo_lineno = 0;
- l->lpo_contest_holding = 0;
- l->lpo_contest_locking = 0;
- l->lpo_type = class->lc_name;
-
- /* Hash the mutex name to an int so we don't have to strcmp() it repeatedly */
- for (p = name; *p != '\0'; p++)
- hash = 31 * hash + *p;
- l->lpo_namehash = hash;
-#if 0
- if (opts & MTX_PROFILE)
- l->lpo_stack = stack_create();
-#endif
-}
-
+void lock_profile_obtain_lock_success(struct lock_object *lo, int contested,
+ uint64_t waittime, const char *file, int line);
+void lock_profile_release_lock(struct lock_object *lo);
-static inline void
-lock_profile_object_destroy(struct lock_object *lo)
+static inline void
+lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested,
+ uint64_t *waittime)
{
-#if 0
- struct lock_profile_object *l = &lo->lo_profile_obj;
- if (lo->lo_flags & LO_PROFILE)
- stack_destroy(l->lpo_stack);
-#endif
+ if (!lock_prof_enable || (lo->lo_flags & LO_NOPROFILE) || *contested)
+ return;
+ *waittime = nanoseconds();
+ *contested = 1;
}
-static inline void lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested,
- uint64_t *waittime)
+#else /* !LOCK_PROFILING */
+
+static inline void
+lock_profile_release_lock(struct lock_object *lo)
{
- struct lock_profile_object *l = &lo->lo_profile_obj;
-
- if (!(lo->lo_flags & LO_NOPROFILE) && lock_prof_enable &&
- *contested == 0) {
- *waittime = nanoseconds();
- atomic_add_int(&l->lpo_contest_holding, 1);
- *contested = 1;
- }
}
-static inline void lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, const char *file, int line)
+static inline void
+lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested, uint64_t *waittime)
{
-
- /* don't reset the timer when/if recursing */
- if (!(lo->lo_flags & LO_NOPROFILE) && lock_prof_enable &&
- lo->lo_profile_obj.lpo_acqtime == 0) {
-#ifdef LOCK_PROFILING_FAST
- if (contested == 0)
- return;
-#endif
- _lock_profile_obtain_lock_success(lo, contested, waittime, file, line);
- }
}
-static inline void lock_profile_release_lock(struct lock_object *lo)
-{
- struct lock_profile_object *l = &lo->lo_profile_obj;
- if (!(lo->lo_flags & LO_NOPROFILE) && l->lpo_acqtime)
- _lock_profile_release_lock(lo);
+static inline void
+lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime,
+ const char *file, int line)
+{
}
-#endif /* _KERNEL */
-
-#else /* !LOCK_PROFILING */
-
-#ifdef _KERNEL
-static inline void lock_profile_update_wait(struct lock_object *lo, uint64_t waitstart) {;}
-static inline void lock_profile_update_contest_locking(struct lock_object *lo, int contested) {;}
-static inline void lock_profile_release_lock(struct lock_object *lo) {;}
-static inline void lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested, uint64_t *waittime) {;}
-static inline void lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime,
- const char *file, int line) {;}
-static inline void lock_profile_object_destroy(struct lock_object *lo) {;}
-static inline void lock_profile_object_init(struct lock_object *lo, struct lock_class *class, const char *name) {;}
+#endif /* !LOCK_PROFILING */
#endif /* _KERNEL */
-#endif /* !LOCK_PROFILING */
-
#endif /* _SYS_LOCK_PROFILE_H_ */
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 7d0aca4..7ebb8c2 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -45,6 +45,7 @@
#endif
#include <sys/queue.h>
#include <sys/_lock.h>
+#include <sys/lock_profile.h>
#include <sys/_mutex.h>
#include <sys/priority.h>
#include <sys/rtprio.h> /* XXX. */
@@ -298,6 +299,7 @@ struct thread {
struct td_sched *td_sched; /* (*) Scheduler-specific data. */
struct kaudit_record *td_ar; /* (k) Active audit record, if any. */
int td_syscalls; /* per-thread syscall count (used by NFS :)) */
+ struct lpohead td_lprof[2]; /* (a) lock profiling objects. */
};
struct mtx *thread_lock_block(struct thread *);
diff --git a/sys/sys/sx.h b/sys/sys/sx.h
index 47fdae6..5df5f36 100644
--- a/sys/sys/sx.h
+++ b/sys/sys/sx.h
@@ -178,11 +178,9 @@ __sx_slock(struct sx *sx, int opts, const char *file, int line)
if (!(x & SX_LOCK_SHARED) ||
!atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER))
error = _sx_slock_hard(sx, opts, file, line);
-#ifdef LOCK_PROFILING_SHARED
- else if (SX_SHARERS(x) == 0)
+ else
lock_profile_obtain_lock_success(&sx->lock_object, 0, 0, file,
line);
-#endif
return (error);
}