From 12adc443d67286deeee69e764d979c963403497d Mon Sep 17 00:00:00 2001
From: jeff
Date: Sat, 15 Dec 2007 23:13:31 +0000
Subject: - Re-implement lock profiling in such a way that it no longer breaks
 the ABI when enabled.  There is no longer an embedded lock_profile_object
 in each lock.  Instead a list of lock_profile_objects is kept per-thread
 for each lock it may own.  The cnt_hold statistic is now always 0 to
 facilitate this.
 - Support shared locking by tracking individual lock instances and
   statistics in the per-thread per-instance lock_profile_object.
 - Make the lock profiling hash table a per-cpu singly linked list with a
   per-cpu static lock_prof allocator.  This removes the need for an array
   of spinlocks and reduces cache contention between cores.
 - Use a separate hash for spinlocks and other locks so that only a
   critical_enter() is required and not a spinlock_enter() to modify the
   per-cpu tables.
 - Count time spent spinning in the lock statistics.
 - Remove the LOCK_PROFILING_SHARED option as it is always supported now.
 - Specifically drop and release the scheduler locks in both schedulers
   since we track owners now.

In collaboration with: Kip Macy
Sponsored by: Nokia
---
 sys/conf/options       |   1 -
 sys/kern/kern_mutex.c  |  26 +-
 sys/kern/kern_rwlock.c |  31 +--
 sys/kern/kern_sx.c     |  34 +--
 sys/kern/kern_thread.c |   2 +
 sys/kern/sched_4bsd.c  |   7 +-
 sys/kern/sched_ule.c   |   6 +
 sys/kern/subr_lock.c   | 656 ++++++++++++++++++++++++++++++++-----------------
 sys/sys/_lock.h        |  25 --
 sys/sys/lock_profile.h | 139 ++---------
 sys/sys/proc.h         |   2 +
 sys/sys/sx.h           |   4 +-
 12 files changed, 500 insertions(+), 433 deletions(-)

diff --git a/sys/conf/options b/sys/conf/options
index 6a00e35..e76c61e 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -540,7 +540,6 @@ MUTEX_DEBUG	opt_global.h
 MUTEX_NOINLINE	opt_global.h
 LOCK_PROFILING	opt_global.h
 LOCK_PROFILING_FAST	opt_global.h
-LOCK_PROFILING_SHARED	opt_global.h
 MSIZE	opt_global.h
 REGRESSION	opt_global.h
 RESTARTABLE_PANICS	opt_global.h
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 727871d..81be32d 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -123,20 +123,6 @@ struct lock_class lock_class_mtx_spin = {
 struct mtx blocked_lock;
 struct mtx Giant;
 
-#ifdef LOCK_PROFILING
-static inline void lock_profile_init(void)
-{
-	int i;
-	/* Initialize the mutex profiling locks */
-	for (i = 0; i < LPROF_LOCK_SIZE; i++) {
-		mtx_init(&lprof_locks[i], "mprof lock",
-		    NULL, MTX_SPIN|MTX_QUIET|MTX_NOPROFILE);
-	}
-}
-#else
-static inline void lock_profile_init(void) {;}
-#endif
-
 void
 assert_mtx(struct lock_object *lock, int what)
 {
@@ -425,7 +411,7 @@ _mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file,
 	}
 #endif
 	lock_profile_obtain_lock_success(&m->lock_object, contested,
-	    waittime, (file), (line));
+	    waittime, file, line);
 }
 
 static void
@@ -514,7 +500,8 @@ retry:
 			m->mtx_recurse++;
 			break;
 		}
-		lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime);
+		lock_profile_obtain_lock_failed(&m->lock_object,
+		    &contested, &waittime);
 		/* Give interrupts a chance while we spin.
*/ spinlock_exit(); while (m->mtx_lock != MTX_UNOWNED) { @@ -535,8 +522,9 @@ retry: break; _rel_spin_lock(m); /* does spinlock_exit() */ } - lock_profile_obtain_lock_success(&m->lock_object, contested, - waittime, (file), (line)); + if (m->mtx_recurse == 0) + lock_profile_obtain_lock_success(&m->lock_object, contested, + waittime, (file), (line)); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); } @@ -794,8 +782,6 @@ mutex_init(void) mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN | MTX_RECURSE); mtx_init(&devmtx, "cdev", NULL, MTX_DEF); mtx_lock(&Giant); - - lock_profile_init(); } #ifdef DDB diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c index 4b8b4ca..43c1d3c 100644 --- a/sys/kern/kern_rwlock.c +++ b/sys/kern/kern_rwlock.c @@ -227,10 +227,8 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) #ifdef ADAPTIVE_RWLOCKS volatile struct thread *owner; #endif -#ifdef LOCK_PROFILING_SHARED uint64_t waittime = 0; int contested = 0; -#endif uintptr_t x; KASSERT(rw->rw_lock != RW_DESTROYED, @@ -273,12 +271,6 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) MPASS((x & RW_LOCK_READ_WAITERS) == 0); if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) { -#ifdef LOCK_PROFILING_SHARED - if (RW_READERS(x) == 0) - lock_profile_obtain_lock_success( - &rw->lock_object, contested, - waittime, file, line); -#endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, @@ -289,6 +281,8 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) cpu_spinwait(); continue; } + lock_profile_obtain_lock_failed(&rw->lock_object, + &contested, &waittime); #ifdef ADAPTIVE_RWLOCKS /* @@ -301,10 +295,6 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); -#ifdef LOCK_PROFILING_SHARED - lock_profile_obtain_lock_failed(&rw->lock_object, - &contested, &waittime); -#endif while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && TD_IS_RUNNING(owner)) cpu_spinwait(); @@ -369,10 +359,6 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); -#ifdef LOCK_PROFILING_SHARED - lock_profile_obtain_lock_failed(&rw->lock_object, &contested, - &waittime); -#endif turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", @@ -384,7 +370,8 @@ _rw_rlock(struct rwlock *rw, const char *file, int line) * however. turnstiles don't like owners changing between calls to * turnstile_wait() currently. 
*/ - + lock_profile_obtain_lock_success( &rw->lock_object, contested, + waittime, file, line); LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line); WITNESS_LOCK(&rw->lock_object, 0, file, line); curthread->td_locks++; @@ -431,9 +418,6 @@ _rw_runlock(struct rwlock *rw, const char *file, int line) */ KASSERT(!(x & RW_LOCK_READ_WAITERS), ("%s: waiting readers", __func__)); -#ifdef LOCK_PROFILING_SHARED - lock_profile_release_lock(&rw->lock_object); -#endif /* * If there aren't any waiters for a write lock, then try @@ -510,6 +494,7 @@ _rw_runlock(struct rwlock *rw, const char *file, int line) turnstile_chain_unlock(&rw->lock_object); break; } + lock_profile_release_lock(&rw->lock_object); } /* @@ -544,6 +529,8 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) rw->lock_object.lo_name, (void *)rw->rw_lock, file, line); while (!_rw_write_lock(rw, tid)) { + lock_profile_obtain_lock_failed(&rw->lock_object, + &contested, &waittime); #ifdef ADAPTIVE_RWLOCKS /* * If the lock is write locked and the owner is @@ -556,8 +543,6 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); - lock_profile_obtain_lock_failed(&rw->lock_object, - &contested, &waittime); while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && TD_IS_RUNNING(owner)) cpu_spinwait(); @@ -641,8 +626,6 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); - lock_profile_obtain_lock_failed(&rw->lock_object, &contested, - &waittime); turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c index 1e3f135..bc172e5 100644 --- a/sys/kern/kern_sx.c +++ b/sys/kern/kern_sx.c @@ -302,11 +302,8 @@ _sx_sunlock(struct sx *sx, const char *file, int line) curthread->td_locks--; WITNESS_UNLOCK(&sx->lock_object, 0, file, line); LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line); -#ifdef LOCK_PROFILING_SHARED - if (SX_SHARERS(sx->sx_lock) == 1) - lock_profile_release_lock(&sx->lock_object); -#endif __sx_sunlock(sx, file, line); + lock_profile_release_lock(&sx->lock_object); } void @@ -450,6 +447,8 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, sx->lock_object.lo_name, (void *)sx->sx_lock, file, line); while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) { + lock_profile_obtain_lock_failed(&sx->lock_object, &contested, + &waittime); #ifdef ADAPTIVE_SX /* * If the lock is write locked and the owner is @@ -467,8 +466,6 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, "%s: spinning on %p held by %p", __func__, sx, owner); GIANT_SAVE(); - lock_profile_obtain_lock_failed( - &sx->lock_object, &contested, &waittime); while (SX_OWNER(sx->sx_lock) == x && TD_IS_RUNNING(owner)) cpu_spinwait(); @@ -555,8 +552,6 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, __func__, sx); GIANT_SAVE(); - lock_profile_obtain_lock_failed(&sx->lock_object, &contested, - &waittime); sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? 
SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE); @@ -648,10 +643,8 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) #ifdef ADAPTIVE_SX volatile struct thread *owner; #endif -#ifdef LOCK_PROFILING_SHARED uint64_t waittime = 0; int contested = 0; -#endif uintptr_t x; int error = 0; @@ -672,12 +665,6 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) MPASS(!(x & SX_LOCK_SHARED_WAITERS)); if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) { -#ifdef LOCK_PROFILING_SHARED - if (SX_SHARERS(x) == 0) - lock_profile_obtain_lock_success( - &sx->lock_object, contested, - waittime, file, line); -#endif if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, @@ -687,6 +674,8 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) } continue; } + lock_profile_obtain_lock_failed(&sx->lock_object, &contested, + &waittime); #ifdef ADAPTIVE_SX /* @@ -694,7 +683,7 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) * the owner stops running or the state of the lock * changes. */ - else if (sx->lock_object.lo_flags & SX_ADAPTIVESPIN) { + if (sx->lock_object.lo_flags & SX_ADAPTIVESPIN) { x = SX_OWNER(x); owner = (struct thread *)x; if (TD_IS_RUNNING(owner)) { @@ -703,10 +692,6 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) "%s: spinning on %p held by %p", __func__, sx, owner); GIANT_SAVE(); -#ifdef LOCK_PROFILING_SHARED - lock_profile_obtain_lock_failed( - &sx->lock_object, &contested, &waittime); -#endif while (SX_OWNER(sx->sx_lock) == x && TD_IS_RUNNING(owner)) cpu_spinwait(); @@ -772,10 +757,6 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) __func__, sx); GIANT_SAVE(); -#ifdef LOCK_PROFILING_SHARED - lock_profile_obtain_lock_failed(&sx->lock_object, &contested, - &waittime); -#endif sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE); @@ -795,6 +776,9 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); } + if (error == 0) + lock_profile_obtain_lock_success(&sx->lock_object, contested, + waittime, file, line); GIANT_RESTORE(); return (error); diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index e176b87..93ff5a7 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -555,6 +555,8 @@ thread_link(struct thread *td, struct proc *p) td->td_flags = TDF_INMEM; LIST_INIT(&td->td_contested); + LIST_INIT(&td->td_lprof[0]); + LIST_INIT(&td->td_lprof[1]); sigqueue_init(&td->td_sigqueue, p); callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist); diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c index f6e702e..e1e5c91 100644 --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -878,9 +878,11 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); #endif - /* I feel sleepy */ + lock_profile_release_lock(&sched_lock.lock_object); cpu_switch(td, newtd, td->td_lock); + lock_profile_obtain_lock_success(&sched_lock.lock_object, + 0, 0, __FILE__, __LINE__); /* * Where am I? What year is it? 
* We are in the same thread that went to sleep above, @@ -1375,6 +1377,7 @@ sched_throw(struct thread *td) mtx_lock_spin(&sched_lock); spinlock_exit(); } else { + lock_profile_release_lock(&sched_lock.lock_object); MPASS(td->td_lock == &sched_lock); } mtx_assert(&sched_lock, MA_OWNED); @@ -1394,6 +1397,8 @@ sched_fork_exit(struct thread *td) */ td->td_oncpu = PCPU_GET(cpuid); sched_lock.mtx_lock = (uintptr_t)td; + lock_profile_obtain_lock_success(&sched_lock.lock_object, + 0, 0, __FILE__, __LINE__); THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED); } diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index e9d9468..6841bab 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -1894,6 +1894,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); #endif + lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object); TDQ_LOCKPTR(tdq)->mtx_lock = (uintptr_t)newtd; cpu_switch(td, newtd, mtx); /* @@ -1903,6 +1904,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) */ cpuid = PCPU_GET(cpuid); tdq = TDQ_CPU(cpuid); + lock_profile_obtain_lock_success( + &TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__); #ifdef HWPMC_HOOKS if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); @@ -2618,6 +2621,7 @@ sched_throw(struct thread *td) } else { MPASS(td->td_lock == TDQ_LOCKPTR(tdq)); tdq_load_rem(tdq, td->td_sched); + lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object); } KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count")); newtd = choosethread(); @@ -2650,6 +2654,8 @@ sched_fork_exit(struct thread *td) MPASS(td->td_lock == TDQ_LOCKPTR(tdq)); td->td_oncpu = cpuid; TDQ_LOCK_ASSERT(tdq, MA_OWNED | MA_NOTRECURSED); + lock_profile_obtain_lock_success( + &TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__); } static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, diff --git a/sys/kern/subr_lock.c b/sys/kern/subr_lock.c index ebb3c35..19f3639 100644 --- a/sys/kern/subr_lock.c +++ b/sys/kern/subr_lock.c @@ -40,17 +40,24 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include +#include +#include +#include +#include #include +#include #include -#include #ifdef DDB #include #endif +#include + CTASSERT(LOCK_CLASS_MAX == 15); struct lock_class *lock_classes[LOCK_CLASS_MAX + 1] = { @@ -62,136 +69,6 @@ struct lock_class *lock_classes[LOCK_CLASS_MAX + 1] = { &lock_class_lockmgr, }; -#ifdef LOCK_PROFILING -#include - -SYSCTL_NODE(_debug, OID_AUTO, lock, CTLFLAG_RD, NULL, "lock debugging"); -SYSCTL_NODE(_debug_lock, OID_AUTO, prof, CTLFLAG_RD, NULL, "lock profiling"); -int lock_prof_enable = 0; -SYSCTL_INT(_debug_lock_prof, OID_AUTO, enable, CTLFLAG_RW, - &lock_prof_enable, 0, "Enable lock profiling"); - -/* - * lprof_buf is a static pool of profiling records to avoid possible - * reentrance of the memory allocation functions. - * - * Note: NUM_LPROF_BUFFERS must be smaller than LPROF_HASH_SIZE. - */ -struct lock_prof lprof_buf[LPROF_HASH_SIZE]; -static int allocated_lprof_buf; -struct mtx lprof_locks[LPROF_LOCK_SIZE]; - - -/* SWAG: sbuf size = avg stat. 
line size * number of locks */ -#define LPROF_SBUF_SIZE 256 * 400 - -static int lock_prof_acquisitions; -SYSCTL_INT(_debug_lock_prof, OID_AUTO, acquisitions, CTLFLAG_RD, - &lock_prof_acquisitions, 0, "Number of lock acquistions recorded"); -static int lock_prof_records; -SYSCTL_INT(_debug_lock_prof, OID_AUTO, records, CTLFLAG_RD, - &lock_prof_records, 0, "Number of profiling records"); -static int lock_prof_maxrecords = LPROF_HASH_SIZE; -SYSCTL_INT(_debug_lock_prof, OID_AUTO, maxrecords, CTLFLAG_RD, - &lock_prof_maxrecords, 0, "Maximum number of profiling records"); -static int lock_prof_rejected; -SYSCTL_INT(_debug_lock_prof, OID_AUTO, rejected, CTLFLAG_RD, - &lock_prof_rejected, 0, "Number of rejected profiling records"); -static int lock_prof_hashsize = LPROF_HASH_SIZE; -SYSCTL_INT(_debug_lock_prof, OID_AUTO, hashsize, CTLFLAG_RD, - &lock_prof_hashsize, 0, "Hash size"); -static int lock_prof_collisions = 0; -SYSCTL_INT(_debug_lock_prof, OID_AUTO, collisions, CTLFLAG_RD, - &lock_prof_collisions, 0, "Number of hash collisions"); - -#ifndef USE_CPU_NANOSECONDS -u_int64_t -nanoseconds(void) -{ - struct timespec tv; - - nanotime(&tv); - return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec); -} -#endif - -static int -dump_lock_prof_stats(SYSCTL_HANDLER_ARGS) -{ - struct sbuf *sb; - int error, i; - static int multiplier = 1; - const char *p; - - if (allocated_lprof_buf == 0) - return (SYSCTL_OUT(req, "No locking recorded", - sizeof("No locking recorded"))); - -retry_sbufops: - sb = sbuf_new(NULL, NULL, LPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN); - sbuf_printf(sb, "\n%6s %12s %12s %11s %5s %5s %12s %12s %s\n", - "max", "total", "wait_total", "count", "avg", "wait_avg", "cnt_hold", "cnt_lock", "name"); - for (i = 0; i < LPROF_HASH_SIZE; ++i) { - if (lprof_buf[i].name == NULL) - continue; - for (p = lprof_buf[i].file; - p != NULL && strncmp(p, "../", 3) == 0; p += 3) - /* nothing */ ; - sbuf_printf(sb, "%6ju %12ju %12ju %11ju %5ju %5ju %12ju %12ju %s:%d (%s:%s)\n", - lprof_buf[i].cnt_max / 1000, - lprof_buf[i].cnt_tot / 1000, - lprof_buf[i].cnt_wait / 1000, - lprof_buf[i].cnt_cur, - lprof_buf[i].cnt_cur == 0 ? (uintmax_t)0 : - lprof_buf[i].cnt_tot / (lprof_buf[i].cnt_cur * 1000), - lprof_buf[i].cnt_cur == 0 ? 
(uintmax_t)0 : - lprof_buf[i].cnt_wait / (lprof_buf[i].cnt_cur * 1000), - lprof_buf[i].cnt_contest_holding, - lprof_buf[i].cnt_contest_locking, - p, lprof_buf[i].line, - lprof_buf[i].type, - lprof_buf[i].name); - if (sbuf_overflowed(sb)) { - sbuf_delete(sb); - multiplier++; - goto retry_sbufops; - } - } - - sbuf_finish(sb); - error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); - sbuf_delete(sb); - return (error); -} -static int -reset_lock_prof_stats(SYSCTL_HANDLER_ARGS) -{ - int error, v; - - if (allocated_lprof_buf == 0) - return (0); - - v = 0; - error = sysctl_handle_int(oidp, &v, 0, req); - if (error) - return (error); - if (req->newptr == NULL) - return (error); - if (v == 0) - return (0); - - bzero(lprof_buf, LPROF_HASH_SIZE*sizeof(*lprof_buf)); - allocated_lprof_buf = 0; - return (0); -} - -SYSCTL_PROC(_debug_lock_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD, - NULL, 0, dump_lock_prof_stats, "A", "Lock profiling statistics"); - -SYSCTL_PROC(_debug_lock_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW, - NULL, 0, reset_lock_prof_stats, "I", "Reset lock profiling statistics"); -#endif - void lock_init(struct lock_object *lock, struct lock_class *class, const char *name, const char *type, int flags) @@ -216,7 +93,6 @@ lock_init(struct lock_object *lock, struct lock_class *class, const char *name, lock->lo_flags |= flags | LO_INITIALIZED; LOCK_LOG_INIT(lock, 0); WITNESS_INIT(lock); - lock_profile_object_init(lock, class, name); } void @@ -224,7 +100,6 @@ lock_destroy(struct lock_object *lock) { KASSERT(lock_initalized(lock), ("lock %p is not initialized", lock)); - lock_profile_object_destroy(lock); WITNESS_DESTROY(lock); LOCK_LOG_DESTROY(lock, 0); lock->lo_flags &= ~LO_INITIALIZED; @@ -253,17 +128,376 @@ DB_SHOW_COMMAND(lock, db_show_lock) #endif #ifdef LOCK_PROFILING -void _lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, const char *file, int line) + +/* + * One object per-thread for each lock the thread owns. Tracks individual + * lock instances. + */ +struct lock_profile_object { + LIST_ENTRY(lock_profile_object) lpo_link; + struct lock_object *lpo_obj; + const char *lpo_file; + int lpo_line; + uint16_t lpo_ref; + uint16_t lpo_cnt; + u_int64_t lpo_acqtime; + u_int64_t lpo_waittime; + u_int lpo_contest_locking; +}; + +/* + * One lock_prof for each (file, line, lock object) triple. + */ +struct lock_prof { + SLIST_ENTRY(lock_prof) link; + const char *file; + const char *name; + int line; + int ticks; + const char *type; + uintmax_t cnt_max; + uintmax_t cnt_tot; + uintmax_t cnt_wait; + uintmax_t cnt_cur; + uintmax_t cnt_contest_locking; +}; + +SLIST_HEAD(lphead, lock_prof); + +#define LPROF_HASH_SIZE 4096 +#define LPROF_HASH_MASK (LPROF_HASH_SIZE - 1) +#define LPROF_CACHE_SIZE 4096 + +/* + * Array of objects and profs for each type of object for each cpu. Spinlocks + * are handled seperately because a thread may be preempted and acquire a + * spinlock while in the lock profiling code of a non-spinlock. In this way + * we only need a critical section to protect the per-cpu lists. + */ +struct lock_prof_type { + struct lphead lpt_lpalloc; + struct lpohead lpt_lpoalloc; + struct lphead lpt_hash[LPROF_HASH_SIZE]; + struct lock_prof lpt_prof[LPROF_CACHE_SIZE]; + struct lock_profile_object lpt_objs[LPROF_CACHE_SIZE]; +}; + +struct lock_prof_cpu { + struct lock_prof_type lpc_types[2]; /* One for spin one for other. */ +}; + +struct lock_prof_cpu *lp_cpu[MAXCPU]; + +int lock_prof_enable = 0; + +/* SWAG: sbuf size = avg stat. 
line size * number of locks */ +#define LPROF_SBUF_SIZE 256 * 400 + +static int lock_prof_rejected; +static int lock_prof_skipspin; +static int lock_prof_skipcount; + +#ifndef USE_CPU_NANOSECONDS +u_int64_t +nanoseconds(void) { - struct lock_profile_object *l = &lo->lo_profile_obj; + struct bintime bt; + u_int64_t ns; + + binuptime(&bt); + /* From bintime2timespec */ + ns = bt.sec * (u_int64_t)1000000000; + ns += ((uint64_t)1000000000 * (uint32_t)(bt.frac >> 32)) >> 32; + return (ns); +} +#endif - lo->lo_profile_obj.lpo_contest_holding = 0; - - if (contested) - lo->lo_profile_obj.lpo_contest_locking++; +static void +lock_prof_init_type(struct lock_prof_type *type) +{ + int i; + + SLIST_INIT(&type->lpt_lpalloc); + LIST_INIT(&type->lpt_lpoalloc); + for (i = 0; i < LPROF_CACHE_SIZE; i++) { + SLIST_INSERT_HEAD(&type->lpt_lpalloc, &type->lpt_prof[i], + link); + LIST_INSERT_HEAD(&type->lpt_lpoalloc, &type->lpt_objs[i], + lpo_link); + } +} + +static void +lock_prof_init(void *arg) +{ + int cpu; + + for (cpu = 0; cpu <= mp_maxid; cpu++) { + lp_cpu[cpu] = malloc(sizeof(*lp_cpu[cpu]), M_DEVBUF, + M_WAITOK | M_ZERO); + lock_prof_init_type(&lp_cpu[cpu]->lpc_types[0]); + lock_prof_init_type(&lp_cpu[cpu]->lpc_types[1]); + } +} +SYSINIT(lockprof, SI_SUB_SMP, SI_ORDER_ANY, lock_prof_init, NULL); + +static void +lock_prof_reset(void) +{ + struct lock_prof_cpu *lpc; + int enabled, i, cpu; + + enabled = lock_prof_enable; + lock_prof_enable = 0; + for (cpu = 0; cpu <= mp_maxid; cpu++) { + lpc = lp_cpu[cpu]; + for (i = 0; i < LPROF_CACHE_SIZE; i++) { + LIST_REMOVE(&lpc->lpc_types[0].lpt_objs[i], lpo_link); + LIST_REMOVE(&lpc->lpc_types[1].lpt_objs[i], lpo_link); + } + bzero(lpc, sizeof(*lpc)); + lock_prof_init_type(&lpc->lpc_types[0]); + lock_prof_init_type(&lpc->lpc_types[1]); + } + lock_prof_enable = enabled; +} + +static void +lock_prof_output(struct lock_prof *lp, struct sbuf *sb) +{ + const char *p; + + for (p = lp->file; p != NULL && strncmp(p, "../", 3) == 0; p += 3); + sbuf_printf(sb, + "%6ju %12ju %12ju %11ju %5ju %5ju %12ju %12ju %s:%d (%s:%s)\n", + lp->cnt_max / 1000, lp->cnt_tot / 1000, + lp->cnt_wait / 1000, lp->cnt_cur, + lp->cnt_cur == 0 ? (uintmax_t)0 : + lp->cnt_tot / (lp->cnt_cur * 1000), + lp->cnt_cur == 0 ? 
(uintmax_t)0 : + lp->cnt_wait / (lp->cnt_cur * 1000), + (uintmax_t)0, lp->cnt_contest_locking, + p, lp->line, lp->type, lp->name); +} + +static void +lock_prof_sum(struct lock_prof *match, struct lock_prof *dst, int hash, + int spin, int t) +{ + struct lock_prof_type *type; + struct lock_prof *l; + int cpu; + + dst->file = match->file; + dst->line = match->line; + dst->type = match->type; + dst->name = match->name; + + for (cpu = 0; cpu <= mp_maxid; cpu++) { + if (lp_cpu[cpu] == NULL) + continue; + type = &lp_cpu[cpu]->lpc_types[spin]; + SLIST_FOREACH(l, &type->lpt_hash[hash], link) { + if (l->ticks == t) + continue; + if (l->file != match->file || l->line != match->line || + l->name != match->name || l->type != match->type) + continue; + l->ticks = t; + if (l->cnt_max > dst->cnt_max) + dst->cnt_max = l->cnt_max; + dst->cnt_tot += l->cnt_tot; + dst->cnt_wait += l->cnt_wait; + dst->cnt_cur += l->cnt_cur; + dst->cnt_contest_locking += l->cnt_contest_locking; + } + } - l->lpo_filename = file; - l->lpo_lineno = line; +} + +static void +lock_prof_type_stats(struct lock_prof_type *type, struct sbuf *sb, int spin, + int t) +{ + struct lock_prof *l; + int i; + + for (i = 0; i < LPROF_HASH_SIZE; ++i) { + SLIST_FOREACH(l, &type->lpt_hash[i], link) { + struct lock_prof lp = {}; + + if (l->ticks == t) + continue; + lock_prof_sum(l, &lp, i, spin, t); + lock_prof_output(&lp, sb); + if (sbuf_overflowed(sb)) + return; + } + } +} + +static int +dump_lock_prof_stats(SYSCTL_HANDLER_ARGS) +{ + static int multiplier = 1; + struct sbuf *sb; + int error, cpu, t; + +retry_sbufops: + sb = sbuf_new(NULL, NULL, LPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN); + sbuf_printf(sb, "\n%6s %12s %12s %11s %5s %5s %12s %12s %s\n", + "max", "total", "wait_total", "count", "avg", "wait_avg", "cnt_hold", "cnt_lock", "name"); + t = ticks; + for (cpu = 0; cpu <= mp_maxid; cpu++) { + if (lp_cpu[cpu] == NULL) + continue; + lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[0], sb, 0, t); + lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[1], sb, 1, t); + if (sbuf_overflowed(sb)) { + sbuf_delete(sb); + multiplier++; + goto retry_sbufops; + } + } + + sbuf_finish(sb); + error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); + sbuf_delete(sb); + return (error); +} + +static int +enable_lock_prof(SYSCTL_HANDLER_ARGS) +{ + int error, v; + + v = lock_prof_enable; + error = sysctl_handle_int(oidp, &v, v, req); + if (error) + return (error); + if (req->newptr == NULL) + return (error); + if (v == lock_prof_enable) + return (0); + if (v == 1) + lock_prof_reset(); + lock_prof_enable = !!v; + + return (0); +} + +static int +reset_lock_prof_stats(SYSCTL_HANDLER_ARGS) +{ + int error, v; + + v = 0; + error = sysctl_handle_int(oidp, &v, 0, req); + if (error) + return (error); + if (req->newptr == NULL) + return (error); + if (v == 0) + return (0); + lock_prof_reset(); + + return (0); +} + +static struct lock_prof * +lock_profile_lookup(struct lock_object *lo, int spin, const char *file, + int line) +{ + const char *unknown = "(unknown)"; + struct lock_prof_type *type; + struct lock_prof *lp; + struct lphead *head; + const char *p; + u_int hash; + + p = file; + if (p == NULL || *p == '\0') + p = unknown; + hash = (uintptr_t)lo->lo_name * 31 + (uintptr_t)p * 31 + line; + hash &= LPROF_HASH_MASK; + type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin]; + head = &type->lpt_hash[hash]; + SLIST_FOREACH(lp, head, link) { + if (lp->line == line && lp->file == p && + lp->name == lo->lo_name) + return (lp); + + } + lp = SLIST_FIRST(&type->lpt_lpalloc); + if (lp == NULL) 
{ + lock_prof_rejected++; + return (lp); + } + SLIST_REMOVE_HEAD(&type->lpt_lpalloc, link); + lp->file = p; + lp->line = line; + lp->type = lo->lo_type; + lp->name = lo->lo_name; + SLIST_INSERT_HEAD(&type->lpt_hash[hash], lp, link); + return (lp); +} + +static struct lock_profile_object * +lock_profile_object_lookup(struct lock_object *lo, int spin, const char *file, + int line) +{ + struct lock_profile_object *l; + struct lock_prof_type *type; + struct lpohead *head; + + head = &curthread->td_lprof[spin]; + LIST_FOREACH(l, head, lpo_link) + if (l->lpo_obj == lo && l->lpo_file == file && + l->lpo_line == line) + return (l); + critical_enter(); + type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin]; + l = LIST_FIRST(&type->lpt_lpoalloc); + if (l == NULL) { + lock_prof_rejected++; + critical_exit(); + return (NULL); + } + LIST_REMOVE(l, lpo_link); + critical_exit(); + l->lpo_obj = lo; + l->lpo_file = file; + l->lpo_line = line; + l->lpo_cnt = 0; + LIST_INSERT_HEAD(head, l, lpo_link); + + return (l); +} + +void +lock_profile_obtain_lock_success(struct lock_object *lo, int contested, + uint64_t waittime, const char *file, int line) +{ + static int lock_prof_count; + struct lock_profile_object *l; + int spin; + + /* don't reset the timer when/if recursing */ + if (!lock_prof_enable || (lo->lo_flags & LO_NOPROFILE)) + return; + if (lock_prof_skipcount && + (++lock_prof_count % lock_prof_skipcount) == 0) + return; + spin = LOCK_CLASS(lo) == &lock_class_mtx_spin; + if (spin && lock_prof_skipspin == 1) + return; + l = lock_profile_object_lookup(lo, spin, file, line); + if (l == NULL) + return; + l->lpo_cnt++; + if (++l->lpo_ref > 1) + return; + l->lpo_contest_locking = contested; l->lpo_acqtime = nanoseconds(); if (waittime && (l->lpo_acqtime > waittime)) l->lpo_waittime = l->lpo_acqtime - waittime; @@ -271,87 +505,65 @@ void _lock_profile_obtain_lock_success(struct lock_object *lo, int contested, ui l->lpo_waittime = 0; } -void _lock_profile_release_lock(struct lock_object *lo) +void +lock_profile_release_lock(struct lock_object *lo) { - struct lock_profile_object *l = &lo->lo_profile_obj; - - if (l->lpo_acqtime) { - const char *unknown = "(unknown)"; - u_int64_t acqtime, now, waittime; - struct lock_prof *mpp; - u_int hash; - const char *p = l->lpo_filename; - int collision = 0; - - now = nanoseconds(); - acqtime = l->lpo_acqtime; - waittime = l->lpo_waittime; - if (now <= acqtime) - return; - if (p == NULL || *p == '\0') - p = unknown; - hash = (l->lpo_namehash * 31 * 31 + (uintptr_t)p * 31 + l->lpo_lineno) & LPROF_HASH_MASK; - mpp = &lprof_buf[hash]; - while (mpp->name != NULL) { - if (mpp->line == l->lpo_lineno && - mpp->file == p && - mpp->namehash == l->lpo_namehash) - break; - /* If the lprof_hash entry is allocated to someone - * else, try the next one - */ - collision = 1; - hash = (hash + 1) & LPROF_HASH_MASK; - mpp = &lprof_buf[hash]; - } - if (mpp->name == NULL) { - int buf; - - buf = atomic_fetchadd_int(&allocated_lprof_buf, 1); - /* Just exit if we cannot get a trace buffer */ - if (buf >= LPROF_HASH_SIZE) { - ++lock_prof_rejected; - return; - } - mpp->file = p; - mpp->line = l->lpo_lineno; - mpp->namehash = l->lpo_namehash; - mpp->type = l->lpo_type; - mpp->name = lo->lo_name; - - if (collision) - ++lock_prof_collisions; - - /* - * We might have raced someone else but who cares, - * they'll try again next time - */ - ++lock_prof_records; - } - LPROF_LOCK(hash); - /* - * Record if the lock has been held longer now than ever - * before. 
- */ - if (now - acqtime > mpp->cnt_max) - mpp->cnt_max = now - acqtime; - mpp->cnt_tot += now - acqtime; - mpp->cnt_wait += waittime; - mpp->cnt_cur++; - /* - * There's a small race, really we should cmpxchg - * 0 with the current value, but that would bill - * the contention to the wrong lock instance if - * it followed this also. - */ - mpp->cnt_contest_holding += l->lpo_contest_holding; - mpp->cnt_contest_locking += l->lpo_contest_locking; - LPROF_UNLOCK(hash); - - } - l->lpo_acqtime = 0; - l->lpo_waittime = 0; - l->lpo_contest_locking = 0; - l->lpo_contest_holding = 0; + struct lock_profile_object *l; + struct lock_prof_type *type; + struct lock_prof *lp; + u_int64_t holdtime; + struct lpohead *head; + int spin; + + if (!lock_prof_enable || (lo->lo_flags & LO_NOPROFILE)) + return; + spin = LOCK_CLASS(lo) == &lock_class_mtx_spin; + head = &curthread->td_lprof[spin]; + critical_enter(); + LIST_FOREACH(l, head, lpo_link) + if (l->lpo_obj == lo) + break; + if (l == NULL) + goto out; + if (--l->lpo_ref > 0) + goto out; + lp = lock_profile_lookup(lo, spin, l->lpo_file, l->lpo_line); + if (lp == NULL) + goto release; + holdtime = nanoseconds() - l->lpo_acqtime; + if (holdtime < 0) + goto release; + /* + * Record if the lock has been held longer now than ever + * before. + */ + if (holdtime > lp->cnt_max) + lp->cnt_max = holdtime; + lp->cnt_tot += holdtime; + lp->cnt_wait += l->lpo_waittime; + lp->cnt_contest_locking += l->lpo_contest_locking; + lp->cnt_cur += l->lpo_cnt; +release: + LIST_REMOVE(l, lpo_link); + type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin]; + LIST_INSERT_HEAD(&type->lpt_lpoalloc, l, lpo_link); +out: + critical_exit(); } + +SYSCTL_NODE(_debug, OID_AUTO, lock, CTLFLAG_RD, NULL, "lock debugging"); +SYSCTL_NODE(_debug_lock, OID_AUTO, prof, CTLFLAG_RD, NULL, "lock profiling"); +SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipspin, CTLFLAG_RW, + &lock_prof_skipspin, 0, "Skip profiling on spinlocks."); +SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipcount, CTLFLAG_RW, + &lock_prof_skipcount, 0, "Sample approximately every N lock acquisitions."); +SYSCTL_INT(_debug_lock_prof, OID_AUTO, rejected, CTLFLAG_RD, + &lock_prof_rejected, 0, "Number of rejected profiling records"); +SYSCTL_PROC(_debug_lock_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD, + NULL, 0, dump_lock_prof_stats, "A", "Lock profiling statistics"); +SYSCTL_PROC(_debug_lock_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW, + NULL, 0, reset_lock_prof_stats, "I", "Reset lock profiling statistics"); +SYSCTL_PROC(_debug_lock_prof, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW, + NULL, 0, enable_lock_prof, "I", "Enable lock profiling"); + #endif diff --git a/sys/sys/_lock.h b/sys/sys/_lock.h index cc1ea0a..dd0b071 100644 --- a/sys/sys/_lock.h +++ b/sys/sys/_lock.h @@ -31,35 +31,10 @@ #ifndef _SYS__LOCK_H_ #define _SYS__LOCK_H_ -struct lock_profile_object { - /* - * This does not result in variant structure sizes because - * MUTEX_PROFILING is in opt_global.h - */ - u_int64_t lpo_acqtime; - u_int64_t lpo_waittime; - const char *lpo_filename; - u_int lpo_namehash; - int lpo_lineno; - const char *lpo_type; - /* - * Fields relating to measuring contention on mutexes. - * holding must be accessed atomically since it's - * modified by threads that don't yet hold the mutex. - * locking is only modified and referenced while - * the mutex is held. - */ - u_int lpo_contest_holding; - u_int lpo_contest_locking; -}; - struct lock_object { const char *lo_name; /* Individual lock name. */ const char *lo_type; /* General lock type. 
*/ u_int lo_flags; -#ifdef LOCK_PROFILING - struct lock_profile_object lo_profile_obj; -#endif union { /* Data for witness. */ STAILQ_ENTRY(lock_object) lod_list; struct witness *lod_witness; diff --git a/sys/sys/lock_profile.h b/sys/sys/lock_profile.h index f2861ac..f1e2659 100644 --- a/sys/sys/lock_profile.h +++ b/sys/sys/lock_profile.h @@ -31,139 +31,54 @@ #ifndef _SYS_LOCK_PROFILE_H_ #define _SYS_LOCK_PROFILE_H_ +#ifdef _KERNEL + +struct lock_profile_object; +LIST_HEAD(lpohead, lock_profile_object); + #ifdef LOCK_PROFILING -#include -#include -#include -#include -#include - -#ifndef LPROF_HASH_SIZE -#define LPROF_HASH_SIZE 4096 -#define LPROF_HASH_MASK (LPROF_HASH_SIZE - 1) -#endif +#include #ifndef USE_CPU_NANOSECONDS u_int64_t nanoseconds(void); #endif -struct lock_prof { - const char *name; - const char *type; - const char *file; - u_int namehash; - int line; - uintmax_t cnt_max; - uintmax_t cnt_tot; - uintmax_t cnt_wait; - uintmax_t cnt_cur; - uintmax_t cnt_contest_holding; - uintmax_t cnt_contest_locking; -}; - -extern struct lock_prof lprof_buf[LPROF_HASH_SIZE]; -#define LPROF_SBUF_SIZE 256 * 400 - -/* We keep a smaller pool of spin mutexes for protecting the lprof hash entries */ -#define LPROF_LOCK_SIZE 16 -#define LPROF_LOCK_MASK (LPROF_LOCK_SIZE - 1) -#define LPROF_LHASH(hash) ((hash) & LPROF_LOCK_MASK) - -#define LPROF_LOCK(hash) mtx_lock_spin(&lprof_locks[LPROF_LHASH(hash)]) -#define LPROF_UNLOCK(hash) mtx_unlock_spin(&lprof_locks[LPROF_LHASH(hash)]) - -#ifdef _KERNEL -extern struct mtx lprof_locks[LPROF_LOCK_SIZE]; extern int lock_prof_enable; -void _lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, const char *file, int line); -void _lock_profile_update_wait(struct lock_object *lo, uint64_t waitstart); -void _lock_profile_release_lock(struct lock_object *lo); - -static inline void lock_profile_object_init(struct lock_object *lo, struct lock_class *class, const char *name) { - const char *p; - u_int hash = 0; - struct lock_profile_object *l = &lo->lo_profile_obj; - - l->lpo_acqtime = 0; - l->lpo_waittime = 0; - l->lpo_filename = NULL; - l->lpo_lineno = 0; - l->lpo_contest_holding = 0; - l->lpo_contest_locking = 0; - l->lpo_type = class->lc_name; - - /* Hash the mutex name to an int so we don't have to strcmp() it repeatedly */ - for (p = name; *p != '\0'; p++) - hash = 31 * hash + *p; - l->lpo_namehash = hash; -#if 0 - if (opts & MTX_PROFILE) - l->lpo_stack = stack_create(); -#endif -} - +void lock_profile_obtain_lock_success(struct lock_object *lo, int contested, + uint64_t waittime, const char *file, int line); +void lock_profile_release_lock(struct lock_object *lo); -static inline void -lock_profile_object_destroy(struct lock_object *lo) +static inline void +lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested, + uint64_t *waittime) { -#if 0 - struct lock_profile_object *l = &lo->lo_profile_obj; - if (lo->lo_flags & LO_PROFILE) - stack_destroy(l->lpo_stack); -#endif + if (!lock_prof_enable || (lo->lo_flags & LO_NOPROFILE) || *contested) + return; + *waittime = nanoseconds(); + *contested = 1; } -static inline void lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested, - uint64_t *waittime) +#else /* !LOCK_PROFILING */ + +static inline void +lock_profile_release_lock(struct lock_object *lo) { - struct lock_profile_object *l = &lo->lo_profile_obj; - - if (!(lo->lo_flags & LO_NOPROFILE) && lock_prof_enable && - *contested == 0) { - *waittime = nanoseconds(); - 
atomic_add_int(&l->lpo_contest_holding, 1); - *contested = 1; - } } -static inline void lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, const char *file, int line) +static inline void +lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested, uint64_t *waittime) { - - /* don't reset the timer when/if recursing */ - if (!(lo->lo_flags & LO_NOPROFILE) && lock_prof_enable && - lo->lo_profile_obj.lpo_acqtime == 0) { -#ifdef LOCK_PROFILING_FAST - if (contested == 0) - return; -#endif - _lock_profile_obtain_lock_success(lo, contested, waittime, file, line); - } } -static inline void lock_profile_release_lock(struct lock_object *lo) -{ - struct lock_profile_object *l = &lo->lo_profile_obj; - if (!(lo->lo_flags & LO_NOPROFILE) && l->lpo_acqtime) - _lock_profile_release_lock(lo); +static inline void +lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, + const char *file, int line) +{ } -#endif /* _KERNEL */ - -#else /* !LOCK_PROFILING */ - -#ifdef _KERNEL -static inline void lock_profile_update_wait(struct lock_object *lo, uint64_t waitstart) {;} -static inline void lock_profile_update_contest_locking(struct lock_object *lo, int contested) {;} -static inline void lock_profile_release_lock(struct lock_object *lo) {;} -static inline void lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested, uint64_t *waittime) {;} -static inline void lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, - const char *file, int line) {;} -static inline void lock_profile_object_destroy(struct lock_object *lo) {;} -static inline void lock_profile_object_init(struct lock_object *lo, struct lock_class *class, const char *name) {;} +#endif /* !LOCK_PROFILING */ #endif /* _KERNEL */ -#endif /* !LOCK_PROFILING */ - #endif /* _SYS_LOCK_PROFILE_H_ */ diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 7d0aca4..7ebb8c2 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -45,6 +45,7 @@ #endif #include #include +#include #include #include #include /* XXX. */ @@ -298,6 +299,7 @@ struct thread { struct td_sched *td_sched; /* (*) Scheduler-specific data. */ struct kaudit_record *td_ar; /* (k) Active audit record, if any. */ int td_syscalls; /* per-thread syscall count (used by NFS :)) */ + struct lpohead td_lprof[2]; /* (a) lock profiling objects. */ }; struct mtx *thread_lock_block(struct thread *); diff --git a/sys/sys/sx.h b/sys/sys/sx.h index 47fdae6..5df5f36 100644 --- a/sys/sys/sx.h +++ b/sys/sys/sx.h @@ -178,11 +178,9 @@ __sx_slock(struct sx *sx, int opts, const char *file, int line) if (!(x & SX_LOCK_SHARED) || !atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) error = _sx_slock_hard(sx, opts, file, line); -#ifdef LOCK_PROFILING_SHARED - else if (SX_SHARERS(x) == 0) + else lock_profile_obtain_lock_success(&sx->lock_object, 0, 0, file, line); -#endif return (error); } -- cgit v1.1
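
Usage note (not part of the commit): with this change applied, the profiler is driven entirely through the sysctl tree created in subr_lock.c -- debug.lock.prof.enable, debug.lock.prof.stats, debug.lock.prof.reset, and the debug.lock.prof.skipspin / debug.lock.prof.skipcount tunables. The sketch below is a hypothetical userland consumer of those OIDs; the program itself, its ten-second sampling window, and the choice of sysctlbyname(3) over sysctl(8) are illustrative assumptions, not something this patch ships.

/*
 * Hypothetical example: enable the new lock profiler, sample for a
 * while, then dump the formatted statistics.  The OID names are the
 * ones registered by subr_lock.c in this patch; everything else is
 * an assumption made for illustration.
 */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	int zero = 0, one = 1;
	size_t len;
	char *buf;

	/* Writing 1 also resets the per-cpu tables (see enable_lock_prof()). */
	if (sysctlbyname("debug.lock.prof.enable", NULL, NULL, &one,
	    sizeof(one)) != 0)
		err(1, "debug.lock.prof.enable");

	sleep(10);	/* run the workload of interest here */

	if (sysctlbyname("debug.lock.prof.enable", NULL, NULL, &zero,
	    sizeof(zero)) != 0)
		err(1, "debug.lock.prof.enable");

	/*
	 * debug.lock.prof.stats is a string OID: size it, then fetch it.
	 * If records are added between the two calls the buffer could be
	 * short; ignored here for brevity.
	 */
	if (sysctlbyname("debug.lock.prof.stats", NULL, &len, NULL, 0) != 0)
		err(1, "debug.lock.prof.stats");
	if ((buf = malloc(len)) == NULL)
		err(1, "malloc");
	if (sysctlbyname("debug.lock.prof.stats", buf, &len, NULL, 0) != 0)
		err(1, "debug.lock.prof.stats");
	fputs(buf, stdout);
	free(buf);

	return (0);
}

Interactively, sysctl debug.lock.prof.enable=1 followed later by sysctl debug.lock.prof.stats does the same job. The max/total/wait_total/count/avg/wait_avg columns in the output correspond to the cnt_* fields summed per (file, line, lock name) in lock_prof_sum() above, and cnt_hold is always reported as 0 by design, as noted in the commit message.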