Diffstat (limited to 'sys/kern/vfs_cache.c')
-rw-r--r--   sys/kern/vfs_cache.c   1368
1 file changed, 1156 insertions, 212 deletions
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index ac46856..45aa053 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/rwlock.h> #include <sys/sdt.h> +#include <sys/smp.h> #include <sys/syscallsubr.h> #include <sys/sysctl.h> #include <sys/sysproto.h> @@ -83,8 +84,10 @@ SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, "struct vnode *"); SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *"); SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *", "struct vnode *"); -SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *", - "char *"); +SDT_PROBE_DEFINE3(vfs, namecache, zap_negative, done, "struct vnode *", + "char *", "int"); +SDT_PROBE_DEFINE3(vfs, namecache, shrink_negative, done, "struct vnode *", + "char *", "int"); /* * This structure describes the elements in the cache of recent @@ -96,7 +99,10 @@ struct namecache { LIST_ENTRY(namecache) nc_src; /* source vnode list */ TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ struct vnode *nc_dvp; /* vnode of parent of name */ - struct vnode *nc_vp; /* vnode the name refers to */ + union { + struct vnode *nu_vp; /* vnode the name refers to */ + u_int nu_neghits; /* negative entry hits */ + } n_un; u_char nc_flag; /* flag bits */ u_char nc_nlen; /* length of name */ char nc_name[0]; /* segment name + nul */ @@ -115,7 +121,10 @@ struct namecache_ts { LIST_ENTRY(namecache) nc_src; /* source vnode list */ TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ struct vnode *nc_dvp; /* vnode of parent of name */ - struct vnode *nc_vp; /* vnode the name refers to */ + union { + struct vnode *nu_vp; /* vnode the name refers to */ + u_int nu_neghits; /* negative entry hits */ + } n_un; u_char nc_flag; /* flag bits */ u_char nc_nlen; /* length of name */ struct timespec nc_time; /* timespec provided by fs */ @@ -124,6 +133,9 @@ struct namecache_ts { char nc_name[0]; /* segment name + nul */ }; +#define nc_vp n_un.nu_vp +#define nc_neghits n_un.nu_neghits + /* * Flags in namecache.nc_flag */ @@ -131,6 +143,9 @@ struct namecache_ts { #define NCF_ISDOTDOT 0x02 #define NCF_TS 0x04 #define NCF_DTS 0x08 +#define NCF_DVDROP 0x10 +#define NCF_NEGATIVE 0x20 +#define NCF_HOTNEGATIVE 0x40 /* * Name caching works as follows: @@ -147,6 +162,37 @@ struct namecache_ts { * Upon reaching the last segment of a path, if the reference * is for DELETE, or NOCACHE is set (rewrite), and the * name is located in the cache, it will be dropped. + * + * These locks are used (in the order in which they can be taken): + * NAME TYPE ROLE + * vnodelock mtx vnode lists and v_cache_dd field protection + * bucketlock rwlock for access to given set of hash buckets + * neglist mtx negative entry LRU management + * + * Additionally, ncneg_shrink_lock mtx is used to have at most one thread + * shrinking the LRU list. + * + * It is legal to take multiple vnodelock and bucketlock locks. The locking + * order is lower address first. Both are recursive. + * + * "." lookups are lockless. + * + * ".." and vnode -> name lookups require vnodelock. + * + * name -> vnode lookup requires the relevant bucketlock to be held for reading. + * + * Insertions and removals of entries require involved vnodes and bucketlocks + * to be write-locked to prevent other threads from seeing the entry. + * + * Some lookups result in removal of the found entry (e.g. 
getting rid of a + * negative entry with the intent to create a positive one), which poses a + * problem when multiple threads reach the state. Similarly, two different + * threads can purge two different vnodes and try to remove the same name. + * + * If the already held vnode lock is lower than the second required lock, we + * can just take the other lock. However, in the opposite case, this could + * deadlock. As such, this is resolved by trylocking and if that fails unlocking + * the first node, locking everything in order and revalidating the state. */ /* @@ -155,7 +201,6 @@ struct namecache_ts { #define NCHHASH(hash) \ (&nchashtbl[(hash) & nchash]) static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ -static TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ static u_long nchash; /* size of hash table */ SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "Size of namecache hash table"); @@ -174,17 +219,54 @@ SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, u_int ncsizefactor = 2; SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0, "Size factor for namecache"); +static u_int ncpurgeminvnodes; +SYSCTL_UINT(_vfs, OID_AUTO, ncpurgeminvnodes, CTLFLAG_RW, &ncpurgeminvnodes, 0, + "Number of vnodes below which purgevfs ignores the request"); +static u_int ncneghitsrequeue = 8; +SYSCTL_UINT(_vfs, OID_AUTO, ncneghitsrequeue, CTLFLAG_RW, &ncneghitsrequeue, 0, + "Number of hits to requeue a negative entry in the LRU list"); struct nchstats nchstats; /* cache effectiveness statistics */ -static struct rwlock cache_lock; -RW_SYSINIT(vfscache, &cache_lock, "Name Cache"); +static struct mtx ncneg_shrink_lock; +MTX_SYSINIT(vfscache_shrink_neg, &ncneg_shrink_lock, "Name Cache shrink neg", + MTX_DEF); + +struct neglist { + struct mtx nl_lock; + TAILQ_HEAD(, namecache) nl_list; +} __aligned(CACHE_LINE_SIZE); + +static struct neglist *neglists; +static struct neglist ncneg_hot; -#define CACHE_UPGRADE_LOCK() rw_try_upgrade(&cache_lock) -#define CACHE_RLOCK() rw_rlock(&cache_lock) -#define CACHE_RUNLOCK() rw_runlock(&cache_lock) -#define CACHE_WLOCK() rw_wlock(&cache_lock) -#define CACHE_WUNLOCK() rw_wunlock(&cache_lock) +static int shrink_list_turn; + +static u_int numneglists; +static inline struct neglist * +NCP2NEGLIST(struct namecache *ncp) +{ + + return (&neglists[(((uintptr_t)(ncp) >> 8) % numneglists)]); +} + +static u_int numbucketlocks; +static struct rwlock_padalign *bucketlocks; +#define HASH2BUCKETLOCK(hash) \ + ((struct rwlock *)(&bucketlocks[((hash) % numbucketlocks)])) + +static u_int numvnodelocks; +static struct mtx *vnodelocks; +static inline struct mtx * +VP2VNODELOCK(struct vnode *vp) +{ + struct mtx *vlp; + + if (vp == NULL) + return (NULL); + vlp = &vnodelocks[(((uintptr_t)(vp) >> 8) % numvnodelocks)]; + return (vlp); +} /* * UMA zones for the VFS cache. 
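
A side note, not part of the patch: the VP2VNODELOCK()/HASH2BUCKETLOCK() helpers above stripe locking by hashing a pointer (or a name hash) into a fixed array of locks, and the locking comment requires that multiple such locks always be taken lower address first. Below is a minimal userland sketch of that idea under those assumptions, with pthread mutexes standing in for the kernel mtx/rwlock primitives and invented names (NLOCKS, obj2lock(), lock_pair()):

#include <pthread.h>
#include <stdint.h>

#define NLOCKS 64			/* a power of two, like numvnodelocks */

static pthread_mutex_t locks[NLOCKS];	/* plays the role of the vnodelocks array */

static void
locks_init(void)
{
	int i;

	for (i = 0; i < NLOCKS; i++)
		pthread_mutex_init(&locks[i], NULL);
}

/*
 * Hash an object's address to one lock in the array.  Shifting right by 8
 * discards low bits that tend to be the same for equally sized allocations.
 */
static pthread_mutex_t *
obj2lock(const void *obj)
{

	return (&locks[((uintptr_t)obj >> 8) % NLOCKS]);
}

/*
 * Lock the locks covering two objects in increasing address order (the
 * cache_sort() trick), so that two callers picking the same pair in the
 * opposite order cannot deadlock.  Both objects may hash to the same lock;
 * here it is simply taken once (the patch instead makes its locks recursive).
 */
static void
lock_pair(const void *a, const void *b)
{
	pthread_mutex_t *l1, *l2, *tmp;

	l1 = obj2lock(a);
	l2 = obj2lock(b);
	if (l1 > l2) {
		tmp = l1;
		l1 = l2;
		l2 = tmp;
	}
	pthread_mutex_lock(l1);
	if (l2 != l1)
		pthread_mutex_lock(l2);
}
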
@@ -224,6 +306,8 @@ cache_free(struct namecache *ncp) if (ncp == NULL) return; ts = ncp->nc_flag & NCF_TS; + if ((ncp->nc_flag & NCF_DVDROP) != 0) + vdrop(ncp->nc_dvp); if (ncp->nc_nlen <= CACHE_PATH_CUTOFF) { if (ts) uma_zfree(cache_zone_small_ts, ncp); @@ -299,17 +383,49 @@ STATNODE_COUNTER(numfullpathfail2, "Number of fullpath search errors (VOP_VPTOCNP failures)"); STATNODE_COUNTER(numfullpathfail4, "Number of fullpath search errors (ENOMEM)"); STATNODE_COUNTER(numfullpathfound, "Number of successful fullpath calls"); -static long numupgrades; STATNODE_ULONG(numupgrades, - "Number of updates of the cache after lookup (write lock + retry)"); +static long zap_and_exit_bucket_fail; STATNODE_ULONG(zap_and_exit_bucket_fail, + "Number of times zap_and_exit failed to lock"); +static long cache_lock_vnodes_cel_3_failures; +STATNODE_ULONG(cache_lock_vnodes_cel_3_failures, + "Number of times 3-way vnode locking failed"); -static void cache_zap(struct namecache *ncp); -static int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, - u_int *buflen); +static void cache_zap_locked(struct namecache *ncp, bool neg_locked); static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, char *buf, char **retbuf, u_int buflen); static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); +static int cache_yield; +SYSCTL_INT(_vfs_cache, OID_AUTO, yield, CTLFLAG_RD, &cache_yield, 0, + "Number of times cache called yield"); + +static void +cache_maybe_yield(void) +{ + + if (should_yield()) { + cache_yield++; + kern_yield(PRI_USER); + } +} + +static inline void +cache_assert_vlp_locked(struct mtx *vlp) +{ + + if (vlp != NULL) + mtx_assert(vlp, MA_OWNED); +} + +static inline void +cache_assert_vnode_locked(struct vnode *vp) +{ + struct mtx *vlp; + + vlp = VP2VNODELOCK(vp); + cache_assert_vlp_locked(vlp); +} + static uint32_t cache_get_hash(char *name, u_char len, struct vnode *dvp) { @@ -320,6 +436,109 @@ cache_get_hash(char *name, u_char len, struct vnode *dvp) return (hash); } +static inline struct rwlock * +NCP2BUCKETLOCK(struct namecache *ncp) +{ + uint32_t hash; + + hash = cache_get_hash(nc_get_name(ncp), ncp->nc_nlen, ncp->nc_dvp); + return (HASH2BUCKETLOCK(hash)); +} + +#ifdef INVARIANTS +static void +cache_assert_bucket_locked(struct namecache *ncp, int mode) +{ + struct rwlock *blp; + + blp = NCP2BUCKETLOCK(ncp); + rw_assert(blp, mode); +} +#else +#define cache_assert_bucket_locked(x, y) do { } while (0) +#endif + +#define cache_sort(x, y) _cache_sort((void **)(x), (void **)(y)) +static void +_cache_sort(void **p1, void **p2) +{ + void *tmp; + + if (*p1 > *p2) { + tmp = *p2; + *p2 = *p1; + *p1 = tmp; + } +} + +static void +cache_lock_all_buckets(void) +{ + u_int i; + + for (i = 0; i < numbucketlocks; i++) + rw_wlock(&bucketlocks[i]); +} + +static void +cache_unlock_all_buckets(void) +{ + u_int i; + + for (i = 0; i < numbucketlocks; i++) + rw_wunlock(&bucketlocks[i]); +} + +static void +cache_lock_all_vnodes(void) +{ + u_int i; + + for (i = 0; i < numvnodelocks; i++) + mtx_lock(&vnodelocks[i]); +} + +static void +cache_unlock_all_vnodes(void) +{ + u_int i; + + for (i = 0; i < numvnodelocks; i++) + mtx_unlock(&vnodelocks[i]); +} + +static int +cache_trylock_vnodes(struct mtx *vlp1, struct mtx *vlp2) +{ + + cache_sort(&vlp1, &vlp2); + MPASS(vlp2 != NULL); + + if (vlp1 != NULL) { + if (!mtx_trylock(vlp1)) + return (EAGAIN); + } + if (!mtx_trylock(vlp2)) { + if (vlp1 != NULL) + mtx_unlock(vlp1); + return (EAGAIN); + } + + return (0); +} + +static void 
+cache_unlock_vnodes(struct mtx *vlp1, struct mtx *vlp2) +{ + + MPASS(vlp1 != NULL || vlp2 != NULL); + + if (vlp1 != NULL) + mtx_unlock(vlp1); + if (vlp2 != NULL) + mtx_unlock(vlp2); +} + static int sysctl_nchstats(SYSCTL_HANDLER_ARGS) { @@ -361,9 +580,9 @@ retry: if (req->oldptr == NULL) return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); cntbuf = malloc(n_nchash * sizeof(int), M_TEMP, M_ZERO | M_WAITOK); - CACHE_RLOCK(); + cache_lock_all_buckets(); if (n_nchash != nchash + 1) { - CACHE_RUNLOCK(); + cache_unlock_all_buckets(); free(cntbuf, M_TEMP); goto retry; } @@ -371,7 +590,7 @@ retry: for (ncpp = nchashtbl, i = 0; i < n_nchash; ncpp++, i++) LIST_FOREACH(ncp, ncpp, nc_hash) cntbuf[i]++; - CACHE_RUNLOCK(); + cache_unlock_all_buckets(); for (error = 0, i = 0; i < n_nchash; i++) if ((error = SYSCTL_OUT(req, &cntbuf[i], sizeof(int))) != 0) break; @@ -394,7 +613,7 @@ sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) if (!req->oldptr) return SYSCTL_OUT(req, 0, 4 * sizeof(int)); - CACHE_RLOCK(); + cache_lock_all_buckets(); n_nchash = nchash + 1; /* nchash is max index, not count */ used = 0; maxlength = 0; @@ -411,7 +630,7 @@ sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) maxlength = count; } n_nchash = nchash + 1; - CACHE_RUNLOCK(); + cache_unlock_all_buckets(); pct = (used * 100) / (n_nchash / 100); error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash)); if (error) @@ -433,49 +652,432 @@ SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD| #endif /* - * cache_zap(): + * Negative entries management + * + * A variation of LRU scheme is used. New entries are hashed into one of + * numneglists cold lists. Entries get promoted to the hot list on first hit. + * Partial LRU for the hot list is maintained by requeueing them every + * ncneghitsrequeue hits. + * + * The shrinker will demote hot list head and evict from the cold list in a + * round-robin manner. + */ +static void +cache_negative_hit(struct namecache *ncp) +{ + struct neglist *neglist; + u_int hits; + + MPASS(ncp->nc_flag & NCF_NEGATIVE); + hits = atomic_fetchadd_int(&ncp->nc_neghits, 1); + if (ncp->nc_flag & NCF_HOTNEGATIVE) { + if ((hits % ncneghitsrequeue) != 0) + return; + mtx_lock(&ncneg_hot.nl_lock); + if (ncp->nc_flag & NCF_HOTNEGATIVE) { + TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst); + TAILQ_INSERT_TAIL(&ncneg_hot.nl_list, ncp, nc_dst); + mtx_unlock(&ncneg_hot.nl_lock); + return; + } + /* + * The shrinker cleared the flag and removed the entry from + * the hot list. Put it back. 
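
A simplified, single-threaded sketch of the hot/cold promotion policy described in the "Negative entries management" comment above; it is illustrative only (one cold list instead of numneglists hashed lists, invented names, all locking omitted) and uses FreeBSD's <sys/queue.h> TAILQ macros:

#include <sys/queue.h>
#include <stdbool.h>
#include <stddef.h>

#define REQUEUE_EVERY 8			/* plays the role of ncneghitsrequeue */

struct negentry {
	TAILQ_ENTRY(negentry) link;
	unsigned int hits;
	bool hot;
};

TAILQ_HEAD(neglist, negentry);
static struct neglist cold = TAILQ_HEAD_INITIALIZER(cold);
static struct neglist hot = TAILQ_HEAD_INITIALIZER(hot);

/* Called on every hit against a cached negative entry. */
static void
negentry_hit(struct negentry *ne)
{

	ne->hits++;
	if (!ne->hot) {
		/* First hit (or the shrinker demoted it): promote to hot. */
		TAILQ_REMOVE(&cold, ne, link);
		TAILQ_INSERT_TAIL(&hot, ne, link);
		ne->hot = true;
		return;
	}
	/* Already hot: refresh the LRU position only every few hits. */
	if (ne->hits % REQUEUE_EVERY == 0) {
		TAILQ_REMOVE(&hot, ne, link);
		TAILQ_INSERT_TAIL(&hot, ne, link);
	}
}

/* Shrinker: demote the oldest hot entry, then evict the oldest cold one. */
static struct negentry *
negentry_shrink(void)
{
	struct negentry *ne;

	if ((ne = TAILQ_FIRST(&hot)) != NULL) {
		TAILQ_REMOVE(&hot, ne, link);
		TAILQ_INSERT_TAIL(&cold, ne, link);
		ne->hot = false;
	}
	if ((ne = TAILQ_FIRST(&cold)) != NULL)
		TAILQ_REMOVE(&cold, ne, link);
	return (ne);			/* caller frees the evicted entry, if any */
}
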
+ */ + } else { + mtx_lock(&ncneg_hot.nl_lock); + } + neglist = NCP2NEGLIST(ncp); + mtx_lock(&neglist->nl_lock); + if (!(ncp->nc_flag & NCF_HOTNEGATIVE)) { + TAILQ_REMOVE(&neglist->nl_list, ncp, nc_dst); + TAILQ_INSERT_TAIL(&ncneg_hot.nl_list, ncp, nc_dst); + ncp->nc_flag |= NCF_HOTNEGATIVE; + } + mtx_unlock(&neglist->nl_lock); + mtx_unlock(&ncneg_hot.nl_lock); +} + +static void +cache_negative_insert(struct namecache *ncp, bool neg_locked) +{ + struct neglist *neglist; + + MPASS(ncp->nc_flag & NCF_NEGATIVE); + cache_assert_bucket_locked(ncp, RA_WLOCKED); + neglist = NCP2NEGLIST(ncp); + if (!neg_locked) { + mtx_lock(&neglist->nl_lock); + } else { + mtx_assert(&neglist->nl_lock, MA_OWNED); + } + TAILQ_INSERT_TAIL(&neglist->nl_list, ncp, nc_dst); + if (!neg_locked) + mtx_unlock(&neglist->nl_lock); + atomic_add_rel_long(&numneg, 1); +} + +static void +cache_negative_remove(struct namecache *ncp, bool neg_locked) +{ + struct neglist *neglist; + bool hot_locked = false; + bool list_locked = false; + + MPASS(ncp->nc_flag & NCF_NEGATIVE); + cache_assert_bucket_locked(ncp, RA_WLOCKED); + neglist = NCP2NEGLIST(ncp); + if (!neg_locked) { + if (ncp->nc_flag & NCF_HOTNEGATIVE) { + hot_locked = true; + mtx_lock(&ncneg_hot.nl_lock); + if (!(ncp->nc_flag & NCF_HOTNEGATIVE)) { + list_locked = true; + mtx_lock(&neglist->nl_lock); + } + } else { + list_locked = true; + mtx_lock(&neglist->nl_lock); + } + } else { + mtx_assert(&neglist->nl_lock, MA_OWNED); + mtx_assert(&ncneg_hot.nl_lock, MA_OWNED); + } + if (ncp->nc_flag & NCF_HOTNEGATIVE) { + TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst); + } else { + TAILQ_REMOVE(&neglist->nl_list, ncp, nc_dst); + } + if (list_locked) + mtx_unlock(&neglist->nl_lock); + if (hot_locked) + mtx_unlock(&ncneg_hot.nl_lock); + atomic_subtract_rel_long(&numneg, 1); +} + +static void +cache_negative_shrink_select(int start, struct namecache **ncpp, + struct neglist **neglistpp) +{ + struct neglist *neglist; + struct namecache *ncp; + int i; + + *ncpp = ncp = NULL; + + for (i = start; i < numneglists; i++) { + neglist = &neglists[i]; + if (TAILQ_FIRST(&neglist->nl_list) == NULL) + continue; + mtx_lock(&neglist->nl_lock); + ncp = TAILQ_FIRST(&neglist->nl_list); + if (ncp != NULL) + break; + mtx_unlock(&neglist->nl_lock); + } + + *neglistpp = neglist; + *ncpp = ncp; +} + +static void +cache_negative_zap_one(void) +{ + struct namecache *ncp, *ncp2; + struct neglist *neglist; + struct mtx *dvlp; + struct rwlock *blp; + + if (!mtx_trylock(&ncneg_shrink_lock)) + return; + + mtx_lock(&ncneg_hot.nl_lock); + ncp = TAILQ_FIRST(&ncneg_hot.nl_list); + if (ncp != NULL) { + neglist = NCP2NEGLIST(ncp); + mtx_lock(&neglist->nl_lock); + TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst); + TAILQ_INSERT_TAIL(&neglist->nl_list, ncp, nc_dst); + ncp->nc_flag &= ~NCF_HOTNEGATIVE; + mtx_unlock(&neglist->nl_lock); + } + + cache_negative_shrink_select(shrink_list_turn, &ncp, &neglist); + shrink_list_turn++; + if (shrink_list_turn == numneglists) + shrink_list_turn = 0; + if (ncp == NULL && shrink_list_turn == 0) + cache_negative_shrink_select(shrink_list_turn, &ncp, &neglist); + if (ncp == NULL) { + mtx_unlock(&ncneg_hot.nl_lock); + goto out; + } + + MPASS(ncp->nc_flag & NCF_NEGATIVE); + dvlp = VP2VNODELOCK(ncp->nc_dvp); + blp = NCP2BUCKETLOCK(ncp); + mtx_unlock(&neglist->nl_lock); + mtx_unlock(&ncneg_hot.nl_lock); + mtx_lock(dvlp); + rw_wlock(blp); + mtx_lock(&ncneg_hot.nl_lock); + mtx_lock(&neglist->nl_lock); + ncp2 = TAILQ_FIRST(&neglist->nl_list); + if (ncp != ncp2 || dvlp != VP2VNODELOCK(ncp2->nc_dvp) || + 
blp != NCP2BUCKETLOCK(ncp2) || !(ncp2->nc_flag & NCF_NEGATIVE)) { + ncp = NULL; + goto out_unlock_all; + } + SDT_PROBE3(vfs, namecache, shrink_negative, done, ncp->nc_dvp, + nc_get_name(ncp), ncp->nc_neghits); + + cache_zap_locked(ncp, true); +out_unlock_all: + mtx_unlock(&neglist->nl_lock); + mtx_unlock(&ncneg_hot.nl_lock); + rw_wunlock(blp); + mtx_unlock(dvlp); +out: + mtx_unlock(&ncneg_shrink_lock); + cache_free(ncp); +} + +/* + * cache_zap_locked(): * * Removes a namecache entry from cache, whether it contains an actual * pointer to a vnode or if it is just a negative cache entry. */ static void -cache_zap(struct namecache *ncp) +cache_zap_locked(struct namecache *ncp, bool neg_locked) { - struct vnode *vp; - rw_assert(&cache_lock, RA_WLOCKED); - CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp); - if (ncp->nc_vp != NULL) { + if (!(ncp->nc_flag & NCF_NEGATIVE)) + cache_assert_vnode_locked(ncp->nc_vp); + cache_assert_vnode_locked(ncp->nc_dvp); + cache_assert_bucket_locked(ncp, RA_WLOCKED); + + CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, + (ncp->nc_flag & NCF_NEGATIVE) ? NULL : ncp->nc_vp); + if (!(ncp->nc_flag & NCF_NEGATIVE)) { SDT_PROBE3(vfs, namecache, zap, done, ncp->nc_dvp, nc_get_name(ncp), ncp->nc_vp); } else { - SDT_PROBE2(vfs, namecache, zap_negative, done, ncp->nc_dvp, - nc_get_name(ncp)); + SDT_PROBE3(vfs, namecache, zap_negative, done, ncp->nc_dvp, + nc_get_name(ncp), ncp->nc_neghits); } - vp = NULL; LIST_REMOVE(ncp, nc_hash); + if (!(ncp->nc_flag & NCF_NEGATIVE)) { + TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); + if (ncp == ncp->nc_vp->v_cache_dd) + ncp->nc_vp->v_cache_dd = NULL; + } else { + cache_negative_remove(ncp, neg_locked); + } if (ncp->nc_flag & NCF_ISDOTDOT) { if (ncp == ncp->nc_dvp->v_cache_dd) ncp->nc_dvp->v_cache_dd = NULL; } else { LIST_REMOVE(ncp, nc_src); if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { - vp = ncp->nc_dvp; - numcachehv--; + ncp->nc_flag |= NCF_DVDROP; + atomic_subtract_rel_long(&numcachehv, 1); } } - if (ncp->nc_vp) { - TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); - if (ncp == ncp->nc_vp->v_cache_dd) - ncp->nc_vp->v_cache_dd = NULL; + atomic_subtract_rel_long(&numcache, 1); +} + +static void +cache_zap_negative_locked_vnode_kl(struct namecache *ncp, struct vnode *vp) +{ + struct rwlock *blp; + + MPASS(ncp->nc_dvp == vp); + MPASS(ncp->nc_flag & NCF_NEGATIVE); + cache_assert_vnode_locked(vp); + + blp = NCP2BUCKETLOCK(ncp); + rw_wlock(blp); + cache_zap_locked(ncp, false); + rw_wunlock(blp); +} + +static bool +cache_zap_locked_vnode_kl2(struct namecache *ncp, struct vnode *vp, + struct mtx **vlpp) +{ + struct mtx *pvlp, *vlp1, *vlp2, *to_unlock; + struct rwlock *blp; + + MPASS(vp == ncp->nc_dvp || vp == ncp->nc_vp); + cache_assert_vnode_locked(vp); + + if (ncp->nc_flag & NCF_NEGATIVE) { + if (*vlpp != NULL) { + mtx_unlock(*vlpp); + *vlpp = NULL; + } + cache_zap_negative_locked_vnode_kl(ncp, vp); + return (true); + } + + pvlp = VP2VNODELOCK(vp); + blp = NCP2BUCKETLOCK(ncp); + vlp1 = VP2VNODELOCK(ncp->nc_dvp); + vlp2 = VP2VNODELOCK(ncp->nc_vp); + + if (*vlpp == vlp1 || *vlpp == vlp2) { + to_unlock = *vlpp; + *vlpp = NULL; } else { - TAILQ_REMOVE(&ncneg, ncp, nc_dst); - numneg--; + if (*vlpp != NULL) { + mtx_unlock(*vlpp); + *vlpp = NULL; + } + cache_sort(&vlp1, &vlp2); + if (vlp1 == pvlp) { + mtx_lock(vlp2); + to_unlock = vlp2; + } else { + if (!mtx_trylock(vlp1)) + goto out_relock; + to_unlock = vlp1; + } + } + rw_wlock(blp); + cache_zap_locked(ncp, false); + rw_wunlock(blp); + if (to_unlock != NULL) + mtx_unlock(to_unlock); + 
return (true); + +out_relock: + mtx_unlock(vlp2); + mtx_lock(vlp1); + mtx_lock(vlp2); + MPASS(*vlpp == NULL); + *vlpp = vlp1; + return (false); +} + +static int +cache_zap_locked_vnode(struct namecache *ncp, struct vnode *vp) +{ + struct mtx *pvlp, *vlp1, *vlp2, *to_unlock; + struct rwlock *blp; + int error = 0; + + MPASS(vp == ncp->nc_dvp || vp == ncp->nc_vp); + cache_assert_vnode_locked(vp); + + pvlp = VP2VNODELOCK(vp); + if (ncp->nc_flag & NCF_NEGATIVE) { + cache_zap_negative_locked_vnode_kl(ncp, vp); + goto out; + } + + blp = NCP2BUCKETLOCK(ncp); + vlp1 = VP2VNODELOCK(ncp->nc_dvp); + vlp2 = VP2VNODELOCK(ncp->nc_vp); + cache_sort(&vlp1, &vlp2); + if (vlp1 == pvlp) { + mtx_lock(vlp2); + to_unlock = vlp2; + } else { + if (!mtx_trylock(vlp1)) { + error = EAGAIN; + goto out; + } + to_unlock = vlp1; + } + rw_wlock(blp); + cache_zap_locked(ncp, false); + rw_wunlock(blp); + mtx_unlock(to_unlock); +out: + mtx_unlock(pvlp); + return (error); +} + +static int +cache_zap_rlocked_bucket(struct namecache *ncp, struct rwlock *blp) +{ + struct mtx *dvlp, *vlp; + + cache_assert_bucket_locked(ncp, RA_RLOCKED); + + dvlp = VP2VNODELOCK(ncp->nc_dvp); + vlp = NULL; + if (!(ncp->nc_flag & NCF_NEGATIVE)) + vlp = VP2VNODELOCK(ncp->nc_vp); + if (cache_trylock_vnodes(dvlp, vlp) == 0) { + rw_runlock(blp); + rw_wlock(blp); + cache_zap_locked(ncp, false); + rw_wunlock(blp); + cache_unlock_vnodes(dvlp, vlp); + return (0); + } + + rw_runlock(blp); + return (EAGAIN); +} + +static int +cache_zap_wlocked_bucket_kl(struct namecache *ncp, struct rwlock *blp, + struct mtx **vlpp1, struct mtx **vlpp2) +{ + struct mtx *dvlp, *vlp; + + cache_assert_bucket_locked(ncp, RA_WLOCKED); + + dvlp = VP2VNODELOCK(ncp->nc_dvp); + vlp = NULL; + if (!(ncp->nc_flag & NCF_NEGATIVE)) + vlp = VP2VNODELOCK(ncp->nc_vp); + cache_sort(&dvlp, &vlp); + + if (*vlpp1 == dvlp && *vlpp2 == vlp) { + cache_zap_locked(ncp, false); + cache_unlock_vnodes(dvlp, vlp); + *vlpp1 = NULL; + *vlpp2 = NULL; + return (0); + } + + if (*vlpp1 != NULL) + mtx_unlock(*vlpp1); + if (*vlpp2 != NULL) + mtx_unlock(*vlpp2); + *vlpp1 = NULL; + *vlpp2 = NULL; + + if (cache_trylock_vnodes(dvlp, vlp) == 0) { + cache_zap_locked(ncp, false); + cache_unlock_vnodes(dvlp, vlp); + return (0); + } + + rw_wunlock(blp); + *vlpp1 = dvlp; + *vlpp2 = vlp; + if (*vlpp1 != NULL) + mtx_lock(*vlpp1); + mtx_lock(*vlpp2); + rw_wlock(blp); + return (EAGAIN); +} + +static void +cache_lookup_unlock(struct rwlock *blp, struct mtx *vlp) +{ + + if (blp != NULL) { + rw_runlock(blp); + mtx_assert(vlp, MA_NOTOWNED); + } else { + mtx_unlock(vlp); } - numcache--; - cache_free(ncp); - if (vp != NULL) - vdrop(vp); } /* @@ -500,19 +1102,21 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct timespec *tsp, int *ticksp) { struct namecache *ncp; + struct rwlock *blp; + struct mtx *dvlp, *dvlp2; uint32_t hash; - int error, ltype, wlocked; + int error, ltype; if (!doingcache) { cnp->cn_flags &= ~MAKEENTRY; return (0); } retry: - wlocked = 0; - counter_u64_add(numcalls, 1); + blp = NULL; + dvlp = VP2VNODELOCK(dvp); error = 0; + counter_u64_add(numcalls, 1); -retry_wlocked: if (cnp->cn_nameptr[0] == '.') { if (cnp->cn_namelen == 1) { *vpp = dvp; @@ -544,28 +1148,44 @@ retry_wlocked: } return (-1); } - if (!wlocked) - CACHE_RLOCK(); if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { counter_u64_add(dotdothits, 1); - if (dvp->v_cache_dd == NULL) { + dvlp2 = NULL; + mtx_lock(dvlp); +retry_dotdot: + ncp = dvp->v_cache_dd; + if (ncp == NULL) { SDT_PROBE3(vfs, namecache, lookup, 
miss, dvp, "..", NULL); - goto unlock; + mtx_unlock(dvlp); + return (0); } if ((cnp->cn_flags & MAKEENTRY) == 0) { - if (!wlocked && !CACHE_UPGRADE_LOCK()) - goto wlock; - if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT) - cache_zap(dvp->v_cache_dd); - dvp->v_cache_dd = NULL; - CACHE_WUNLOCK(); + if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) { + if (ncp->nc_dvp != dvp) + panic("dvp %p v_cache_dd %p\n", dvp, ncp); + if (!cache_zap_locked_vnode_kl2(ncp, + dvp, &dvlp2)) + goto retry_dotdot; + MPASS(dvp->v_cache_dd == NULL); + mtx_unlock(dvlp); + if (dvlp2 != NULL) + mtx_unlock(dvlp2); + cache_free(ncp); + } else { + dvp->v_cache_dd = NULL; + mtx_unlock(dvlp); + if (dvlp2 != NULL) + mtx_unlock(dvlp2); + } return (0); } - ncp = dvp->v_cache_dd; - if (ncp->nc_flag & NCF_ISDOTDOT) - *vpp = ncp->nc_vp; - else + if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) { + if (ncp->nc_flag & NCF_NEGATIVE) + *vpp = NULL; + else + *vpp = ncp->nc_vp; + } else *vpp = ncp->nc_dvp; /* Return failure if negative entry was found. */ if (*vpp == NULL) @@ -581,10 +1201,12 @@ retry_wlocked: nc_dotdottime; goto success; } - } else if (!wlocked) - CACHE_RLOCK(); + } hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp); + blp = HASH2BUCKETLOCK(hash); + rw_rlock(blp); + LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { counter_u64_add(numchecks, 1); if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && @@ -607,15 +1229,11 @@ retry_wlocked: /* We don't want to have an entry, so dump it */ if ((cnp->cn_flags & MAKEENTRY) == 0) { counter_u64_add(numposzaps, 1); - if (!wlocked && !CACHE_UPGRADE_LOCK()) - goto wlock; - cache_zap(ncp); - CACHE_WUNLOCK(); - return (0); + goto zap_and_exit; } /* We found a "positive" match, return the vnode */ - if (ncp->nc_vp) { + if (!(ncp->nc_flag & NCF_NEGATIVE)) { counter_u64_add(numposhits, 1); *vpp = ncp->nc_vp; CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p", @@ -630,43 +1248,19 @@ negative_success: /* We found a negative match, and want to create it, so purge */ if (cnp->cn_nameiop == CREATE) { counter_u64_add(numnegzaps, 1); - if (!wlocked && !CACHE_UPGRADE_LOCK()) - goto wlock; - cache_zap(ncp); - CACHE_WUNLOCK(); - return (0); + goto zap_and_exit; } - if (!wlocked && !CACHE_UPGRADE_LOCK()) - goto wlock; counter_u64_add(numneghits, 1); - /* - * We found a "negative" match, so we shift it to the end of - * the "negative" cache entries queue to satisfy LRU. Also, - * check to see if the entry is a whiteout; indicate this to - * the componentname, if so. - */ - TAILQ_REMOVE(&ncneg, ncp, nc_dst); - TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); + cache_negative_hit(ncp); if (ncp->nc_flag & NCF_WHITE) cnp->cn_flags |= ISWHITEOUT; SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp, nc_get_name(ncp)); cache_out_ts(ncp, tsp, ticksp); - CACHE_WUNLOCK(); + cache_lookup_unlock(blp, dvlp); return (ENOENT); -wlock: - /* - * We need to update the cache after our lookup, so upgrade to - * a write lock and retry the operation. 
- */ - CACHE_RUNLOCK(); - CACHE_WLOCK(); - numupgrades++; - wlocked = 1; - goto retry_wlocked; - success: /* * On success we return a locked and ref'd vnode as per the lookup @@ -679,10 +1273,7 @@ success: VOP_UNLOCK(dvp, 0); } vhold(*vpp); - if (wlocked) - CACHE_WUNLOCK(); - else - CACHE_RUNLOCK(); + cache_lookup_unlock(blp, dvlp); error = vget(*vpp, cnp->cn_lkflags | LK_VNHELD, cnp->cn_thread); if (cnp->cn_flags & ISDOTDOT) { vn_lock(dvp, ltype | LK_RETRY); @@ -704,13 +1295,235 @@ success: return (-1); unlock: - if (wlocked) - CACHE_WUNLOCK(); + cache_lookup_unlock(blp, dvlp); + return (0); + +zap_and_exit: + if (blp != NULL) + error = cache_zap_rlocked_bucket(ncp, blp); else - CACHE_RUNLOCK(); + error = cache_zap_locked_vnode(ncp, dvp); + if (error != 0) { + zap_and_exit_bucket_fail++; + cache_maybe_yield(); + goto retry; + } + cache_free(ncp); return (0); } +struct celockstate { + struct mtx *vlp[3]; + struct rwlock *blp[2]; +}; +CTASSERT((nitems(((struct celockstate *)0)->vlp) == 3)); +CTASSERT((nitems(((struct celockstate *)0)->blp) == 2)); + +static inline void +cache_celockstate_init(struct celockstate *cel) +{ + + bzero(cel, sizeof(*cel)); +} + +static void +cache_lock_vnodes_cel(struct celockstate *cel, struct vnode *vp, + struct vnode *dvp) +{ + struct mtx *vlp1, *vlp2; + + MPASS(cel->vlp[0] == NULL); + MPASS(cel->vlp[1] == NULL); + MPASS(cel->vlp[2] == NULL); + + MPASS(vp != NULL || dvp != NULL); + + vlp1 = VP2VNODELOCK(vp); + vlp2 = VP2VNODELOCK(dvp); + cache_sort(&vlp1, &vlp2); + + if (vlp1 != NULL) { + mtx_lock(vlp1); + cel->vlp[0] = vlp1; + } + mtx_lock(vlp2); + cel->vlp[1] = vlp2; +} + +static void +cache_unlock_vnodes_cel(struct celockstate *cel) +{ + + MPASS(cel->vlp[0] != NULL || cel->vlp[1] != NULL); + + if (cel->vlp[0] != NULL) + mtx_unlock(cel->vlp[0]); + if (cel->vlp[1] != NULL) + mtx_unlock(cel->vlp[1]); + if (cel->vlp[2] != NULL) + mtx_unlock(cel->vlp[2]); +} + +static bool +cache_lock_vnodes_cel_3(struct celockstate *cel, struct vnode *vp) +{ + struct mtx *vlp; + bool ret; + + cache_assert_vlp_locked(cel->vlp[0]); + cache_assert_vlp_locked(cel->vlp[1]); + MPASS(cel->vlp[2] == NULL); + + vlp = VP2VNODELOCK(vp); + MPASS(vlp != NULL); + + ret = true; + if (vlp >= cel->vlp[1]) { + mtx_lock(vlp); + } else { + if (mtx_trylock(vlp)) + goto out; + cache_lock_vnodes_cel_3_failures++; + cache_unlock_vnodes_cel(cel); + if (vlp < cel->vlp[0]) { + mtx_lock(vlp); + mtx_lock(cel->vlp[0]); + mtx_lock(cel->vlp[1]); + } else { + if (cel->vlp[0] != NULL) + mtx_lock(cel->vlp[0]); + mtx_lock(vlp); + mtx_lock(cel->vlp[1]); + } + ret = false; + } +out: + cel->vlp[2] = vlp; + return (ret); +} + +static void +cache_lock_buckets_cel(struct celockstate *cel, struct rwlock *blp1, + struct rwlock *blp2) +{ + + MPASS(cel->blp[0] == NULL); + MPASS(cel->blp[1] == NULL); + + cache_sort(&blp1, &blp2); + + if (blp1 != NULL) { + rw_wlock(blp1); + cel->blp[0] = blp1; + } + rw_wlock(blp2); + cel->blp[1] = blp2; +} + +static void +cache_unlock_buckets_cel(struct celockstate *cel) +{ + + if (cel->blp[0] != NULL) + rw_wunlock(cel->blp[0]); + rw_wunlock(cel->blp[1]); +} + +/* + * Lock part of the cache affected by the insertion. + * + * This means vnodelocks for dvp, vp and the relevant bucketlock. + * However, insertion can result in removal of an old entry. In this + * case we have an additional vnode and bucketlock pair to lock. If the + * entry is negative, ncelock is locked instead of the vnode. 
+ * + * That is, in the worst case we have to lock 3 vnodes and 2 bucketlocks, while + * preserving the locking order (smaller address first). + */ +static void +cache_enter_lock(struct celockstate *cel, struct vnode *dvp, struct vnode *vp, + uint32_t hash) +{ + struct namecache *ncp; + struct rwlock *blps[2]; + + blps[0] = HASH2BUCKETLOCK(hash); + for (;;) { + blps[1] = NULL; + cache_lock_vnodes_cel(cel, dvp, vp); + if (vp == NULL || vp->v_type != VDIR) + break; + ncp = vp->v_cache_dd; + if (ncp == NULL) + break; + if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) + break; + MPASS(ncp->nc_dvp == vp); + blps[1] = NCP2BUCKETLOCK(ncp); + if (ncp->nc_flag & NCF_NEGATIVE) + break; + if (cache_lock_vnodes_cel_3(cel, ncp->nc_vp)) + break; + /* + * All vnodes got re-locked. Re-validate the state and if + * nothing changed we are done. Otherwise restart. + */ + if (ncp == vp->v_cache_dd && + (ncp->nc_flag & NCF_ISDOTDOT) != 0 && + blps[1] == NCP2BUCKETLOCK(ncp) && + VP2VNODELOCK(ncp->nc_vp) == cel->vlp[2]) + break; + cache_unlock_vnodes_cel(cel); + cel->vlp[0] = NULL; + cel->vlp[1] = NULL; + cel->vlp[2] = NULL; + } + cache_lock_buckets_cel(cel, blps[0], blps[1]); +} + +static void +cache_enter_lock_dd(struct celockstate *cel, struct vnode *dvp, struct vnode *vp, + uint32_t hash) +{ + struct namecache *ncp; + struct rwlock *blps[2]; + + blps[0] = HASH2BUCKETLOCK(hash); + for (;;) { + blps[1] = NULL; + cache_lock_vnodes_cel(cel, dvp, vp); + ncp = dvp->v_cache_dd; + if (ncp == NULL) + break; + if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) + break; + MPASS(ncp->nc_dvp == dvp); + blps[1] = NCP2BUCKETLOCK(ncp); + if (ncp->nc_flag & NCF_NEGATIVE) + break; + if (cache_lock_vnodes_cel_3(cel, ncp->nc_vp)) + break; + if (ncp == dvp->v_cache_dd && + (ncp->nc_flag & NCF_ISDOTDOT) != 0 && + blps[1] == NCP2BUCKETLOCK(ncp) && + VP2VNODELOCK(ncp->nc_vp) == cel->vlp[2]) + break; + cache_unlock_vnodes_cel(cel); + cel->vlp[0] = NULL; + cel->vlp[1] = NULL; + cel->vlp[2] = NULL; + } + cache_lock_buckets_cel(cel, blps[0], blps[1]); +} + +static void +cache_enter_unlock(struct celockstate *cel) +{ + + cache_unlock_buckets_cel(cel); + cache_unlock_vnodes_cel(cel); +} + /* * Add an entry to the cache. 
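
cache_enter_lock() and cache_enter_lock_dd() above use a lock-then-revalidate loop: take an unlocked snapshot of which extra objects will need locking, acquire the locks, then re-check that the snapshot still holds and restart from scratch if it does not. A generic userland sketch of the same loop under simplified assumptions (pthread mutexes instead of kernel locks; struct node, its dd pointer and lock_node_and_dd() are invented stand-ins for a vnode, v_cache_dd and the helpers above):

#include <pthread.h>
#include <stddef.h>

struct node {
	pthread_mutex_t lock;		/* assumed initialized with pthread_mutex_init() */
	struct node *dd;		/* may be changed by other threads, like v_cache_dd */
};

/*
 * Lock 'n' together with whatever n->dd pointed at when we looked.
 * The snapshot is taken without any lock held, so once both mutexes are
 * owned the pointer is re-checked; if it moved, everything is dropped and
 * the loop starts over.  Locks are taken in address order so two racing
 * callers cannot deadlock.  Returns the locked dd (or NULL); the caller
 * unlocks both.
 */
static struct node *
lock_node_and_dd(struct node *n)
{
	struct node *dd;
	pthread_mutex_t *l1, *l2, *tmp;

	for (;;) {
		dd = n->dd;			/* unlocked snapshot, revalidated below */
		if (dd == NULL) {
			pthread_mutex_lock(&n->lock);
			if (n->dd == NULL)
				return (NULL);
			pthread_mutex_unlock(&n->lock);
			continue;
		}
		l1 = &n->lock;
		l2 = &dd->lock;
		if (l1 > l2) {			/* address-ordered, as in cache_sort() */
			tmp = l1;
			l1 = l2;
			l2 = tmp;
		}
		pthread_mutex_lock(l1);
		if (l2 != l1)
			pthread_mutex_lock(l2);
		if (n->dd == dd)		/* state revalidated, done */
			return (dd);
		/* Lost a race: unlock and retry against the new pointer. */
		if (l2 != l1)
			pthread_mutex_unlock(l2);
		pthread_mutex_unlock(l1);
	}
}
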
*/ @@ -718,12 +1531,15 @@ void cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, struct timespec *tsp, struct timespec *dtsp) { - struct namecache *ncp, *n2; + struct celockstate cel; + struct namecache *ncp, *n2, *ndd; struct namecache_ts *n3; struct nchashhead *ncpp; + struct neglist *neglist; uint32_t hash; int flag; int len; + bool neg_locked; CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr); VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp, @@ -740,12 +1556,16 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, if (numcache >= desiredvnodes * ncsizefactor) return; + cache_celockstate_init(&cel); + ndd = NULL; flag = 0; if (cnp->cn_nameptr[0] == '.') { if (cnp->cn_namelen == 1) return; if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { - CACHE_WLOCK(); + len = cnp->cn_namelen; + hash = cache_get_hash(cnp->cn_nameptr, len, dvp); + cache_enter_lock_dd(&cel, dvp, vp, hash); /* * If dotdot entry already exists, just retarget it * to new parent vnode, otherwise continue with new @@ -755,27 +1575,40 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, ncp->nc_flag & NCF_ISDOTDOT) { KASSERT(ncp->nc_dvp == dvp, ("wrong isdotdot parent")); - if (ncp->nc_vp != NULL) { + neg_locked = false; + if (ncp->nc_flag & NCF_NEGATIVE || vp == NULL) { + neglist = NCP2NEGLIST(ncp); + mtx_lock(&ncneg_hot.nl_lock); + mtx_lock(&neglist->nl_lock); + neg_locked = true; + } + if (!(ncp->nc_flag & NCF_NEGATIVE)) { TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); } else { - TAILQ_REMOVE(&ncneg, ncp, nc_dst); - numneg--; + cache_negative_remove(ncp, true); } if (vp != NULL) { TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); + ncp->nc_flag &= ~(NCF_NEGATIVE|NCF_HOTNEGATIVE); } else { - TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); - numneg++; + ncp->nc_flag &= ~(NCF_HOTNEGATIVE); + ncp->nc_flag |= NCF_NEGATIVE; + cache_negative_insert(ncp, true); + } + if (neg_locked) { + mtx_unlock(&neglist->nl_lock); + mtx_unlock(&ncneg_hot.nl_lock); } ncp->nc_vp = vp; - CACHE_WUNLOCK(); + cache_enter_unlock(&cel); return; } dvp->v_cache_dd = NULL; + cache_enter_unlock(&cel); + cache_celockstate_init(&cel); SDT_PROBE3(vfs, namecache, enter, done, dvp, "..", vp); - CACHE_WUNLOCK(); flag = NCF_ISDOTDOT; } } @@ -785,9 +1618,11 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, * namecache entry as possible before acquiring the lock. 
*/ ncp = cache_alloc(cnp->cn_namelen, tsp != NULL); + ncp->nc_flag = flag; ncp->nc_vp = vp; + if (vp == NULL) + ncp->nc_flag |= NCF_NEGATIVE; ncp->nc_dvp = dvp; - ncp->nc_flag = flag; if (tsp != NULL) { n3 = (struct namecache_ts *)ncp; n3->nc_time = *tsp; @@ -801,7 +1636,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, len = ncp->nc_nlen = cnp->cn_namelen; hash = cache_get_hash(cnp->cn_nameptr, len, dvp); strlcpy(nc_get_name(ncp), cnp->cn_nameptr, len + 1); - CACHE_WLOCK(); + cache_enter_lock(&cel, dvp, vp, hash); /* * See if this vnode or negative entry is already in the cache @@ -825,12 +1660,14 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, n3->nc_dotdottime = ((struct namecache_ts *)ncp)-> nc_dotdottime; + if (ncp->nc_flag & NCF_NEGATIVE) + mtx_lock(&ncneg_hot.nl_lock); n3->nc_flag |= NCF_DTS; + if (ncp->nc_flag & NCF_NEGATIVE) + mtx_unlock(&ncneg_hot.nl_lock); } } - CACHE_WUNLOCK(); - cache_free(ncp); - return; + goto out_unlock_free; } } @@ -839,17 +1676,14 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, * See if we are trying to add .. entry, but some other lookup * has populated v_cache_dd pointer already. */ - if (dvp->v_cache_dd != NULL) { - CACHE_WUNLOCK(); - cache_free(ncp); - return; - } + if (dvp->v_cache_dd != NULL) + goto out_unlock_free; KASSERT(vp == NULL || vp->v_type == VDIR, ("wrong vnode type %p", vp)); dvp->v_cache_dd = ncp; } - numcache++; + atomic_add_rel_long(&numcache, 1); if (vp != NULL) { if (vp->v_type == VDIR) { if (flag != NCF_ISDOTDOT) { @@ -858,9 +1692,12 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, * directory name in it and the name ".." for the * directory's parent. */ - if ((n2 = vp->v_cache_dd) != NULL && - (n2->nc_flag & NCF_ISDOTDOT) != 0) - cache_zap(n2); + if ((ndd = vp->v_cache_dd) != NULL) { + if ((ndd->nc_flag & NCF_ISDOTDOT) != 0) + cache_zap_locked(ndd, false); + else + ndd = NULL; + } vp->v_cache_dd = ncp; } } else { @@ -868,20 +1705,21 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, } } - /* - * Insert the new namecache entry into the appropriate chain - * within the cache entries table. - */ - LIST_INSERT_HEAD(ncpp, ncp, nc_hash); if (flag != NCF_ISDOTDOT) { if (LIST_EMPTY(&dvp->v_cache_src)) { vhold(dvp); - numcachehv++; + atomic_add_rel_long(&numcachehv, 1); } LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); } /* + * Insert the new namecache entry into the appropriate chain + * within the cache entries table. + */ + LIST_INSERT_HEAD(ncpp, ncp, nc_hash); + + /* * If the entry is "negative", we place it into the * "negative" cache queue, otherwise, we place it into the * destination vnode's cache entries queue. 
@@ -893,18 +1731,30 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, } else { if (cnp->cn_flags & ISWHITEOUT) ncp->nc_flag |= NCF_WHITE; - TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); - numneg++; + cache_negative_insert(ncp, false); SDT_PROBE2(vfs, namecache, enter_negative, done, dvp, nc_get_name(ncp)); } - if (numneg * ncnegfactor > numcache) { - ncp = TAILQ_FIRST(&ncneg); - KASSERT(ncp->nc_vp == NULL, ("ncp %p vp %p on ncneg", - ncp, ncp->nc_vp)); - cache_zap(ncp); - } - CACHE_WUNLOCK(); + cache_enter_unlock(&cel); + if (numneg * ncnegfactor > numcache) + cache_negative_zap_one(); + cache_free(ndd); + return; +out_unlock_free: + cache_enter_unlock(&cel); + cache_free(ncp); + return; +} + +static u_int +cache_roundup_2(u_int val) +{ + u_int res; + + for (res = 1; res <= val; res <<= 1) + continue; + + return (res); } /* @@ -913,8 +1763,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, static void nchinit(void *dummy __unused) { - - TAILQ_INIT(&ncneg); + u_int i; cache_zone_small = uma_zcreate("S VFS Cache", sizeof(struct namecache) + CACHE_PATH_CUTOFF + 1, @@ -930,6 +1779,29 @@ nchinit(void *dummy __unused) NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); + numbucketlocks = cache_roundup_2(mp_ncpus * 64); + if (numbucketlocks > nchash + 1) + numbucketlocks = nchash + 1; + bucketlocks = malloc(sizeof(*bucketlocks) * numbucketlocks, M_VFSCACHE, + M_WAITOK | M_ZERO); + for (i = 0; i < numbucketlocks; i++) + rw_init_flags(&bucketlocks[i], "ncbuc", RW_DUPOK | RW_RECURSE); + numvnodelocks = cache_roundup_2(mp_ncpus * 64); + vnodelocks = malloc(sizeof(*vnodelocks) * numvnodelocks, M_VFSCACHE, + M_WAITOK | M_ZERO); + for (i = 0; i < numvnodelocks; i++) + mtx_init(&vnodelocks[i], "ncvn", NULL, MTX_DUPOK | MTX_RECURSE); + ncpurgeminvnodes = numbucketlocks; + + numneglists = 4; + neglists = malloc(sizeof(*neglists) * numneglists, M_VFSCACHE, + M_WAITOK | M_ZERO); + for (i = 0; i < numneglists; i++) { + mtx_init(&neglists[i].nl_lock, "ncnegl", NULL, MTX_DEF); + TAILQ_INIT(&neglists[i].nl_list); + } + mtx_init(&ncneg_hot.nl_lock, "ncneglh", NULL, MTX_DEF); + TAILQ_INIT(&ncneg_hot.nl_list); numcalls = counter_u64_alloc(M_WAITOK); dothits = counter_u64_alloc(M_WAITOK); @@ -958,7 +1830,11 @@ cache_changesize(int newmaxvnodes) uint32_t hash; int i; - new_nchashtbl = hashinit(newmaxvnodes * 2, M_VFSCACHE, &new_nchash); + newmaxvnodes = cache_roundup_2(newmaxvnodes * 2); + if (newmaxvnodes < numbucketlocks) + newmaxvnodes = numbucketlocks; + + new_nchashtbl = hashinit(newmaxvnodes, M_VFSCACHE, &new_nchash); /* If same hash table size, nothing to do */ if (nchash == new_nchash) { free(new_nchashtbl, M_VFSCACHE); @@ -969,7 +1845,8 @@ cache_changesize(int newmaxvnodes) * None of the namecache entries in the table can be removed * because to do so, they have to be removed from the hash table. 
*/ - CACHE_WLOCK(); + cache_lock_all_vnodes(); + cache_lock_all_buckets(); old_nchashtbl = nchashtbl; old_nchash = nchash; nchashtbl = new_nchashtbl; @@ -982,7 +1859,8 @@ cache_changesize(int newmaxvnodes) LIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash); } } - CACHE_WUNLOCK(); + cache_unlock_all_buckets(); + cache_unlock_all_vnodes(); free(old_nchashtbl, M_VFSCACHE); } @@ -992,21 +1870,47 @@ cache_changesize(int newmaxvnodes) void cache_purge(struct vnode *vp) { + TAILQ_HEAD(, namecache) ncps; + struct namecache *ncp, *nnp; + struct mtx *vlp, *vlp2; CTR1(KTR_VFS, "cache_purge(%p)", vp); SDT_PROBE1(vfs, namecache, purge, done, vp); - CACHE_WLOCK(); - while (!LIST_EMPTY(&vp->v_cache_src)) - cache_zap(LIST_FIRST(&vp->v_cache_src)); - while (!TAILQ_EMPTY(&vp->v_cache_dst)) - cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); - if (vp->v_cache_dd != NULL) { - KASSERT(vp->v_cache_dd->nc_flag & NCF_ISDOTDOT, + if (LIST_EMPTY(&vp->v_cache_src) && TAILQ_EMPTY(&vp->v_cache_dst) && + vp->v_cache_dd == NULL) + return; + TAILQ_INIT(&ncps); + vlp = VP2VNODELOCK(vp); + vlp2 = NULL; + mtx_lock(vlp); +retry: + while (!LIST_EMPTY(&vp->v_cache_src)) { + ncp = LIST_FIRST(&vp->v_cache_src); + if (!cache_zap_locked_vnode_kl2(ncp, vp, &vlp2)) + goto retry; + TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst); + } + while (!TAILQ_EMPTY(&vp->v_cache_dst)) { + ncp = TAILQ_FIRST(&vp->v_cache_dst); + if (!cache_zap_locked_vnode_kl2(ncp, vp, &vlp2)) + goto retry; + TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst); + } + ncp = vp->v_cache_dd; + if (ncp != NULL) { + KASSERT(ncp->nc_flag & NCF_ISDOTDOT, ("lost dotdot link")); - cache_zap(vp->v_cache_dd); + if (!cache_zap_locked_vnode_kl2(ncp, vp, &vlp2)) + goto retry; + TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst); } KASSERT(vp->v_cache_dd == NULL, ("incomplete purge")); - CACHE_WUNLOCK(); + mtx_unlock(vlp); + if (vlp2 != NULL) + mtx_unlock(vlp2); + TAILQ_FOREACH_SAFE(ncp, &ncps, nc_dst, nnp) { + cache_free(ncp); + } } /* @@ -1015,37 +1919,77 @@ cache_purge(struct vnode *vp) void cache_purge_negative(struct vnode *vp) { - struct namecache *cp, *ncp; + TAILQ_HEAD(, namecache) ncps; + struct namecache *ncp, *nnp; + struct mtx *vlp; CTR1(KTR_VFS, "cache_purge_negative(%p)", vp); SDT_PROBE1(vfs, namecache, purge_negative, done, vp); - CACHE_WLOCK(); - LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) { - if (cp->nc_vp == NULL) - cache_zap(cp); + TAILQ_INIT(&ncps); + vlp = VP2VNODELOCK(vp); + mtx_lock(vlp); + LIST_FOREACH_SAFE(ncp, &vp->v_cache_src, nc_src, nnp) { + if (!(ncp->nc_flag & NCF_NEGATIVE)) + continue; + cache_zap_negative_locked_vnode_kl(ncp, vp); + TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst); + } + mtx_unlock(vlp); + TAILQ_FOREACH_SAFE(ncp, &ncps, nc_dst, nnp) { + cache_free(ncp); } - CACHE_WUNLOCK(); } /* * Flush all entries referencing a particular filesystem. 
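
cache_purge() and cache_purge_negative() above queue every zapped entry on a function-local TAILQ and only call cache_free() after the namecache locks are dropped, since freeing may itself need to vdrop() a held vnode. A small userland sketch of that deferred-cleanup pattern (invented names; free(3) stands in for cache_free()):

#include <sys/queue.h>
#include <pthread.h>
#include <stdlib.h>

struct entry {
	TAILQ_ENTRY(entry) link;
	/* ... payload ... */
};

TAILQ_HEAD(entlist, entry);

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entlist global = TAILQ_HEAD_INITIALIZER(global);

/*
 * Detach everything while holding the lock, but defer the actual free(3)
 * calls until after the lock is dropped, so that potentially expensive or
 * sleeping cleanup never runs with the lock held.
 */
static void
purge_all(void)
{
	struct entlist doomed;
	struct entry *e;

	TAILQ_INIT(&doomed);

	pthread_mutex_lock(&list_lock);
	while ((e = TAILQ_FIRST(&global)) != NULL) {
		TAILQ_REMOVE(&global, e, link);
		TAILQ_INSERT_TAIL(&doomed, e, link);	/* keep for later */
	}
	pthread_mutex_unlock(&list_lock);

	/* The lock is dropped; now it is safe to do the heavy cleanup. */
	while ((e = TAILQ_FIRST(&doomed)) != NULL) {
		TAILQ_REMOVE(&doomed, e, link);
		free(e);
	}
}
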
*/ void -cache_purgevfs(struct mount *mp) +cache_purgevfs(struct mount *mp, bool force) { - struct nchashhead *ncpp; + TAILQ_HEAD(, namecache) ncps; + struct mtx *vlp1, *vlp2; + struct rwlock *blp; + struct nchashhead *bucket; struct namecache *ncp, *nnp; + u_long i, j, n_nchash; + int error; /* Scan hash tables for applicable entries */ SDT_PROBE1(vfs, namecache, purgevfs, done, mp); - CACHE_WLOCK(); - for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { - LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) { - if (ncp->nc_dvp->v_mount == mp) - cache_zap(ncp); + if (!force && mp->mnt_nvnodelistsize <= ncpurgeminvnodes) + return; + TAILQ_INIT(&ncps); + n_nchash = nchash + 1; + vlp1 = vlp2 = NULL; + for (i = 0; i < numbucketlocks; i++) { + blp = (struct rwlock *)&bucketlocks[i]; + rw_wlock(blp); + for (j = i; j < n_nchash; j += numbucketlocks) { +retry: + bucket = &nchashtbl[j]; + LIST_FOREACH_SAFE(ncp, bucket, nc_hash, nnp) { + cache_assert_bucket_locked(ncp, RA_WLOCKED); + if (ncp->nc_dvp->v_mount != mp) + continue; + error = cache_zap_wlocked_bucket_kl(ncp, blp, + &vlp1, &vlp2); + if (error != 0) + goto retry; + TAILQ_INSERT_HEAD(&ncps, ncp, nc_dst); + } } + rw_wunlock(blp); + if (vlp1 == NULL && vlp2 == NULL) + cache_maybe_yield(); + } + if (vlp1 != NULL) + mtx_unlock(vlp1); + if (vlp2 != NULL) + mtx_unlock(vlp2); + + TAILQ_FOREACH_SAFE(ncp, &ncps, nc_dst, nnp) { + cache_free(ncp); } - CACHE_WUNLOCK(); } /* @@ -1122,9 +2066,9 @@ kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg, u_int buflen, fdp = td->td_proc->p_fd; FILEDESC_SLOCK(fdp); cdir = fdp->fd_cdir; - VREF(cdir); + vrefact(cdir); rdir = fdp->fd_rdir; - VREF(rdir); + vrefact(rdir); FILEDESC_SUNLOCK(fdp); error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen); vrele(rdir); @@ -1214,30 +2158,20 @@ vn_fullpath_global(struct thread *td, struct vnode *vn, int vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen) { - int error; - - CACHE_RLOCK(); - error = vn_vptocnp_locked(vp, cred, buf, buflen); - if (error == 0) - CACHE_RUNLOCK(); - return (error); -} - -static int -vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, - u_int *buflen) -{ struct vnode *dvp; struct namecache *ncp; + struct mtx *vlp; int error; + vlp = VP2VNODELOCK(*vp); + mtx_lock(vlp); TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) { if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) break; } if (ncp != NULL) { if (*buflen < ncp->nc_nlen) { - CACHE_RUNLOCK(); + mtx_unlock(vlp); vrele(*vp); counter_u64_add(numfullpathfail4, 1); error = ENOMEM; @@ -1252,14 +2186,13 @@ vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, dvp = *vp; *vp = ncp->nc_dvp; vref(*vp); - CACHE_RUNLOCK(); + mtx_unlock(vlp); vrele(dvp); - CACHE_RLOCK(); return (0); } SDT_PROBE1(vfs, namecache, fullpath, miss, vp); - CACHE_RUNLOCK(); + mtx_unlock(vlp); vn_lock(*vp, LK_SHARED | LK_RETRY); error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen); vput(*vp); @@ -1270,10 +2203,8 @@ vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, } *vp = dvp; - CACHE_RLOCK(); if (dvp->v_iflag & VI_DOOMED) { /* forced unmount */ - CACHE_RUNLOCK(); vrele(dvp); error = ENOENT; SDT_PROBE3(vfs, namecache, fullpath, return, error, vp, NULL); @@ -1307,13 +2238,11 @@ vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, SDT_PROBE1(vfs, namecache, fullpath, entry, vp); counter_u64_add(numfullpathcalls, 1); vref(vp); - CACHE_RLOCK(); if (vp->v_type != VDIR) { - error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen); + error = 
vn_vptocnp(&vp, td->td_ucred, buf, &buflen); if (error) return (error); if (buflen == 0) { - CACHE_RUNLOCK(); vrele(vp); return (ENOMEM); } @@ -1321,25 +2250,39 @@ vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, slash_prefixed = 1; } while (vp != rdir && vp != rootvnode) { - if (vp->v_vflag & VV_ROOT) { - if (vp->v_iflag & VI_DOOMED) { /* forced unmount */ - CACHE_RUNLOCK(); - vrele(vp); + /* + * The vp vnode must be already fully constructed, + * since it is either found in namecache or obtained + * from VOP_VPTOCNP(). We may test for VV_ROOT safely + * without obtaining the vnode lock. + */ + if ((vp->v_vflag & VV_ROOT) != 0) { + vn_lock(vp, LK_RETRY | LK_SHARED); + + /* + * With the vnode locked, check for races with + * unmount, forced or not. Note that we + * already verified that vp is not equal to + * the root vnode, which means that + * mnt_vnodecovered can be NULL only for the + * case of unmount. + */ + if ((vp->v_iflag & VI_DOOMED) != 0 || + (vp1 = vp->v_mount->mnt_vnodecovered) == NULL || + vp1->v_mountedhere != vp->v_mount) { + vput(vp); error = ENOENT; SDT_PROBE3(vfs, namecache, fullpath, return, error, vp, NULL); break; } - vp1 = vp->v_mount->mnt_vnodecovered; + vref(vp1); - CACHE_RUNLOCK(); - vrele(vp); + vput(vp); vp = vp1; - CACHE_RLOCK(); continue; } if (vp->v_type != VDIR) { - CACHE_RUNLOCK(); vrele(vp); counter_u64_add(numfullpathfail1, 1); error = ENOTDIR; @@ -1347,11 +2290,10 @@ vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, error, vp, NULL); break; } - error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen); + error = vn_vptocnp(&vp, td->td_ucred, buf, &buflen); if (error) break; if (buflen == 0) { - CACHE_RUNLOCK(); vrele(vp); error = ENOMEM; SDT_PROBE3(vfs, namecache, fullpath, return, error, @@ -1365,7 +2307,6 @@ vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, return (error); if (!slash_prefixed) { if (buflen == 0) { - CACHE_RUNLOCK(); vrele(vp); counter_u64_add(numfullpathfail4, 1); SDT_PROBE3(vfs, namecache, fullpath, return, ENOMEM, @@ -1375,7 +2316,6 @@ vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, buf[--buflen] = '/'; } counter_u64_add(numfullpathfound, 1); - CACHE_RUNLOCK(); vrele(vp); SDT_PROBE3(vfs, namecache, fullpath, return, 0, startvp, buf + buflen); @@ -1388,20 +2328,22 @@ vn_dir_dd_ino(struct vnode *vp) { struct namecache *ncp; struct vnode *ddvp; + struct mtx *vlp; ASSERT_VOP_LOCKED(vp, "vn_dir_dd_ino"); - CACHE_RLOCK(); + vlp = VP2VNODELOCK(vp); + mtx_lock(vlp); TAILQ_FOREACH(ncp, &(vp->v_cache_dst), nc_dst) { if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) continue; ddvp = ncp->nc_dvp; vhold(ddvp); - CACHE_RUNLOCK(); + mtx_unlock(vlp); if (vget(ddvp, LK_SHARED | LK_NOWAIT | LK_VNHELD, curthread)) return (NULL); return (ddvp); } - CACHE_RUNLOCK(); + mtx_unlock(vlp); return (NULL); } @@ -1409,19 +2351,21 @@ int vn_commname(struct vnode *vp, char *buf, u_int buflen) { struct namecache *ncp; + struct mtx *vlp; int l; - CACHE_RLOCK(); + vlp = VP2VNODELOCK(vp); + mtx_lock(vlp); TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst) if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) break; if (ncp == NULL) { - CACHE_RUNLOCK(); + mtx_unlock(vlp); return (ENOENT); } l = min(ncp->nc_nlen, buflen - 1); memcpy(buf, nc_get_name(ncp), l); - CACHE_RUNLOCK(); + mtx_unlock(vlp); buf[l] = '\0'; return (0); } |