From 35c71928a03800d369738a09947e6bcb26d29b1a Mon Sep 17 00:00:00 2001
From: tjr
Date: Tue, 17 Jun 2003 08:52:45 +0000
Subject: MFp4: Fix two bugs causing possible deadlocks or panics, and one nit:

- Emulate lock draining (LK_DRAIN) in null_lock() to avoid deadlocks
  when the vnode is being recycled.
- Don't allow null_nodeget() to return a nullfs vnode from the wrong
  mount when multiple nullfs's are mounted. It's unclear why these checks
  were removed in null_subr.c 1.35, but they are definitely necessary.
  Without the checks, trying to unmount a nullfs mount will erroneously
  return EBUSY, and forcibly unmounting with -f will cause a panic.
- Bump LOG2_SIZEVNODE up to 8, since vnodes are >256 bytes now. The old
  value (7) didn't cause any problems, but made the hash algorithm
  suboptimal.

These changes fix nullfs enough that a parallel buildworld succeeds.

Submitted by:	tegge (partially; LK_DRAIN)
Tested by:	kris
---
 sys/fs/nullfs/null.h       |  2 ++
 sys/fs/nullfs/null_subr.c  | 45 +++++++++++++++++++++++++++-------
 sys/fs/nullfs/null_vnops.c | 61 +++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 95 insertions(+), 13 deletions(-)

diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
index 3cb3716..62deb1e 100644
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -51,6 +51,8 @@ struct null_node {
 	LIST_ENTRY(null_node)	null_hash;	/* Hash list */
 	struct vnode		*null_lowervp;	/* VREFed once */
 	struct vnode		*null_vnode;	/* Back pointer */
+	int			null_pending_locks;
+	int			null_drain_wakeup;
 };
 #define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index f2a43e0..2120415 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -50,7 +50,7 @@
 #include <fs/nullfs/null.h>
-#define LOG2_SIZEVNODE 7	/* log2(sizeof struct vnode) */
+#define LOG2_SIZEVNODE 8	/* log2(sizeof struct vnode) */
 #define NNULLNODECACHE 16
 /*
@@ -71,8 +71,8 @@ struct mtx null_hashmtx;
 static MALLOC_DEFINE(M_NULLFSHASH, "NULLFS hash", "NULLFS hash table");
 MALLOC_DEFINE(M_NULLFSNODE, "NULLFS node", "NULLFS vnode private part");
-static struct vnode * null_hashget(struct vnode *);
-static struct vnode * null_hashins(struct null_node *);
+static struct vnode * null_hashget(struct mount *, struct vnode *);
+static struct vnode * null_hashins(struct mount *, struct null_node *);
 /*
  * Initialise cache headers
  */
@@ -103,7 +103,8 @@ nullfs_uninit(vfsp)
  * Lower vnode should be locked on entry and will be left locked on exit.
  */
 static struct vnode *
-null_hashget(lowervp)
+null_hashget(mp, lowervp)
+	struct mount *mp;
 	struct vnode *lowervp;
 {
 	struct thread *td = curthread;	/* XXX */
@@ -121,9 +122,20 @@ null_hashget(lowervp)
 loop:
 	mtx_lock(&null_hashmtx);
 	LIST_FOREACH(a, hd, null_hash) {
-		if (a->null_lowervp == lowervp) {
+		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
 			vp = NULLTOV(a);
 			mtx_lock(&vp->v_interlock);
+			/*
+			 * Don't block if nullfs vnode is being recycled.
+			 * We already hold a lock on the lower vnode, thus
+			 * waiting might deadlock against the thread
+			 * recycling the nullfs vnode or another thread
+			 * in vrele() waiting for the vnode lock.
+			 */
+			if ((vp->v_iflag & VI_XLOCK) != 0) {
+				VI_UNLOCK(vp);
+				continue;
+			}
 			mtx_unlock(&null_hashmtx);
 			/*
 			 * We need vget for the VXLOCK
@@ -145,7 +157,8 @@ loop:
  * node found.
  */
 static struct vnode *
-null_hashins(xp)
+null_hashins(mp, xp)
+	struct mount *mp;
 	struct null_node *xp;
 {
 	struct thread *td = curthread;	/* XXX */
@@ -157,9 +170,21 @@ loop:
 	mtx_lock(&null_hashmtx);
 	LIST_FOREACH(oxp, hd, null_hash) {
-		if (oxp->null_lowervp == xp->null_lowervp) {
+		if (oxp->null_lowervp == xp->null_lowervp &&
+		    NULLTOV(oxp)->v_mount == mp) {
 			ovp = NULLTOV(oxp);
 			mtx_lock(&ovp->v_interlock);
+			/*
+			 * Don't block if nullfs vnode is being recycled.
+			 * We already hold a lock on the lower vnode, thus
+			 * waiting might deadlock against the thread
+			 * recycling the nullfs vnode or another thread
+			 * in vrele() waiting for the vnode lock.
+			 */
+			if ((ovp->v_iflag & VI_XLOCK) != 0) {
+				VI_UNLOCK(ovp);
+				continue;
+			}
 			mtx_unlock(&null_hashmtx);
 			if (vget(ovp, LK_EXCLUSIVE | LK_THISLAYER | LK_INTERLOCK, td))
 				goto loop;
@@ -192,7 +217,7 @@ null_nodeget(mp, lowervp, vpp)
 	int error;
 	/* Lookup the hash firstly */
-	*vpp = null_hashget(lowervp);
+	*vpp = null_hashget(mp, lowervp);
 	if (*vpp != NULL) {
 		vrele(lowervp);
 		return (0);
 	}
@@ -222,6 +247,8 @@
 	xp->null_vnode = vp;
 	xp->null_lowervp = lowervp;
+	xp->null_pending_locks = 0;
+	xp->null_drain_wakeup = 0;
 	vp->v_type = lowervp->v_type;
 	vp->v_data = xp;
@@ -244,7 +271,7 @@ null_nodeget(mp, lowervp, vpp)
 	 * Atomically insert our new node into the hash or vget existing
 	 * if someone else has beaten us to it.
 	 */
-	*vpp = null_hashins(xp);
+	*vpp = null_hashins(mp, xp);
 	if (*vpp != NULL) {
 		vrele(lowervp);
 		VOP_UNLOCK(vp, LK_THISLAYER, td);
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index 588e18c..96d6f83 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -592,6 +592,7 @@ null_lock(ap)
 	struct thread *td = ap->a_td;
 	struct vnode *lvp;
 	int error;
+	struct null_node *nn;
 	if (flags & LK_THISLAYER) {
 		if (vp->v_vnlock != NULL) {
@@ -614,13 +615,65 @@
 		 * going away doesn't mean the struct lock below us is.
 		 * LK_EXCLUSIVE is fine.
 		 */
+		if ((flags & LK_INTERLOCK) == 0) {
+			VI_LOCK(vp);
+			flags |= LK_INTERLOCK;
+		}
+		nn = VTONULL(vp);
 		if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
 			NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n");
-			return(lockmgr(vp->v_vnlock,
-				(flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
-				&vp->v_interlock, td));
+			/*
+			 * Emulate lock draining by waiting for all other
+			 * pending locks to complete. Afterwards the
+			 * lockmgr call might block, but no other threads
+			 * will attempt to use this nullfs vnode due to the
+			 * VI_XLOCK flag.
+			 */
+			while (nn->null_pending_locks > 0) {
+				nn->null_drain_wakeup = 1;
+				msleep(&nn->null_pending_locks,
+				       VI_MTX(vp),
+				       PVFS,
+				       "nuldr", 0);
+			}
+			error = lockmgr(vp->v_vnlock,
+			    (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
+			    VI_MTX(vp), td);
+			return error;
+		}
+		nn->null_pending_locks++;
+		error = lockmgr(vp->v_vnlock, flags, &vp->v_interlock, td);
+		VI_LOCK(vp);
+		/*
+		 * If we're called from vrele then v_usecount can have been 0
+		 * and another process might have initiated a recycle
+		 * operation. When that happens, just back out.
+		 */
+		if (error == 0 && (vp->v_iflag & VI_XLOCK) != 0 &&
+		    td != vp->v_vxproc) {
+			lockmgr(vp->v_vnlock,
+			    (flags & ~LK_TYPE_MASK) | LK_RELEASE,
+			    VI_MTX(vp), td);
+			VI_LOCK(vp);
+			error = ENOENT;
+		}
+		nn->null_pending_locks--;
+		/*
+		 * Wake up the process draining the vnode after all
+		 * pending lock attempts have failed.
+		 */
+		if (nn->null_pending_locks == 0 &&
+		    nn->null_drain_wakeup != 0) {
+			nn->null_drain_wakeup = 0;
+			wakeup(&nn->null_pending_locks);
+		}
+		if (error == ENOENT && (vp->v_iflag & VI_XLOCK) != 0 &&
+		    vp->v_vxproc != curthread) {
+			vp->v_iflag |= VI_XWANT;
+			msleep(vp, VI_MTX(vp), PINOD, "nulbo", 0);
 		}
-		return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock, td));
+		VI_UNLOCK(vp);
+		return error;
 	} else {
 		/*
 		 * To prevent race conditions involving doing a lookup
-- 
cgit v1.1
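
Note on the LK_DRAIN emulation: the null_lock() change boils down to a small
counting protocol. Every ordinary lock attempt increments null_pending_locks
around its lockmgr() call, a would-be drainer sets null_drain_wakeup and
msleep()s until the count reaches zero, and the last attempt to finish issues
the wakeup(). The code below is a minimal userspace sketch of that same
pattern, assuming a pthread mutex and condition variable as stand-ins for the
vnode interlock and msleep()/wakeup(); the type and function names here are
invented for illustration and appear nowhere in the patch.

/*
 * Userspace analogue of the LK_DRAIN emulation: a drain waits until every
 * in-flight lock attempt has finished, mirroring null_pending_locks and
 * null_drain_wakeup. Illustrative only; not kernel code.
 */
#include <pthread.h>
#include <stdio.h>

struct layer_node {
	pthread_mutex_t	interlock;	/* stands in for the vnode interlock */
	pthread_cond_t	drain_cv;	/* stands in for the msleep/wakeup channel */
	int		pending_locks;	/* like null_pending_locks */
	int		drain_wakeup;	/* like null_drain_wakeup */
};

/* An ordinary lock attempt: count it while it is in flight. */
static void
lock_attempt(struct layer_node *nn)
{
	pthread_mutex_lock(&nn->interlock);
	nn->pending_locks++;
	pthread_mutex_unlock(&nn->interlock);

	/* ... acquire and later release the real lower lock here ... */

	pthread_mutex_lock(&nn->interlock);
	nn->pending_locks--;
	/* Wake a drainer once the last pending attempt has finished. */
	if (nn->pending_locks == 0 && nn->drain_wakeup) {
		nn->drain_wakeup = 0;
		pthread_cond_broadcast(&nn->drain_cv);
	}
	pthread_mutex_unlock(&nn->interlock);
}

/* Drain: wait for all other pending lock attempts to complete. */
static void
drain(struct layer_node *nn)
{
	pthread_mutex_lock(&nn->interlock);
	while (nn->pending_locks > 0) {
		nn->drain_wakeup = 1;
		pthread_cond_wait(&nn->drain_cv, &nn->interlock);
	}
	pthread_mutex_unlock(&nn->interlock);
	/* ... now take the lock exclusively, as the patch does ... */
}

int
main(void)
{
	static struct layer_node nn = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0
	};

	lock_attempt(&nn);	/* no contention in this toy run */
	drain(&nn);		/* returns immediately: nothing pending */
	printf("drained with %d pending locks\n", nn.pending_locks);
	return (0);
}

Broadcasting rather than signalling mirrors the kernel wakeup(), which wakes
every thread sleeping on the channel.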
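
Note on the LOG2_SIZEVNODE bump: the constant is documented as
log2(sizeof struct vnode) and presumably feeds a pointer-shift hash of the
lower vnode address; a shift smaller than the real log2 of the allocation
size can leave low index bits that never vary, so some hash chains are never
used. The toy program below demonstrates that effect. Both the bucket()
computation and the 256-byte spacing of the fake vnode addresses are
assumptions made purely for illustration; the actual NULL_NHASH macro is not
part of this diff.

#include <stdint.h>
#include <stdio.h>

#define NBUCKETS	16	/* like NNULLNODECACHE */

/* Assumed NULL_NHASH-style bucket computation: shift, then mask. */
static unsigned
bucket(uintptr_t addr, int log2size)
{
	return ((unsigned)((addr >> log2size) & (NBUCKETS - 1)));
}

int
main(void)
{
	uintptr_t base = 0x80000000UL;
	int used7[NBUCKETS] = { 0 }, used8[NBUCKETS] = { 0 };
	int i, n7 = 0, n8 = 0;

	/* Assume vnodes land on 256-byte-spaced addresses (illustrative). */
	for (i = 0; i < 64; i++) {
		used7[bucket(base + (uintptr_t)i * 256, 7)] = 1;
		used8[bucket(base + (uintptr_t)i * 256, 8)] = 1;
	}
	for (i = 0; i < NBUCKETS; i++) {
		n7 += used7[i];
		n8 += used8[i];
	}
	printf("shift 7: %d of %d buckets used\n", n7, NBUCKETS);
	printf("shift 8: %d of %d buckets used\n", n8, NBUCKETS);
	return (0);
}

Under that spacing assumption the run reports 8 of 16 buckets used for a
shift of 7 and 16 of 16 for a shift of 8, which is the kind of suboptimal
distribution the commit message refers to.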