author     jeff <jeff@FreeBSD.org>	2005-03-15 13:49:33 +0000
committer  jeff <jeff@FreeBSD.org>	2005-03-15 13:49:33 +0000
commit     57fd917aad75576de4048e68dcfbd17ea004045b (patch)
tree       d2348df5b3f7132a7a900e259147ecf529e73d1c /sys/fs/nullfs
parent     2115694bbc6bb40d45f659eef89bac4b98ad6585 (diff)
- Assume that all lower filesystems now support proper locking. Assert
that they set v->v_vnlock. This is true for all filesystems in the
tree.
- Remove all uses of LK_THISLAYER. If the lower layer is locked, the
null layer is locked. We now use vget() only to acquire a reference;
the null layer essentially does no locking of its own. This fixes
LOOKUP_SHARED with nullfs.
- Remove the special LK_DRAIN considerations; I do not believe these
are needed now, as LK_DRAIN does not destroy the lower vnode's lock
and it is hardly used anymore.
- Add one well-commented hack to prevent the lowervp from going away
while we're in its VOP_LOCK routine. This can only happen if we're
forcibly unmounted while some callers are waiting in the lock. In
this case the lowervp could be recycled after we drop our last ref
in null_reclaim(). Prevent this with a vhold().
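
For orientation, the sketch below condenses the new null_lock() scheme that the
last two bullets describe. It is distilled from the null_vnops.c hunk further
down, with the long explanatory comment abbreviated; the diff itself remains the
authoritative version.

	static int
	null_lock(struct vop_lock_args *ap)
	{
		struct vnode *vp = ap->a_vp;
		struct null_node *nn;
		struct vnode *lvp;
		int error;

		if ((ap->a_flags & LK_INTERLOCK) == 0) {
			VI_LOCK(vp);
			ap->a_flags |= LK_INTERLOCK;
		}
		nn = VTONULL(vp);
		if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
			VI_LOCK(lvp);
			VI_UNLOCK(vp);
			/*
			 * Hold the lower vnode so a forced unmount cannot
			 * recycle it while we sleep in its VOP_LOCK.
			 */
			vholdl(lvp);
			error = VOP_LOCK(lvp, ap->a_flags, ap->a_td);
			vdrop(lvp);
		} else
			error = vop_stdlock(ap);
		return (error);
	}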
Diffstat (limited to 'sys/fs/nullfs')
-rw-r--r--	sys/fs/nullfs/null.h	|   2
-rw-r--r--	sys/fs/nullfs/null_subr.c	|  80
-rw-r--r--	sys/fs/nullfs/null_vnops.c	| 171
3 files changed, 63 insertions, 190 deletions
diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
index b6cc7e3..c8fc5de 100644
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -47,8 +47,6 @@ struct null_node {
 	LIST_ENTRY(null_node)	null_hash;	/* Hash list */
 	struct vnode		*null_lowervp;	/* VREFed once */
 	struct vnode		*null_vnode;	/* Back pointer */
-	int			null_pending_locks;
-	int			null_drain_wakeup;
 };
 
 #define	MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index f6543b2..e665675 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -120,27 +120,20 @@ loop:
 	LIST_FOREACH(a, hd, null_hash) {
 		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
 			vp = NULLTOV(a);
-			mtx_lock(&vp->v_interlock);
+			VI_LOCK(vp);
 			/*
-			 * Don't block if nullfs vnode is being recycled.
-			 * We already hold a lock on the lower vnode, thus
-			 * waiting might deadlock against the thread
-			 * recycling the nullfs vnode or another thread
-			 * in vrele() waiting for the vnode lock.
+			 * If the nullfs node is being recycled we have
+			 * to wait until it finishes prior to scanning
+			 * again.
 			 */
-			if ((vp->v_iflag & VI_DOOMED) != 0) {
-				VI_UNLOCK(vp);
-				continue;
-			}
 			mtx_unlock(&null_hashmtx);
-			/*
-			 * We need vget for the VXLOCK
-			 * stuff, but we don't want to lock
-			 * the lower node.
-			 */
-			if (vget(vp, LK_EXCLUSIVE | LK_THISLAYER | LK_INTERLOCK, td))
+			if ((vp->v_iflag & VI_DOOMED) != 0) {
+				/* Wait for recycling to finish. */
+				VOP_LOCK(vp, LK_EXCLUSIVE|LK_INTERLOCK, td);
+				VOP_UNLOCK(vp, 0, td);
 				goto loop;
-
+			}
+			vget(vp, LK_INTERLOCK, td);
 			return (vp);
 		}
 	}
@@ -169,22 +162,19 @@ loop:
 		if (oxp->null_lowervp == xp->null_lowervp &&
 		    NULLTOV(oxp)->v_mount == mp) {
 			ovp = NULLTOV(oxp);
-			mtx_lock(&ovp->v_interlock);
+			VI_LOCK(ovp);
 			/*
-			 * Don't block if nullfs vnode is being recycled.
-			 * We already hold a lock on the lower vnode, thus
-			 * waiting might deadlock against the thread
-			 * recycling the nullfs vnode or another thread
-			 * in vrele() waiting for the vnode lock.
+			 * If the nullfs node is being recycled we have
+			 * to wait until it finishes prior to scanning
+			 * again.
 			 */
-			if ((ovp->v_iflag & VI_DOOMED) != 0) {
-				VI_UNLOCK(ovp);
-				continue;
-			}
 			mtx_unlock(&null_hashmtx);
-			if (vget(ovp, LK_EXCLUSIVE | LK_THISLAYER | LK_INTERLOCK, td))
+			if ((ovp->v_iflag & VI_DOOMED) != 0) {
+				VOP_LOCK(ovp, LK_EXCLUSIVE|LK_INTERLOCK, td);
+				VOP_UNLOCK(ovp, 0, td);
 				goto loop;
-
+			}
+			vget(ovp, LK_INTERLOCK, td);
 			return (ovp);
 		}
 	}
@@ -207,7 +197,6 @@ null_nodeget(mp, lowervp, vpp)
 	struct vnode *lowervp;
 	struct vnode **vpp;
 {
-	struct thread *td = curthread;	/* XXX */
 	struct null_node *xp;
 	struct vnode *vp;
 	int error;
@@ -243,26 +232,11 @@ null_nodeget(mp, lowervp, vpp)
 
 	xp->null_vnode = vp;
 	xp->null_lowervp = lowervp;
-	xp->null_pending_locks = 0;
-	xp->null_drain_wakeup = 0;
-
 	vp->v_type = lowervp->v_type;
 	vp->v_data = xp;
-
-	/*
-	 * From NetBSD:
-	 * Now lock the new node. We rely on the fact that we were passed
-	 * a locked vnode. If the lower node is exporting a struct lock
-	 * (v_vnlock != NULL) then we just set the upper v_vnlock to the
-	 * lower one, and both are now locked. If the lower node is exporting
-	 * NULL, then we copy that up and manually lock the new vnode.
-	 */
-
 	vp->v_vnlock = lowervp->v_vnlock;
-	error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_THISLAYER, td);
-	if (error)
-		panic("null_nodeget: can't lock new vnode\n");
-
+	if (vp->v_vnlock == NULL)
+		panic("null_nodeget: Passed a NULL vnlock.\n");
 	/*
 	 * Atomically insert our new node into the hash or vget existing
 	 * if someone else has beaten us to it.
@@ -270,21 +244,11 @@ null_nodeget(mp, lowervp, vpp)
 	*vpp = null_hashins(mp, xp);
 	if (*vpp != NULL) {
 		vrele(lowervp);
-		VOP_UNLOCK(vp, LK_THISLAYER, td);
-		vp->v_vnlock = NULL;
+		vp->v_vnlock = &vp->v_lock;
 		xp->null_lowervp = NULL;
 		vrele(vp);
 		return (0);
 	}
-
-	/*
-	 * XXX We take extra vref just to workaround UFS's XXX:
-	 * UFS can vrele() vnode in VOP_CLOSE() in some cases. Luckily, this
-	 * can only happen if v_usecount == 1. To workaround, we just don't
-	 * let v_usecount be 1, it will be 2 or more.
-	 */
-	VREF(lowervp);
-
 	*vpp = vp;
 
 	return (0);
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index 19e88b5..3ab3a6f 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -296,7 +296,7 @@ null_bypass(struct vop_generic_args *ap)
 			*(vps_p[i]) = old_vps[i];
 #if 0
 			if (reles & VDESC_VP0_WILLUNLOCK)
-				VOP_UNLOCK(*(vps_p[i]), LK_THISLAYER, curthread);
+				VOP_UNLOCK(*(vps_p[i]), 0, curthread);
 #endif
 			if (reles & VDESC_VP0_WILLRELE)
 				vrele(*(vps_p[i]));
@@ -339,7 +339,6 @@ null_lookup(struct vop_lookup_args *ap)
 {
 	struct componentname *cnp = ap->a_cnp;
 	struct vnode *dvp = ap->a_dvp;
-	struct thread *td = cnp->cn_thread;
 	int flags = cnp->cn_flags;
 	struct vnode *vp, *ldvp, *lvp;
 	int error;
@@ -359,12 +358,6 @@ null_lookup(struct vop_lookup_args *ap)
 	    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
 		error = EROFS;
 
-	/*
-	 * Rely only on the PDIRUNLOCK flag which should be carefully
-	 * tracked by underlying filesystem.
-	 */
-	if ((cnp->cn_flags & PDIRUNLOCK) && dvp->v_vnlock != ldvp->v_vnlock)
-		VOP_UNLOCK(dvp, LK_THISLAYER, td);
 	if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
 		if (ldvp == lvp) {
 			*ap->a_vpp = dvp;
@@ -521,113 +514,42 @@ null_lock(struct vop_lock_args *ap)
 	struct vnode *vp = ap->a_vp;
 	int flags = ap->a_flags;
 	struct thread *td = ap->a_td;
+	struct null_node *nn;
 	struct vnode *lvp;
 	int error;
-	struct null_node *nn;
 
-	if (flags & LK_THISLAYER) {
-		if (vp->v_vnlock != NULL) {
-			/* lock is shared across layers */
-			if (flags & LK_INTERLOCK)
-				mtx_unlock(&vp->v_interlock);
-			return 0;
-		}
-		error = lockmgr(&vp->v_lock, flags & ~LK_THISLAYER,
-		    &vp->v_interlock, td);
-		return (error);
-	}
 	if ((flags & LK_INTERLOCK) == 0) {
 		VI_LOCK(vp);
-		flags |= LK_INTERLOCK;
+		ap->a_flags = flags |= LK_INTERLOCK;
 	}
 
-	if (vp->v_vnlock != NULL) {
-		/*
-		 * The lower level has exported a struct lock to us. Use
-		 * it so that all vnodes in the stack lock and unlock
-		 * simultaneously. Note: we don't DRAIN the lock as DRAIN
-		 * decommissions the lock - just because our vnode is
-		 * going away doesn't mean the struct lock below us is.
-		 * LK_EXCLUSIVE is fine.
-		 */
-		nn = VTONULL(vp);
-		if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
-			NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n");
-			/*
-			 * Emulate lock draining by waiting for all other
-			 * pending locks to complete. Afterwards the
-			 * lockmgr call might block, but no other threads
-			 * will attempt to use this nullfs vnode due to the
-			 * VI_DOOMED flag.
-			 */
-			while (nn->null_pending_locks > 0) {
-				nn->null_drain_wakeup = 1;
-				msleep(&nn->null_pending_locks,
-				       VI_MTX(vp),
-				       PVFS,
-				       "nuldr", 0);
-			}
-			error = lockmgr(vp->v_vnlock,
-			    (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
-			    VI_MTX(vp), td);
-			return error;
-		}
-		nn->null_pending_locks++;
-		error = lockmgr(vp->v_vnlock, flags, &vp->v_interlock, td);
-		VI_LOCK(vp);
-		/*
-		 * If we're called from vrele then v_usecount can have been 0
-		 * and another process might have initiated a recycle
-		 * operation. When that happens, just back out.
-		 */
-		if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0 &&
-		    td != vp->v_vxthread) {
-			lockmgr(vp->v_vnlock,
-			    (flags & ~LK_TYPE_MASK) | LK_RELEASE,
-			    VI_MTX(vp), td);
-			VI_LOCK(vp);
-			error = ENOENT;
-		}
-		nn->null_pending_locks--;
-		/*
-		 * Wakeup the process draining the vnode after all
-		 * pending lock attempts has been failed.
-		 */
-		if (nn->null_pending_locks == 0 &&
-		    nn->null_drain_wakeup != 0) {
-			nn->null_drain_wakeup = 0;
-			wakeup(&nn->null_pending_locks);
-		}
+	nn = VTONULL(vp);
+	/*
+	 * If we're still active we must ask the lower layer to
+	 * lock as ffs has special lock considerations in it's
+	 * vop lock.
+	 */
+	if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
+		VI_LOCK(lvp);
 		VI_UNLOCK(vp);
-		return error;
-	} else {
 		/*
-		 * To prevent race conditions involving doing a lookup
-		 * on "..", we have to lock the lower node, then lock our
-		 * node. Most of the time it won't matter that we lock our
-		 * node (as any locking would need the lower one locked
-		 * first). But we can LK_DRAIN the upper lock as a step
-		 * towards decomissioning it.
+		 * We have to hold the vnode here to solve a potential
+		 * reclaim race. If we're forcibly vgone'd while we
+		 * still have refs, a thread could be sleeping inside
+		 * the lowervp's vop_lock routine. When we vgone we will
+		 * drop our last ref to the lowervp, which would allow it
+		 * to be reclaimed. The lowervp could then be recycled,
+		 * in which case it is not legal to be sleeping in it's VOP.
+		 * We prevent it from being recycled by holding the vnode
+		 * here.
 		 */
-		lvp = NULLVPTOLOWERVP(vp);
-		if (lvp == NULL)
-			return (lockmgr(&vp->v_lock, flags, &vp->v_interlock, td));
-		if (flags & LK_INTERLOCK) {
-			mtx_unlock(&vp->v_interlock);
-			flags &= ~LK_INTERLOCK;
-		}
-		if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
-			error = VOP_LOCK(lvp,
-			    (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, td);
-		} else
-			error = VOP_LOCK(lvp, flags, td);
-		if (error)
-			return (error);
-		error = lockmgr(&vp->v_lock, flags, &vp->v_interlock, td);
-		if (error)
-			VOP_UNLOCK(lvp, 0, td);
-		return (error);
-	}
+		vholdl(lvp);
+		error = VOP_LOCK(lvp, flags, td);
+		vdrop(lvp);
+	} else
+		error = vop_stdlock(ap);
+
+	return (error);
 }
 
 /*
@@ -641,27 +563,21 @@ null_unlock(struct vop_unlock_args *ap)
 	struct vnode *vp = ap->a_vp;
 	int flags = ap->a_flags;
 	struct thread *td = ap->a_td;
+	struct null_node *nn;
 	struct vnode *lvp;
+	int error;
 
-	if (vp->v_vnlock != NULL) {
-		if (flags & LK_THISLAYER)
-			return 0;	/* the lock is shared across layers */
-		flags &= ~LK_THISLAYER;
-		return (lockmgr(vp->v_vnlock, flags | LK_RELEASE,
-		    &vp->v_interlock, td));
+	if ((flags & LK_INTERLOCK) != 0) {
+		VI_UNLOCK(vp);
+		ap->a_flags = flags &= ~LK_INTERLOCK;
 	}
-	lvp = NULLVPTOLOWERVP(vp);
-	if (lvp == NULL)
-		return (lockmgr(&vp->v_lock, flags | LK_RELEASE, &vp->v_interlock, td));
-	if ((flags & LK_THISLAYER) == 0) {
-		if (flags & LK_INTERLOCK) {
-			mtx_unlock(&vp->v_interlock);
-			flags &= ~LK_INTERLOCK;
-		}
-		VOP_UNLOCK(lvp, flags & ~LK_INTERLOCK, td);
-	} else
-		flags &= ~LK_THISLAYER;
-	return (lockmgr(&vp->v_lock, flags | LK_RELEASE, &vp->v_interlock, td));
+	nn = VTONULL(vp);
+	if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL)
+		error = VOP_UNLOCK(lvp, flags, td);
+	else
+		error = vop_stdunlock(ap);
+
+	return (error);
 }
 
 static int
@@ -670,9 +586,7 @@ null_islocked(struct vop_islocked_args *ap)
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 
-	if (vp->v_vnlock != NULL)
-		return (lockstatus(vp->v_vnlock, td));
-	return (lockstatus(&vp->v_lock, td));
+	return (lockstatus(vp->v_vnlock, td));
 }
 
 /*
@@ -715,17 +629,14 @@ null_reclaim(struct vop_reclaim_args *ap)
 
 	if (lowervp) {
 		null_hashrem(xp);
-
-		vrele(lowervp);
 		vrele(lowervp);
 	}
 
 	vp->v_data = NULL;
 	vp->v_object = NULL;
 	vnlock = vp->v_vnlock;
-	lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL, curthread);
 	vp->v_vnlock = &vp->v_lock;
-	transferlockers(vnlock, vp->v_vnlock);
+	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL, curthread);
 	lockmgr(vnlock, LK_RELEASE, NULL, curthread);
 	FREE(xp, M_NULLFSNODE);
 