path: root/sys/fs/nullfs/null_vnops.c
author     bp <bp@FreeBSD.org>    2000-09-25 15:38:32 +0000
committer  bp <bp@FreeBSD.org>    2000-09-25 15:38:32 +0000
commit     c2ae01d2e9194f6a0c8ce1d9795c7cfe665c3203 (patch)
tree       9f45d94548bec2859f78dc84dbe4e1df02ec23df /sys/fs/nullfs/null_vnops.c
parent     6110b03d2438de0cb3f47dbce3cec5c1dfa712f6 (diff)
Fix vnode locking bugs in the nullfs.
Add correct support for v_object management, so the mmap() operation should work
properly. Add support for the extattrctl() routine (submitted by semenu). At this
point nullfs can be considered functional and much more stable. In fact, it should
behave as a "hard symlink" to the underlying filesystem.

Reviewed in general by:        mckusick, dillon
Parts of logic obtained from:  NetBSD
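A quick way to exercise the behavior this change targets is to mmap() a file through a nullfs mount from userland. The program below is an illustrative sketch, not part of the commit: the mount point, file path, and mount invocation (mount_null or mount -t null, depending on the FreeBSD version) are assumptions.

/*
 * Illustrative userland check (not part of this commit): map a file that
 * lives under a nullfs mount and read through the mapping, exercising the
 * new v_object handling.  The paths below are assumptions; set up the
 * layering first, e.g. a nullfs mount of /usr/src on /mnt/null.
 */
#include <sys/mman.h>
#include <sys/stat.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	const char *path = "/mnt/null/COPYRIGHT";	/* assumed nullfs path */
	struct stat st;
	char *p;
	int fd;

	if ((fd = open(path, O_RDONLY)) == -1)
		err(1, "open %s", path);
	if (fstat(fd, &st) == -1)
		err(1, "fstat");
	if (st.st_size == 0)
		errx(1, "%s is empty", path);
	p = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	/* Fault in the first page through the null layer's vnode. */
	printf("first byte through the nullfs mapping: 0x%02x\n",
	    (unsigned char)p[0]);
	munmap(p, (size_t)st.st_size);
	close(fd);
	return (0);
}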
Diffstat (limited to 'sys/fs/nullfs/null_vnops.c')
-rw-r--r--  sys/fs/nullfs/null_vnops.c | 293
1 file changed, 228 insertions(+), 65 deletions(-)
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index 5692df6..0b7cb96 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -176,6 +176,8 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/mount.h>
@@ -183,13 +185,22 @@
#include <sys/malloc.h>
#include <miscfs/nullfs/null.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
+
static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
&null_bug_bypass, 0, "");
static int null_access(struct vop_access_args *ap);
+static int null_createvobject(struct vop_createvobject_args *ap);
+static int null_destroyvobject(struct vop_destroyvobject_args *ap);
static int null_getattr(struct vop_getattr_args *ap);
+static int null_getvobject(struct vop_getvobject_args *ap);
static int null_inactive(struct vop_inactive_args *ap);
+static int null_islocked(struct vop_islocked_args *ap);
static int null_lock(struct vop_lock_args *ap);
static int null_lookup(struct vop_lookup_args *ap);
static int null_open(struct vop_open_args *ap);
@@ -277,7 +288,7 @@ null_bypass(ap)
* of vrele'ing their vp's. We must account for
* that. (This should go away in the future.)
*/
- if (reles & 1)
+ if (reles & VDESC_VP0_WILLRELE)
VREF(*this_vp_p);
}
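The hunk above replaces a magic constant with the named VDESC_VP0_WILLRELE flag. The convention it relies on: the descriptor's flag word carries one WILLRELE bit per vnode argument, and the bypass loop shifts the word right once per argument, so testing the low bit tells whether the callee will vrele that particular vnode. The standalone sketch below models only that bit-shifting convention; the struct and names are simplified stand-ins, not the kernel's vnodeop_desc.

/*
 * Model of the per-argument WILLRELE convention used by null_bypass():
 * bit 0 of the flag word refers to the first vnode argument, bit 1 to
 * the second, and so on, so the loop shifts the word right each pass
 * and tests the low bit through the named constant.  Simplified types,
 * not the kernel definitions.
 */
#include <stdio.h>

#define VDESC_VP0_WILLRELE	0x0001	/* same value as the kernel constant */

struct fake_vnodeop_desc {
	const char	*vdesc_name;
	int		 vdesc_flags;	/* one WILLRELE bit per vnode argument */
};

int
main(void)
{
	struct fake_vnodeop_desc desc = { "vop_example", 0x5 };	/* vp0 and vp2 */
	int reles, i;

	for (i = 0, reles = desc.vdesc_flags; i < 3; i++, reles >>= 1) {
		if (reles & VDESC_VP0_WILLRELE)
			printf("vp%d: callee will vrele it; take an extra reference\n", i);
		else
			printf("vp%d: reference count is left alone\n", i);
	}
	return (0);
}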
@@ -287,7 +298,12 @@ null_bypass(ap)
* Call the operation on the lower layer
* with the modified argument structure.
*/
- error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+ if (vps_p[0] && *vps_p[0])
+ error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+ else {
+ printf("null_bypass: no map for %s\n", descp->vdesc_name);
+ error = EINVAL;
+ }
/*
* Maintain the illusion of call-by-value
@@ -300,7 +316,11 @@ null_bypass(ap)
break; /* bail out at end of list */
if (old_vps[i]) {
*(vps_p[i]) = old_vps[i];
- if (reles & 1)
+#if 0
+ if (reles & VDESC_VP0_WILLUNLOCK)
+ VOP_UNLOCK(*(vps_p[i]), LK_THISLAYER, curproc);
+#endif
+ if (reles & VDESC_VP0_WILLRELE)
vrele(*(vps_p[i]));
}
}
@@ -345,44 +365,43 @@ null_lookup(ap)
} */ *ap;
{
struct componentname *cnp = ap->a_cnp;
+ struct vnode *dvp = ap->a_dvp;
struct proc *p = cnp->cn_proc;
int flags = cnp->cn_flags;
- struct vop_lock_args lockargs;
- struct vop_unlock_args unlockargs;
- struct vnode *dvp, *vp;
+ struct vnode *vp, *ldvp, *lvp;
int error;
- if ((flags & ISLASTCN) && (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+ if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
return (EROFS);
- error = null_bypass((struct vop_generic_args *)ap);
+ /*
+ * Although it is possible to call null_bypass(), we'll do
+ * a direct call to reduce overhead
+ */
+ ldvp = NULLVPTOLOWERVP(dvp);
+ vp = lvp = NULL;
+ error = VOP_LOOKUP(ldvp, &lvp, cnp);
if (error == EJUSTRETURN && (flags & ISLASTCN) &&
- (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+ (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
(cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
error = EROFS;
+
/*
- * We must do the same locking and unlocking at this layer as
- * is done in the layers below us. We could figure this out
- * based on the error return and the LASTCN, LOCKPARENT, and
- * LOCKLEAF flags. However, it is more expidient to just find
- * out the state of the lower level vnodes and set ours to the
- * same state.
+ * Rely only on the PDIRUNLOCK flag which should be carefully
+ * tracked by underlying filesystem.
*/
- dvp = ap->a_dvp;
- vp = *ap->a_vpp;
- if (dvp == vp)
- return (error);
- if (!VOP_ISLOCKED(dvp, NULL)) {
- unlockargs.a_vp = dvp;
- unlockargs.a_flags = 0;
- unlockargs.a_p = p;
- vop_nounlock(&unlockargs);
- }
- if (vp != NULLVP && VOP_ISLOCKED(vp, NULL)) {
- lockargs.a_vp = vp;
- lockargs.a_flags = LK_SHARED;
- lockargs.a_p = p;
- vop_nolock(&lockargs);
+ if (cnp->cn_flags & PDIRUNLOCK)
+ VOP_UNLOCK(dvp, LK_THISLAYER, p);
+ if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
+ if (ldvp == lvp) {
+ *ap->a_vpp = dvp;
+ VREF(dvp);
+ vrele(lvp);
+ } else {
+ error = null_node_create(dvp->v_mount, lvp, &vp);
+ if (error == 0)
+ *ap->a_vpp = vp;
+ }
}
return (error);
}
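The rewritten null_lookup() above calls the lower layer's VOP_LOOKUP() directly and then makes an aliasing decision: if the lower lookup handed back the directory itself (ldvp == lvp, the "." case), the existing upper vnode is returned with an extra reference; otherwise a fresh null node is created over the lower vnode. The userland sketch below models only that decision; the types and the alias_lookup() helper are illustrative assumptions, not kernel code.

/*
 * Userland model of the aliasing decision in null_lookup(): reuse the
 * upper node for "." lookups, wrap any other lower result in a new
 * alias.  Simplified stand-ins for vnodes and null nodes.
 */
#include <stdio.h>
#include <stdlib.h>

struct lower_node {			/* stands in for a lower-layer vnode */
	const char	*name;
};

struct alias_node {			/* stands in for a nullfs vnode */
	struct lower_node *lower;
	int		 refs;
};

static struct alias_node *
alias_lookup(struct alias_node *upper_dir, struct lower_node *lower_dir,
    struct lower_node *lower_result)
{
	struct alias_node *a;

	if (lower_result == lower_dir) {
		/* "." case: same lower node, so reuse the upper directory. */
		upper_dir->refs++;
		return (upper_dir);
	}
	/* Otherwise build a new alias over the lower result. */
	if ((a = malloc(sizeof(*a))) == NULL)
		return (NULL);
	a->lower = lower_result;
	a->refs = 1;
	return (a);
}

int
main(void)
{
	struct lower_node dir = { "src" }, file = { "Makefile" };
	struct alias_node upper = { &dir, 1 };
	struct alias_node *dot, *child;

	dot = alias_lookup(&upper, &dir, &dir);
	child = alias_lookup(&upper, &dir, &file);
	printf("\".\" lookup reused the upper node: %s (refs now %d)\n",
	    dot == &upper ? "yes" : "no", upper.refs);
	if (child != NULL) {
		printf("child lookup created an alias over \"%s\"\n",
		    child->lower->name);
		free(child);
	}
	return (0);
}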
@@ -430,6 +449,7 @@ null_setattr(ap)
return (EROFS);
}
}
+
return (null_bypass((struct vop_generic_args *)ap));
}
@@ -454,6 +474,9 @@ null_getattr(ap)
return (0);
}
+/*
+ * Handle to disallow write access if mounted read-only.
+ */
static int
null_access(ap)
struct vop_access_args /* {
@@ -559,12 +582,62 @@ null_lock(ap)
struct proc *a_p;
} */ *ap;
{
+ struct vnode *vp = ap->a_vp;
+ int flags = ap->a_flags;
+ struct proc *p = ap->a_p;
+ struct vnode *lvp;
+ int error;
- vop_nolock(ap);
- if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN)
- return (0);
- ap->a_flags &= ~LK_INTERLOCK;
- return (null_bypass((struct vop_generic_args *)ap));
+ if (flags & LK_THISLAYER) {
+ if (vp->v_vnlock != NULL)
+ return 0; /* lock is shared across layers */
+ error = lockmgr(&vp->v_lock, flags & ~LK_THISLAYER,
+ &vp->v_interlock, p);
+ return (error);
+ }
+
+ if (vp->v_vnlock != NULL) {
+ /*
+ * The lower level has exported a struct lock to us. Use
+ * it so that all vnodes in the stack lock and unlock
+ * simultaneously. Note: we don't DRAIN the lock as DRAIN
+ * decommissions the lock - just because our vnode is
+ * going away doesn't mean the struct lock below us is.
+ * LK_EXCLUSIVE is fine.
+ */
+ if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
+ NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n");
+ return(lockmgr(vp->v_vnlock,
+ (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
+ &vp->v_interlock, p));
+ }
+ return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock, p));
+ } else {
+ /*
+ * To prevent race conditions involving doing a lookup
+ * on "..", we have to lock the lower node, then lock our
+ * node. Most of the time it won't matter that we lock our
+ * node (as any locking would need the lower one locked
+ * first). But we can LK_DRAIN the upper lock as a step
+ * towards decommissioning it.
+ */
+ lvp = NULLVPTOLOWERVP(vp);
+ if (flags & LK_INTERLOCK) {
+ simple_unlock(&vp->v_interlock);
+ flags &= ~LK_INTERLOCK;
+ }
+ if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
+ error = VOP_LOCK(lvp,
+ (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, p);
+ } else
+ error = VOP_LOCK(lvp, flags, p);
+ if (error)
+ return (error);
+ error = lockmgr(&vp->v_lock, flags, &vp->v_interlock, p);
+ if (error)
+ VOP_UNLOCK(lvp, 0, p);
+ return (error);
+ }
}
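The new null_lock() above handles two layouts: when the lower filesystem exports its lock through v_vnlock, the null vnode uses that lock so every vnode in the stack locks and unlocks together; when it does not, the lower vnode is locked first and only then the null layer's private v_lock. The pthread-based model below illustrates just that structure; it is a conceptual sketch under simplified assumptions, not the kernel lockmgr code.

/*
 * Conceptual model of v_vnlock sharing: if the lower layer exports its
 * lock, the upper node's lock pointer aliases it and one acquisition
 * covers the whole stack; otherwise the upper layer holds a private
 * lock and must take the lower layer's lock first.  Simplified types,
 * not kernel code.
 */
#include <pthread.h>
#include <stdio.h>

struct model_vnode {
	pthread_mutex_t	 own_lock;	/* models v_lock */
	pthread_mutex_t	*vnlock;	/* models v_vnlock; NULL if unshared */
	struct model_vnode *lower;	/* lower layer, NULL at the bottom */
};

static void
model_lock(struct model_vnode *vp)
{
	if (vp->vnlock != NULL) {
		/* Shared lock: one acquisition locks every layer. */
		pthread_mutex_lock(vp->vnlock);
		return;
	}
	/* Private lock: take the lower layer first, then ourselves. */
	if (vp->lower != NULL)
		model_lock(vp->lower);
	pthread_mutex_lock(&vp->own_lock);
}

static void
model_unlock(struct model_vnode *vp)
{
	if (vp->vnlock != NULL) {
		pthread_mutex_unlock(vp->vnlock);
		return;
	}
	pthread_mutex_unlock(&vp->own_lock);
	if (vp->lower != NULL)
		model_unlock(vp->lower);
}

int
main(void)
{
	struct model_vnode lower, upper;

	pthread_mutex_init(&lower.own_lock, NULL);
	pthread_mutex_init(&upper.own_lock, NULL);
	lower.vnlock = NULL;
	lower.lower = NULL;
	upper.lower = &lower;

	/* Case 1: the lower layer exports its lock and the upper shares it. */
	upper.vnlock = &lower.own_lock;
	model_lock(&upper);
	printf("shared case: the whole stack is locked by one acquisition\n");
	model_unlock(&upper);

	/* Case 2: no shared lock; the lower vnode is locked before the upper. */
	upper.vnlock = NULL;
	model_lock(&upper);
	printf("private case: lower locked first, then the upper layer\n");
	model_unlock(&upper);
	return (0);
}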
/*
@@ -580,11 +653,46 @@ null_unlock(ap)
struct proc *a_p;
} */ *ap;
{
- vop_nounlock(ap);
- ap->a_flags &= ~LK_INTERLOCK;
- return (null_bypass((struct vop_generic_args *)ap));
+ struct vnode *vp = ap->a_vp;
+ int flags = ap->a_flags;
+ struct proc *p = ap->a_p;
+
+ if (vp->v_vnlock != NULL) {
+ if (flags & LK_THISLAYER)
+ return 0; /* the lock is shared across layers */
+ flags &= ~LK_THISLAYER;
+ return (lockmgr(vp->v_vnlock, flags | LK_RELEASE,
+ &vp->v_interlock, p));
+ }
+ if ((flags & LK_THISLAYER) == 0) {
+ if (flags & LK_INTERLOCK)
+ simple_unlock(&vp->v_interlock);
+ VOP_UNLOCK(NULLVPTOLOWERVP(vp), flags & ~LK_INTERLOCK, p);
+ } else
+ flags &= ~LK_THISLAYER;
+ return (lockmgr(&vp->v_lock, flags | LK_RELEASE, &vp->v_interlock, p));
+}
+
+static int
+null_islocked(ap)
+ struct vop_islocked_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct proc *p = ap->a_p;
+
+ if (vp->v_vnlock != NULL)
+ return (lockstatus(vp->v_vnlock, p));
+ return (lockstatus(&vp->v_lock, p));
}
+/*
+ * There is no way to tell that someone issued a remove/rmdir operation
+ * on the underlying filesystem. For now we just have to release lowervp
+ * as soon as possible.
+ */
static int
null_inactive(ap)
struct vop_inactive_args /* {
@@ -593,27 +701,34 @@ null_inactive(ap)
} */ *ap;
{
struct vnode *vp = ap->a_vp;
+ struct proc *p = ap->a_p;
struct null_node *xp = VTONULL(vp);
struct vnode *lowervp = xp->null_lowervp;
+
+ lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, p);
+ LIST_REMOVE(xp, null_hash);
+ lockmgr(&null_hashlock, LK_RELEASE, NULL, p);
+
+ xp->null_lowervp = NULLVP;
+ if (vp->v_vnlock != NULL) {
+ vp->v_vnlock = &vp->v_lock; /* we no longer share the lock */
+ } else
+ VOP_UNLOCK(vp, LK_THISLAYER, p);
+
+ vput(lowervp);
/*
- * Do nothing (and _don't_ bypass).
- * Wait to vrele lowervp until reclaim,
- * so that until then our null_node is in the
- * cache and reusable.
- * We still have to tell the lower layer the vnode
- * is now inactive though.
- *
- * NEEDSWORK: Someday, consider inactive'ing
- * the lowervp and then trying to reactivate it
- * with capabilities (v_id)
- * like they do in the name lookup cache code.
- * That's too much work for now.
+ * Now it is safe to drop references to the lower vnode.
+ * VOP_INACTIVE() will be called by vrele() if necessary.
*/
- VOP_INACTIVE(lowervp, ap->a_p);
- VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+ vrele (lowervp);
+
return (0);
}
+/*
+ * We could free this memory in null_inactive, but we do it here instead.
+ * (It would also be possible to guard vp->v_data by pointing it somewhere safe.)
+ */
static int
null_reclaim(ap)
struct vop_reclaim_args /* {
@@ -622,21 +737,11 @@ null_reclaim(ap)
} */ *ap;
{
struct vnode *vp = ap->a_vp;
- struct null_node *xp = VTONULL(vp);
- struct vnode *lowervp = xp->null_lowervp;
+ void *vdata = vp->v_data;
- /*
- * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
- * so we can't call VOPs on ourself.
- */
- /* After this assignment, this node will not be re-used. */
- xp->null_lowervp = NULLVP;
- lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, ap->a_p);
- LIST_REMOVE(xp, null_hash);
- lockmgr(&null_hashlock, LK_RELEASE, NULL, ap->a_p);
- FREE(vp->v_data, M_TEMP);
vp->v_data = NULL;
- vrele (lowervp);
+ FREE(vdata, M_NULLFSNODE);
+
return (0);
}
@@ -652,16 +757,74 @@ null_print(ap)
}
/*
+ * Let an underlying filesystem do the work
+ */
+static int
+null_createvobject(ap)
+ struct vop_createvobject_args /* {
+ struct vnode *vp;
+ struct ucred *cred;
+ struct proc *p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct vnode *lowervp = VTONULL(vp) ? NULLVPTOLOWERVP(vp) : NULL;
+ int error;
+
+ if (vp->v_type == VNON || lowervp == NULL)
+ return 0;
+ error = VOP_CREATEVOBJECT(lowervp, ap->a_cred, ap->a_p);
+ if (error)
+ return (error);
+ vp->v_flag |= VOBJBUF;
+ return (0);
+}
+
+/*
+ * We have nothing to destroy and this operation shouldn't be bypassed.
+ */
+static int
+null_destroyvobject(ap)
+ struct vop_destroyvobject_args /* {
+ struct vnode *vp;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+
+ vp->v_flag &= ~VOBJBUF;
+ return (0);
+}
+
+static int
+null_getvobject(ap)
+ struct vop_getvobject_args /* {
+ struct vnode *vp;
+ struct vm_object **objpp;
+ } */ *ap;
+{
+ struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp);
+
+ if (lvp == NULL)
+ return EINVAL;
+ return (VOP_GETVOBJECT(lvp, ap->a_objpp));
+}
+
+/*
* Global vfs data structures
*/
vop_t **null_vnodeop_p;
static struct vnodeopv_entry_desc null_vnodeop_entries[] = {
{ &vop_default_desc, (vop_t *) null_bypass },
+
{ &vop_access_desc, (vop_t *) null_access },
{ &vop_bmap_desc, (vop_t *) vop_eopnotsupp },
+ { &vop_createvobject_desc, (vop_t *) null_createvobject },
+ { &vop_destroyvobject_desc, (vop_t *) null_destroyvobject },
{ &vop_getattr_desc, (vop_t *) null_getattr },
+ { &vop_getvobject_desc, (vop_t *) null_getvobject },
{ &vop_getwritemount_desc, (vop_t *) vop_stdgetwritemount},
{ &vop_inactive_desc, (vop_t *) null_inactive },
+ { &vop_islocked_desc, (vop_t *) null_islocked },
{ &vop_lock_desc, (vop_t *) null_lock },
{ &vop_lookup_desc, (vop_t *) null_lookup },
{ &vop_open_desc, (vop_t *) null_open },