author    dillon <dillon@FreeBSD.org>  1999-09-26 20:52:41 +0000
committer dillon <dillon@FreeBSD.org>  1999-09-26 20:52:41 +0000
commit    3d3ec302ae149b2f4e4a320e614ece22abcad72d (patch)
tree      4e32ca35a1989bfd056b9e2e048f39b4cd97ab7f /sys/fs/unionfs/union_subr.c
parent    d4b4260008992958840fe8440a39c277f957c88b (diff)
This is a major fixup of unionfs. At least 30 serious bugs have been
fixed (many due to changing semantics in other parts of the kernel and
not the original author's fault), including one critical bug: unionfs
could cause UFS corruption in the fronting store due to calling VOP_OPEN
for writing without turning on vmio for the UFS vnode.

Most of the bugs were related to semantics changes in VOP calls, lock
ordering problems (causing deadlocks), improper handling of a read-only
backing store (such as an NFS mount), improper referencing and locking of
vnodes, not using real struct locks for vnode locking, not using
recursive locks when accessing the fronting store, and the like.

New functionality has been added: unionfs now has mmap() support (only
partially tested), and rename has been enhanced considerably.

There are still some things that unionfs cannot do. You cannot rename a
directory without confusing unionfs, and there are issues with softlinks,
hardlinks, and special files; unionfs mostly doesn't understand them (and
never did). There are probably still panic situations, but hopefully
nowhere near as many as before this commit.

The unionfs in this commit has been tested overlaid on /usr/src (backing
/usr/src being a read-only NFS mount, fronting /usr/src being a local
filesystem). Kernel builds have been tested; buildworld is undergoing
testing. More testing is necessary.
Diffstat (limited to 'sys/fs/unionfs/union_subr.c')
-rw-r--r--	sys/fs/unionfs/union_subr.c	544
1 file changed, 348 insertions(+), 196 deletions(-)
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
index ed09a65..c03153c 100644
--- a/sys/fs/unionfs/union_subr.c
+++ b/sys/fs/unionfs/union_subr.c
@@ -53,6 +53,7 @@
#include <vm/vm.h>
#include <vm/vm_extern.h> /* for vnode_pager_setsize */
#include <vm/vm_zone.h>
+#include <vm/vm_object.h> /* for vm cache coherency */
#include <miscfs/union/union.h>
#include <sys/proc.h>
@@ -97,7 +98,7 @@ union_init()
for (i = 0; i < NHASH; i++)
LIST_INIT(&unhead[i]);
- bzero((caddr_t) unvplock, sizeof(unvplock));
+ bzero((caddr_t)unvplock, sizeof(unvplock));
return (0);
}
@@ -105,15 +106,12 @@ static int
union_list_lock(ix)
int ix;
{
-
- if (unvplock[ix] & UN_LOCKED) {
- unvplock[ix] |= UN_WANT;
+ if (unvplock[ix] & UNVP_LOCKED) {
+ unvplock[ix] |= UNVP_WANT;
(void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0);
return (1);
}
-
- unvplock[ix] |= UN_LOCKED;
-
+ unvplock[ix] |= UNVP_LOCKED;
return (0);
}
@@ -121,15 +119,25 @@ static void
union_list_unlock(ix)
int ix;
{
+ unvplock[ix] &= ~UNVP_LOCKED;
- unvplock[ix] &= ~UN_LOCKED;
-
- if (unvplock[ix] & UN_WANT) {
- unvplock[ix] &= ~UN_WANT;
+ if (unvplock[ix] & UNVP_WANT) {
+ unvplock[ix] &= ~UNVP_WANT;
wakeup((caddr_t) &unvplock[ix]);
}
}
+/*
+ * union_updatevp:
+ *
+ * The uppervp, if not NULL, must be referenced and not locked by us
+ * The lowervp, if not NULL, must be referenced.
+ *
+ * if uppervp and lowervp match pointers already installed, nothing
+ * happens. The passed vp's (when matching) are not adjusted. This
+ * routine may only be called by union_newupper() and union_newlower().
+ */
+
static void
union_updatevp(un, uppervp, lowervp)
struct union_node *un;
@@ -153,9 +161,10 @@ union_updatevp(un, uppervp, lowervp)
uhash = nhash;
}
- if (lhash != uhash)
+ if (lhash != uhash) {
while (union_list_lock(lhash))
continue;
+ }
while (union_list_lock(uhash))
continue;
@@ -177,10 +186,6 @@ union_updatevp(un, uppervp, lowervp)
free(un->un_path, M_TEMP);
un->un_path = 0;
}
- if (un->un_dirvp) {
- vrele(un->un_dirvp);
- un->un_dirvp = NULLVP;
- }
}
un->un_lowervp = lowervp;
un->un_lowersz = VNOVAL;
@@ -189,7 +194,6 @@ union_updatevp(un, uppervp, lowervp)
if (un->un_uppervp != uppervp) {
if (un->un_uppervp)
vrele(un->un_uppervp);
-
un->un_uppervp = uppervp;
un->un_uppersz = VNOVAL;
}
@@ -202,21 +206,30 @@ union_updatevp(un, uppervp, lowervp)
union_list_unlock(nhash);
}
+/*
+ * Set a new lowervp. The passed lowervp must be referenced and will be
+ * stored in the vp in a referenced state.
+ */
+
static void
union_newlower(un, lowervp)
struct union_node *un;
struct vnode *lowervp;
{
-
union_updatevp(un, un->un_uppervp, lowervp);
}
+/*
+ * Set a new uppervp. The passed uppervp must be locked and will be
+ * stored in the vp in a locked state. The caller should not unlock
+ * uppervp.
+ */
+
static void
union_newupper(un, uppervp)
struct union_node *un;
struct vnode *uppervp;
{
-
union_updatevp(un, uppervp, un->un_lowervp);
}
@@ -253,27 +266,51 @@ union_newsize(vp, uppersz, lowersz)
}
if (sz != VNOVAL) {
-#ifdef DEBUG
- printf("union: %s size now %ld\n",
- uppersz != VNOVAL ? "upper" : "lower", (long) sz);
-#endif
+ UDEBUG(("union: %s size now %ld\n",
+ (uppersz != VNOVAL ? "upper" : "lower"), (long)sz));
vnode_pager_setsize(vp, sz);
}
}
/*
- * allocate a union_node/vnode pair. the vnode is
- * referenced and locked. the new vnode is returned
- * via (vpp). (mp) is the mountpoint of the union filesystem,
- * (dvp) is the parent directory where the upper layer object
- * should exist (but doesn't) and (cnp) is the componentname
- * information which is partially copied to allow the upper
- * layer object to be created at a later time. (uppervp)
- * and (lowervp) reference the upper and lower layer objects
- * being mapped. either, but not both, can be nil.
- * if supplied, (uppervp) is locked.
- * the reference is either maintained in the new union_node
- * object which is allocated, or they are vrele'd.
+ * union_allocvp: allocate a union_node and associate it with a
+ * parent union_node and one or two vnodes.
+ *
+ * vpp Holds the returned vnode locked and referenced if no
+ * error occurs.
+ *
+ * mp Holds the mount point. mp may or may not be busied.
+ * allocvp makes no changes to mp.
+ *
+ * dvp Holds the parent union_node to the one we wish to create.
+ * XXX may only be used to traverse an uncopied lowervp-based
+ * tree? XXX
+ *
+ * dvp may or may not be locked. allocvp makes no changes
+ * to dvp.
+ *
+ * upperdvp Holds the parent vnode to uppervp, generally used along
+ * with path component information to create a shadow of
+ * lowervp when uppervp does not exist.
+ *
+ * upperdvp is referenced but unlocked on entry, and will be
+ * dereferenced on return.
+ *
+ * uppervp Holds the new uppervp vnode to be stored in the
+ * union_node we are allocating. uppervp is referenced but
+ * not locked, and will be dereferenced on return.
+ *
+ * lowervp Holds the new lowervp vnode to be stored in the
+ * union_node we are allocating. lowervp is referenced but
+ * not locked, and will be dereferenced on return.
+ *
+ * cnp Holds path component information to be coupled with
+ * lowervp and upperdvp to allow unionfs to create an uppervp
+ * later on. Only used if lowervp is valid. The contents
+ * of cnp are only valid for the duration of the call.
+ *
+ * docache Determines whether this node should be entered in the
+ * cache or whether it should be destroyed as soon as possible.
*
* all union_nodes are maintained on a singly-linked
* list. new nodes are only allocated when they cannot
@@ -292,12 +329,13 @@ union_newsize(vp, uppersz, lowersz)
* zero references to it and so it needs to be removed from
* the vnode free list.
*/
+
int
-union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
+union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache)
struct vnode **vpp;
struct mount *mp;
- struct vnode *undvp; /* parent union vnode */
- struct vnode *dvp; /* may be null */
+ struct vnode *dvp; /* parent union vnode */
+ struct vnode *upperdvp; /* parent vnode of uppervp */
struct componentname *cnp; /* may be null */
struct vnode *uppervp; /* may be null */
struct vnode *lowervp; /* may be null */
@@ -307,6 +345,7 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
struct union_node *un = 0;
struct vnode *xlowervp = NULLVP;
struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+ struct proc *p = (cnp) ? cnp->cn_proc : curproc;
int hash = 0;
int vflag;
int try;
@@ -382,65 +421,76 @@ loop:
if (un) {
/*
- * Obtain a lock on the union_node.
- * uppervp is locked, though un->un_uppervp
- * may not be. this doesn't break the locking
- * hierarchy since in the case that un->un_uppervp
- * is not yet locked it will be vrele'd and replaced
- * with uppervp.
+ * Obtain a lock on the union_node. Everything is unlocked
+ * except for dvp, so check that case. If they match, our
+ * new un is already locked. Otherwise we have to lock our
+ * new un.
+ *
+ * A potential deadlock situation occurs when we are holding
+ * one lock while trying to get another. We must follow
+ * strict ordering rules to avoid it. We try to locate dvp
+ * by scanning up from un_vnode, since the most likely
+ * scenario is un being under dvp.
*/
- if ((dvp != NULLVP) && (uppervp == dvp)) {
+ if (dvp && un->un_vnode != dvp) {
+ struct vnode *scan = un->un_vnode;
+
+ do {
+ scan = VTOUNION(scan)->un_pvp;
+ } while (scan && scan->v_tag == VT_UNION && scan != dvp);
+ if (scan != dvp) {
+ /*
+ * our new un is above dvp (we never saw dvp
+ * while moving up the tree).
+ */
+ VREF(dvp);
+ VOP_UNLOCK(dvp, 0, p);
+ error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p);
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
+ vrele(dvp);
+ } else {
+ /*
+ * our new un is under dvp
+ */
+ error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p);
+ }
+ } else if (dvp == NULLVP) {
/*
- * Access ``.'', so (un) will already
- * be locked. Since this process has
- * the lock on (uppervp) no other
- * process can hold the lock on (un).
+ * dvp is NULL, we need to lock un.
*/
-#ifdef DIAGNOSTIC
- if ((un->un_flags & UN_LOCKED) == 0)
- panic("union: . not locked");
- else if (curproc && un->un_pid != curproc->p_pid &&
- un->un_pid > -1 && curproc->p_pid > -1)
- panic("union: allocvp not lock owner");
-#endif
+ error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p);
} else {
- if (un->un_flags & UN_LOCKED) {
- vrele(UNIONTOV(un));
- un->un_flags |= UN_WANT;
- (void) tsleep((caddr_t) &un->un_flags, PINOD, "unalvp", 0);
- goto loop;
- }
- un->un_flags |= UN_LOCKED;
-
-#ifdef DIAGNOSTIC
- if (curproc)
- un->un_pid = curproc->p_pid;
- else
- un->un_pid = -1;
-#endif
+ /*
+ * dvp == un->un_vnode, we are already locked.
+ */
+ error = 0;
}
- /*
- * At this point, the union_node is locked,
- * un->un_uppervp may not be locked, and uppervp
- * is locked or nil.
- */
+ if (error)
+ goto loop;
/*
- * Save information about the upper layer.
+ * At this point, the union_node is locked and referenced.
+ *
+ * uppervp is locked and referenced or NULL, lowervp is
+ * referenced or NULL.
*/
+ UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n",
+ un, un->un_vnode, un->un_uppervp,
+ (un->un_uppervp ? un->un_uppervp->v_usecount : -99),
+ uppervp,
+ (uppervp ? uppervp->v_usecount : -99)
+ ));
+
if (uppervp != un->un_uppervp) {
+ KASSERT(uppervp == NULL || uppervp->v_usecount > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", uppervp->v_usecount));
union_newupper(un, uppervp);
} else if (uppervp) {
+ KASSERT(uppervp->v_usecount > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", uppervp->v_usecount));
vrele(uppervp);
}
- if (un->un_uppervp) {
- un->un_flags |= UN_ULOCK;
- un->un_flags &= ~UN_KLOCK;
- }
-
/*
* Save information about the lower layer.
* This needs to keep track of pathname
@@ -456,12 +506,22 @@ loop:
bcopy(cnp->cn_nameptr, un->un_path,
cnp->cn_namelen);
un->un_path[cnp->cn_namelen] = '\0';
- VREF(dvp);
- un->un_dirvp = dvp;
}
} else if (lowervp) {
vrele(lowervp);
}
+
+ /*
+ * and upperdvp
+ */
+ if (upperdvp != un->un_dirvp) {
+ if (un->un_dirvp)
+ vrele(un->un_dirvp);
+ un->un_dirvp = upperdvp;
+ } else if (upperdvp) {
+ vrele(upperdvp);
+ }
+
*vpp = UNIONTOV(un);
return (0);
}
@@ -477,17 +537,22 @@ loop:
goto loop;
}
+ /*
+ * Create new node rather than replace old node
+ */
+
error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
if (error) {
- if (uppervp) {
- if (dvp == uppervp)
- vrele(uppervp);
- else
- vput(uppervp);
- }
+ /*
+ * If an error occurs clear out vnodes.
+ */
if (lowervp)
vrele(lowervp);
-
+ if (uppervp)
+ vrele(uppervp);
+ if (upperdvp)
+ vrele(upperdvp);
+ *vpp = NULL;
goto out;
}
@@ -499,37 +564,34 @@ loop:
(*vpp)->v_type = uppervp->v_type;
else
(*vpp)->v_type = lowervp->v_type;
+
un = VTOUNION(*vpp);
+ bzero(un, sizeof(*un));
+
+ lockinit(&un->un_lock, PVFS, "unlock", 0, 0);
+ vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);
+
un->un_vnode = *vpp;
un->un_uppervp = uppervp;
un->un_uppersz = VNOVAL;
un->un_lowervp = lowervp;
un->un_lowersz = VNOVAL;
- un->un_pvp = undvp;
- if (undvp != NULLVP)
- VREF(undvp);
+ un->un_dirvp = upperdvp;
+ un->un_pvp = dvp; /* only parent dir in new allocation */
+ if (dvp != NULLVP)
+ VREF(dvp);
un->un_dircache = 0;
un->un_openl = 0;
- un->un_flags = UN_LOCKED;
- if (un->un_uppervp)
- un->un_flags |= UN_ULOCK;
-#ifdef DIAGNOSTIC
- if (curproc)
- un->un_pid = curproc->p_pid;
- else
- un->un_pid = -1;
-#endif
+
if (cnp && (lowervp != NULLVP)) {
un->un_hash = cnp->cn_hash;
un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
un->un_path[cnp->cn_namelen] = '\0';
- VREF(dvp);
- un->un_dirvp = dvp;
} else {
un->un_hash = 0;
un->un_path = 0;
- un->un_dirvp = 0;
+ un->un_dirvp = NULL;
}
if (docache) {
@@ -537,10 +599,10 @@ loop:
un->un_flags |= UN_CACHED;
}
+out:
if (xlowervp)
vrele(xlowervp);
-out:
if (docache)
union_list_unlock(hash);
@@ -558,16 +620,26 @@ union_freevp(vp)
LIST_REMOVE(un, un_cache);
}
- if (un->un_pvp != NULLVP)
+ if (un->un_pvp != NULLVP) {
vrele(un->un_pvp);
- if (un->un_uppervp != NULLVP)
+ un->un_pvp = NULL;
+ }
+ if (un->un_uppervp != NULLVP) {
vrele(un->un_uppervp);
- if (un->un_lowervp != NULLVP)
+ un->un_uppervp = NULL;
+ }
+ if (un->un_lowervp != NULLVP) {
vrele(un->un_lowervp);
- if (un->un_dirvp != NULLVP)
+ un->un_lowervp = NULL;
+ }
+ if (un->un_dirvp != NULLVP) {
vrele(un->un_dirvp);
- if (un->un_path)
+ un->un_dirvp = NULL;
+ }
+ if (un->un_path) {
free(un->un_path, M_TEMP);
+ un->un_path = NULL;
+ }
FREE(vp->v_data, M_TEMP);
vp->v_data = 0;
@@ -579,6 +651,9 @@ union_freevp(vp)
* copyfile. copy the vnode (fvp) to the vnode (tvp)
* using a sequence of reads and writes. both (fvp)
* and (tvp) are locked on entry and exit.
+ *
+ * fvp and tvp are both exclusively locked on call, but their reference
+ * counts haven't been bumped at all.
*/
static int
union_copyfile(fvp, tvp, cred, p)
@@ -600,48 +675,62 @@ union_copyfile(fvp, tvp, cred, p)
* give up at the first sign of trouble.
*/
+ bzero(&uio, sizeof(uio));
+
uio.uio_procp = p;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_offset = 0;
- VOP_UNLOCK(fvp, 0, p); /* XXX */
VOP_LEASE(fvp, p, cred, LEASE_READ);
- vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
- VOP_UNLOCK(tvp, 0, p); /* XXX */
VOP_LEASE(tvp, p, cred, LEASE_WRITE);
- vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
/* ugly loop follows... */
do {
off_t offset = uio.uio_offset;
+ int count;
+ int bufoffset;
+ /*
+ * Setup for big read
+ */
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
iov.iov_base = buf;
iov.iov_len = MAXBSIZE;
uio.uio_resid = iov.iov_len;
uio.uio_rw = UIO_READ;
- error = VOP_READ(fvp, &uio, 0, cred);
- if (error == 0) {
+ if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0)
+ break;
+
+ /*
+ * Get bytes read, handle read eof case and setup for
+ * write loop
+ */
+ if ((count = MAXBSIZE - uio.uio_resid) == 0)
+ break;
+ bufoffset = 0;
+
+ /*
+ * Write until an error occurs or our buffer has been
+ * exhausted, then update the offset for the next read.
+ */
+ while (bufoffset < count) {
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
- iov.iov_base = buf;
- iov.iov_len = MAXBSIZE - uio.uio_resid;
- uio.uio_offset = offset;
+ iov.iov_base = buf + bufoffset;
+ iov.iov_len = count - bufoffset;
+ uio.uio_offset = offset + bufoffset;
uio.uio_rw = UIO_WRITE;
uio.uio_resid = iov.iov_len;
- if (uio.uio_resid == 0)
+ if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0)
break;
-
- do {
- error = VOP_WRITE(tvp, &uio, 0, cred);
- } while ((uio.uio_resid > 0) && (error == 0));
+ bufoffset += (count - bufoffset) - uio.uio_resid;
}
-
+ uio.uio_offset = offset + bufoffset;
} while (error == 0);
free(buf, M_TEMP);
@@ -649,9 +738,10 @@ union_copyfile(fvp, tvp, cred, p)
}
/*
- * (un) is assumed to be locked on entry and remains
- * locked on exit.
+ *
+ * un's vnode is assumed to be locked on entry and remains locked on exit.
*/
+
int
union_copyup(un, docopy, cred, p)
struct union_node *un;
@@ -676,12 +766,9 @@ union_copyup(un, docopy, cred, p)
if (error)
return (error);
- /* at this point, uppervp is locked */
- union_newupper(un, uvp);
- un->un_flags |= UN_ULOCK;
-
lvp = un->un_lowervp;
+ KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
if (docopy) {
/*
* XX - should not ignore errors
@@ -689,23 +776,22 @@ union_copyup(un, docopy, cred, p)
*/
vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_OPEN(lvp, FREAD, cred, p);
+ if (error == 0 && vn_canvmio(lvp) == TRUE)
+ error = vfs_object_create(lvp, p, cred);
if (error == 0) {
error = union_copyfile(lvp, uvp, cred, p);
VOP_UNLOCK(lvp, 0, p);
(void) VOP_CLOSE(lvp, FREAD, cred, p);
}
-#ifdef DEBUG
if (error == 0)
- uprintf("union: copied up %s\n", un->un_path);
-#endif
+ UDEBUG(("union: copied up %s\n", un->un_path));
}
- un->un_flags &= ~UN_ULOCK;
VOP_UNLOCK(uvp, 0, p);
+ union_newupper(un, uvp);
+ KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
union_vn_close(uvp, FWRITE, cred, p);
- vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p);
- un->un_flags |= UN_ULOCK;
-
+ KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
/*
* Subsequent IOs will go to the top layer, so
* call close on the lower vnode and open on the
@@ -721,6 +807,8 @@ union_copyup(un, docopy, cred, p)
(void) VOP_CLOSE(lvp, FREAD, cred, p);
(void) VOP_OPEN(uvp, FREAD, cred, p);
}
+ if (vn_canvmio(uvp) == TRUE)
+ error = vfs_object_create(uvp, p, cred);
un->un_openl = 0;
}
@@ -728,6 +816,17 @@ union_copyup(un, docopy, cred, p)
}
+/*
+ * union_relookup:
+ *
+ * dvp should be locked on entry and will be locked on return. No
+ * net change in the ref count will occur.
+ *
+ * If an error is returned, *vpp will be invalid, otherwise it
+ * will hold a locked, referenced vnode. If *vpp == dvp then
+ * remember that only one exclusive lock is held.
+ */
+
static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
struct union_mount *um;
@@ -757,7 +856,7 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
cn->cn_pnbuf[cn->cn_namelen] = '\0';
cn->cn_nameiop = CREATE;
- cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+ cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
cn->cn_proc = cnp->cn_proc;
if (um->um_op == UNMNT_ABOVE)
cn->cn_cred = cnp->cn_cred;
@@ -768,15 +867,30 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
cn->cn_consume = cnp->cn_consume;
VREF(dvp);
- error = relookup(dvp, vpp, cn);
- if (!error)
- vrele(dvp);
- else {
+ VOP_UNLOCK(dvp, 0, cnp->cn_proc);
+
+ /*
+ * Pass dvp unlocked and referenced on call to relookup().
+ *
+ * If an error occurs, dvp will be returned unlocked and dereferenced.
+ */
+
+ if ((error = relookup(dvp, vpp, cn)) != 0) {
zfree(namei_zone, cn->cn_pnbuf);
cn->cn_pnbuf = NULL;
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_proc);
+ return(error);
}
- return (error);
+ /*
+ * If no error occurs, dvp will be returned locked with the reference
+ * left as before, and vpp will be returned referenced and locked.
+ *
+ * We want to return with dvp as it was passed to us, so we get
+ * rid of our reference.
+ */
+ vrele(dvp);
+ return (0);
}
/*
@@ -785,11 +899,11 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
*
* (um) points to the union mount structure for access to the
* mounting process's credentials.
- * (dvp) is the directory in which to create the shadow directory.
- * it is unlocked on entry and exit.
+ * (dvp) is the directory in which to create the shadow directory,
+ * it is locked (but not ref'd) on entry and return.
* (cnp) is the componentname to be created.
* (vpp) is the returned newly created shadow directory, which
- * is returned locked.
+ * is returned locked and ref'd
*/
int
union_mkshadow(um, dvp, cnp, vpp)
@@ -810,8 +924,10 @@ union_mkshadow(um, dvp, cnp, vpp)
if (*vpp) {
VOP_ABORTOP(dvp, &cn);
- VOP_UNLOCK(dvp, 0, p);
- vrele(*vpp);
+ if (dvp == *vpp)
+ vrele(*vpp);
+ else
+ vput(*vpp);
*vpp = NULLVP;
return (EEXIST);
}
@@ -832,7 +948,7 @@ union_mkshadow(um, dvp, cnp, vpp)
VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE);
error = VOP_MKDIR(dvp, vpp, &cn, &va);
- vput(dvp);
+ /*vput(dvp);*/
return (error);
}
@@ -842,7 +958,7 @@ union_mkshadow(um, dvp, cnp, vpp)
* (um) points to the union mount structure for access to the
* mounting process's credentials.
* (dvp) is the directory in which to create the whiteout.
- * it is locked on entry and exit.
+ * it is locked on entry and return.
* (cnp) is the componentname to be created.
*/
int
@@ -857,17 +973,16 @@ union_mkwhiteout(um, dvp, cnp, path)
struct vnode *wvp;
struct componentname cn;
- VOP_UNLOCK(dvp, 0, p);
error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
- if (error) {
- vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
+ if (error)
return (error);
- }
if (wvp) {
VOP_ABORTOP(dvp, &cn);
- vrele(dvp);
- vrele(wvp);
+ if (wvp == dvp)
+ vrele(wvp);
+ else
+ vput(wvp);
return (EEXIST);
}
@@ -877,9 +992,6 @@ union_mkwhiteout(um, dvp, cnp, path)
error = VOP_WHITEOUT(dvp, &cn, CREATE);
if (error)
VOP_ABORTOP(dvp, &cn);
-
- vrele(dvp);
-
return (error);
}
@@ -890,6 +1002,12 @@ union_mkwhiteout(um, dvp, cnp, path)
* the problem with calling namei is that a) it locks too many
* things, and b) it doesn't start at the "right" directory,
* whereas relookup is told where to start.
+ *
+ * On entry, the vnode associated with un is locked. It remains locked
+ * on return.
+ *
+ * If no error occurs, *vpp contains a locked referenced vnode for your
+ * use. If an error occurs *vpp is undefined.
*/
static int
union_vn_create(vpp, un, p)
@@ -921,26 +1039,34 @@ union_vn_create(vpp, un, p)
cn.cn_pnbuf = zalloc(namei_zone);
bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
cn.cn_nameiop = CREATE;
- cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+ cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
cn.cn_proc = p;
cn.cn_cred = p->p_ucred;
cn.cn_nameptr = cn.cn_pnbuf;
cn.cn_hash = un->un_hash;
cn.cn_consume = 0;
+ /*
+ * Pass dvp unlocked and referenced on call to relookup().
+ *
+ * If an error occurs, dvp will be returned unlocked and dereferenced.
+ */
VREF(un->un_dirvp);
error = relookup(un->un_dirvp, &vp, &cn);
if (error)
return (error);
- vrele(un->un_dirvp);
+ /*
+ * If no error occurs, dvp will be returned locked with the reference
+ * left as before, and vpp will be returned referenced and locked.
+ */
if (vp) {
+ vput(un->un_dirvp);
VOP_ABORTOP(un->un_dirvp, &cn);
- if (un->un_dirvp == vp)
- vrele(un->un_dirvp);
+ if (vp == un->un_dirvp)
+ vrele(vp);
else
- vput(un->un_dirvp);
- vrele(vp);
+ vput(vp);
return (EEXIST);
}
@@ -964,11 +1090,12 @@ union_vn_create(vpp, un, p)
return (error);
error = VOP_OPEN(vp, fmode, cred, p);
+ if (error == 0 && vn_canvmio(vp) == TRUE)
+ error = vfs_object_create(vp, p, cred);
if (error) {
vput(vp);
return (error);
}
-
vp->v_writecount++;
*vpp = vp;
return (0);
@@ -987,6 +1114,14 @@ union_vn_close(vp, fmode, cred, p)
return (VOP_CLOSE(vp, fmode, cred, p));
}
+#if 0
+
+/*
+ * union_removed_upper:
+ *
+ * called with union_node unlocked. XXX
+ */
+
void
union_removed_upper(un)
struct union_node *un;
@@ -999,9 +1134,7 @@ union_removed_upper(un)
* union node will have neither uppervp nor lowervp. We remove
* the union node from cache, so that it will not be referenced.
*/
-#if 0
union_newupper(un, NULLVP);
-#endif
if (un->un_dircache != 0) {
for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
vrele(*vpp);
@@ -1013,28 +1146,8 @@ union_removed_upper(un)
un->un_flags &= ~UN_CACHED;
LIST_REMOVE(un, un_cache);
}
-
- if (un->un_flags & UN_ULOCK) {
- un->un_flags &= ~UN_ULOCK;
- VOP_UNLOCK(un->un_uppervp, 0, p);
- }
}
-#if 0
-struct vnode *
-union_lowervp(vp)
- struct vnode *vp;
-{
- struct union_node *un = VTOUNION(vp);
-
- if ((un->un_lowervp != NULLVP) &&
- (vp->v_type == un->un_lowervp->v_type)) {
- if (vget(un->un_lowervp, 0) == 0)
- return (un->un_lowervp);
- }
-
- return (NULLVP);
-}
#endif
/*
@@ -1104,13 +1217,12 @@ union_dircache(vp, p)
nvp = NULLVP;
- if (dircache == 0) {
+ if (dircache == NULL) {
cnt = 0;
union_dircache_r(vp, 0, &cnt);
cnt++;
- dircache = (struct vnode **)
- malloc(cnt * sizeof(struct vnode *),
- M_TEMP, M_WAITOK);
+ dircache = malloc(cnt * sizeof(struct vnode *),
+ M_TEMP, M_WAITOK);
vpp = dircache;
union_dircache_r(vp, &vpp, &cnt);
*vpp = NULLVP;
@@ -1126,9 +1238,11 @@ union_dircache(vp, p)
if (*vpp == NULLVP)
goto out;
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);
+ /*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);*/
+ UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? (*vpp)->v_usecount : -99)));
VREF(*vpp);
- error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0);
+ error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0);
+ UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? (*vpp)->v_usecount : -99)));
if (error)
goto out;
@@ -1142,6 +1256,40 @@ out:
}
/*
+ * Guarantee coherency with the VM cache by invalidating any clean VM pages
+ * associated with this write and updating any dirty VM pages. Since our
+ * vnode is locked, other processes will not be able to read the pages in
+ * again until after our write completes.
+ *
+ * We also have to be coherent with reads, by flushing any pending dirty
+ * pages prior to issuing the read.
+ *
+ * XXX this is somewhat of a hack at the moment. To support this properly
+ * we would have to be able to run VOP_READ and VOP_WRITE through the VM
+ * cache. Then we wouldn't need to worry about coherency.
+ */
+
+void
+union_vm_coherency(struct vnode *vp, struct uio *uio, int cleanfls)
+{
+ vm_object_t object;
+ vm_pindex_t pstart;
+ vm_pindex_t pend;
+ int pgoff;
+
+ if ((object = vp->v_object) == NULL)
+ return;
+
+ pgoff = uio->uio_offset & PAGE_MASK;
+ pstart = uio->uio_offset / PAGE_SIZE;
+ pend = pstart + (uio->uio_resid + pgoff + PAGE_MASK) / PAGE_SIZE;
+
+ vm_object_page_clean(object, pstart, pend, OBJPC_SYNC);
+ if (cleanfls)
+ vm_object_page_remove(object, pstart, pend, TRUE);
+}
+
+/*
* Module glue to remove #ifdef UNION from vfs_syscalls.c
*/
static int
@@ -1169,6 +1317,8 @@ union_dircheck(struct proc *p, struct vnode **vp, struct file *fp)
if (lvp != NULLVP) {
error = VOP_OPEN(lvp, FREAD, fp->f_cred, p);
+ if (error == 0 && vn_canvmio(lvp) == TRUE)
+ error = vfs_object_create(lvp, p, fp->f_cred);
if (error) {
vput(lvp);
return (error);
@@ -1201,9 +1351,11 @@ union_modevent(module_t mod, int type, void *data)
}
return 0;
}
+
static moduledata_t union_mod = {
"union_dircheck",
union_modevent,
NULL
};
+
DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY);