4 files changed, 4018 insertions, 0 deletions
diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h
new file mode 100644
index 0000000..3fb0b6e
--- /dev/null
+++ b/sys/fs/unionfs/union.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 1994 The Regents of the University of California.
+ * Copyright (c) 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union.h	8.9 (Berkeley) 12/10/94
+ * $FreeBSD$
+ */
+
+struct union_args {
+	char		*target;	/* Target of loopback */
+	int		mntflags;	/* Options on the mount; presumably UNMNT_* (confirm) */
+};
+
+#define UNMNT_ABOVE	0x0001		/* Target appears above mount point */
+#define UNMNT_BELOW	0x0002		/* Target appears below mount point */
+#define UNMNT_REPLACE	0x0003		/* Target replaces mount point */
+#define UNMNT_OPMASK	0x0003		/* Mask covering the modes above */
+
+struct union_mount {
+	struct vnode	*um_uppervp;	/* Upper layer; NOTE(review): UN_ULOCK is disabled below */
+	struct vnode	*um_lowervp;	/* Lower layer; left unlocked */
+	struct ucred	*um_cred;	/* Credentials of user calling mount */
+	int		um_cmode;	/* cmask from mount process; mode of shadow dirs */
+	int		um_op;		/* Operation mode (UNMNT_ABOVE, ...) */
+};
+
+#ifdef KERNEL
+
+#ifndef DIAGNOSTIC
+#define DIAGNOSTIC
+#endif
+
+/*
+ * Base modes: rwx (dirs) and rw (files), repeated at >>3 and >>6.
+ */
+#define VRWXMODE (VREAD|VWRITE|VEXEC)
+#define VRWMODE (VREAD|VWRITE)
+#define UN_DIRMODE ((VRWXMODE)|(VRWXMODE>>3)|(VRWXMODE>>6))
+#define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6))
+
+/*
+ * A cache of vnode references	(hangs off v_data)
+ *
+ * Placing un_lock as the first element theoretically allows us to
+ * use the vop_stdlock functions.  However, we need to make sure of
+ * certain side effects so we will still punch in our own code.
+ */
+struct union_node {
+	struct lock		un_lock;	/* set up by union_allocvp() */
+	LIST_ENTRY(union_node)	un_cache;	/* Hash chain */
+	struct vnode		*un_vnode;	/* Back pointer */
+	struct vnode	        *un_uppervp;	/* overlaying object */
+	struct vnode	        *un_lowervp;	/* underlying object */
+	struct vnode		*un_dirvp;	/* Parent dir of uppervp */
+	struct vnode		*un_pvp;	/* Parent vnode */
+	char			*un_path;	/* saved component name */
+	int			un_openl;	/* # of opens on lowervp */
+	int			un_exclcnt;	/* exclusive count */
+	unsigned int		un_flags;	/* UN_CACHED */
+	struct vnode		**un_dircache;	/* cached union stack */
+	off_t			un_uppersz;	/* size of upper object */
+	off_t			un_lowersz;	/* size of lower object */
+#ifdef DIAGNOSTIC
+	pid_t			un_pid;		/* presumably the lock holder; confirm */
+#endif
+};
+
+/*
+ * XXX UN_ULOCK -	indicated that the uppervp is locked (now disabled)
+ *
+ * UN_CACHED -	node is in the union cache
+ */
+
+/*#define UN_ULOCK	0x04*/	/* Upper node is locked */
+#define UN_CACHED	0x10	/* In union cache */
+
+/*
+ * Hash chain locking flags (one set of bits per chain, see unvplock[])
+ */
+
+#define UNVP_WANT	0x01	/* a sleeper wants a wakeup() on unlock */
+#define UNVP_LOCKED	0x02	/* the chain is locked */
+
+extern int union_allocvp __P((struct vnode **, struct mount *,
+				struct vnode *, 
+				struct vnode *, 
+				struct componentname *, struct vnode *,
+				struct vnode *, int));
+extern int union_freevp __P((struct vnode *));
+extern struct vnode *union_dircache __P((struct vnode *, struct proc *));
+extern int union_copyup __P((struct union_node *, int, struct ucred *,
+				struct proc *));
+extern int union_dowhiteout __P((struct union_node *, struct ucred *,
+					struct proc *));
+extern int union_mkshadow __P((struct union_mount *, struct vnode *,
+				struct componentname *, struct vnode **));
+extern int union_mkwhiteout __P((struct union_mount *, struct vnode *,
+				struct componentname *, char *));
+extern int union_cn_close __P((struct vnode *, int, struct ucred *,
+				struct proc *));
+extern void union_removed_upper __P((struct union_node *un));
+extern struct vnode *union_lowervp __P((struct vnode *));
+extern void union_newsize __P((struct vnode *, off_t, off_t));
+extern void union_vm_coherency __P((struct vnode *, struct uio *, int));
+
+extern int (*union_dircheckp) __P((struct proc *, struct vnode **,
+				 struct file *));
+
+#define	MOUNTTOUNIONMOUNT(mp) ((struct union_mount *)((mp)->mnt_data))	/* mount -> union_mount */
+#define	VTOUNION(vp) ((struct union_node *)(vp)->v_data)	/* union vnode -> its node */
+#define	UNIONTOV(un) ((un)->un_vnode)	/* node -> its union vnode */
+#define	LOWERVP(vp) (VTOUNION(vp)->un_lowervp)	/* may be NULL */
+#define	UPPERVP(vp) (VTOUNION(vp)->un_uppervp)	/* may be NULL */
+#define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp))	/* upper if set, else lower; evaluates vp twice */
+
+#define UDEBUG(x)	do { if (uniondebug) printf x; } while (0)	/* x is a parenthesised printf argument list; safe before "else" */
+#define UDEBUG_ENABLED	1
+
+extern vop_t **union_vnodeop_p;
+extern struct vfsops union_vfsops;
+extern int uniondebug;
+
+#endif /* KERNEL */
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
new file mode 100644
index 0000000..6b88bef
--- /dev/null
+++ b/sys/fs/unionfs/union_subr.c
@@ -0,0 +1,1373 @@
+/*
+ * Copyright (c) 1994 Jan-Simon Pendry
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/module.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>	/* for vnode_pager_setsize */
+#include <vm/vm_zone.h>
+#include <vm/vm_object.h>	/* for vm cache coherency */
+#include <miscfs/union/union.h>
+
+#include <sys/proc.h>
+
+extern int	union_init __P((void));
+
+/* number of hash chains; must be power of two, otherwise change UNION_HASH() */
+#define NHASH 32
+
+/* chain index in [0, NHASH) for an (upper, lower) vnode pointer pair */
+#define UNION_HASH(u, l) \
+	(((((uintptr_t) (u)) + ((uintptr_t) (l))) >> 8) & (NHASH-1))
+
+static LIST_HEAD(unhead, union_node) unhead[NHASH];	/* hash chains of cached nodes */
+static int unvplock[NHASH];	/* UNVP_* lock bits, one word per chain */
+
+static void	union_dircache_r __P((struct vnode *vp, struct vnode ***vppp,
+				      int *cntp));
+static int	union_list_lock __P((int ix));
+static void	union_list_unlock __P((int ix));
+static int	union_relookup __P((struct union_mount *um, struct vnode *dvp,
+				    struct vnode **vpp,
+				    struct componentname *cnp,
+				    struct componentname *cn, char *path,
+				    int pathlen));
+static void	union_updatevp __P((struct union_node *un,
+				    struct vnode *uppervp,
+				    struct vnode *lowervp));
+static void union_newlower __P((struct union_node *, struct vnode *));
+static void union_newupper __P((struct union_node *, struct vnode *));
+static int union_copyfile __P((struct vnode *, struct vnode *,
+					struct ucred *, struct proc *));
+static int union_vn_create __P((struct vnode **, struct union_node *,
+				struct proc *));
+static int union_vn_close __P((struct vnode *, int, struct ucred *,
+				struct proc *));
+
+int
+union_init()
+{
+	int bucket = NHASH;
+
+	bzero((caddr_t)unvplock, sizeof(unvplock));	/* no chain locked */
+	while (bucket-- > 0)
+		LIST_INIT(&unhead[bucket]);		/* every chain empty */
+	return (0);
+}
+
+static int
+union_list_lock(ix)
+	int ix;
+{
+	if ((unvplock[ix] & UNVP_LOCKED) == 0) {
+		unvplock[ix] |= UNVP_LOCKED;	/* uncontended: chain is ours */
+		return (0);
+	}
+	unvplock[ix] |= UNVP_WANT;	/* ask the holder for a wakeup */
+	(void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0);
+	return (1);			/* slept without the lock: retry */
+}
+
+static void
+union_list_unlock(ix)
+	int ix;
+{
+	int wanted = unvplock[ix] & UNVP_WANT;
+
+	/* release the chain and consume any pending wakeup request */
+	unvplock[ix] &= ~(UNVP_LOCKED | UNVP_WANT);
+	if (wanted)
+		wakeup((caddr_t) &unvplock[ix]);
+}
+
+/*
+ *	union_updatevp:	install new upper/lower vnodes in a union_node,
+ *	moving it to the matching hash chain and vrele()ing replaced vnodes.
+ *	The uppervp, if not NULL, must be referenced and not locked by us.
+ *	The lowervp, if not NULL, must be referenced.
+ *
+ *	If uppervp and lowervp match pointers already installed, nothing
+ *	happens. The passed vp's (when matching) are not adjusted.  This
+ *	routine may only be called by union_newupper() and union_newlower().
+ */
+
+static void
+union_updatevp(un, uppervp, lowervp)
+	struct union_node *un;
+	struct vnode *uppervp;
+	struct vnode *lowervp;
+{
+	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);	/* current chain */
+	int nhash = UNION_HASH(uppervp, lowervp);		/* chain after update */
+	int docache = (lowervp != NULLVP || uppervp != NULLVP);	/* some layer remains */
+	int lhash, uhash;
+
+	/*
+	 * Ensure locking is ordered from lower to higher
+	 * to avoid deadlocks.
+	 */
+	if (nhash < ohash) {
+		lhash = nhash;
+		uhash = ohash;
+	} else {
+		lhash = ohash;
+		uhash = nhash;
+	}
+
+	if (lhash != uhash) {
+		while (union_list_lock(lhash))
+			continue;
+	}
+
+	while (union_list_lock(uhash))
+		continue;
+
+	if (ohash != nhash || !docache) {	/* leaving the old chain */
+		if (un->un_flags & UN_CACHED) {
+			un->un_flags &= ~UN_CACHED;
+			LIST_REMOVE(un, un_cache);
+		}
+	}
+
+	if (ohash != nhash)
+		union_list_unlock(ohash);
+
+	if (un->un_lowervp != lowervp) {
+		if (un->un_lowervp) {
+			vrele(un->un_lowervp);
+			if (un->un_path) {	/* saved name goes with the old lower */
+				free(un->un_path, M_TEMP);
+				un->un_path = 0;
+			}
+		}
+		un->un_lowervp = lowervp;
+		un->un_lowersz = VNOVAL;	/* size of the new lower is unknown */
+	}
+
+	if (un->un_uppervp != uppervp) {
+		if (un->un_uppervp)
+			vrele(un->un_uppervp);
+		un->un_uppervp = uppervp;
+		un->un_uppersz = VNOVAL;	/* size of the new upper is unknown */
+	}
+
+	if (docache && (ohash != nhash)) {	/* enter the new chain */
+		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
+		un->un_flags |= UN_CACHED;
+	}
+
+	union_list_unlock(nhash);
+}
+
+/*
+ * Set a new lowervp, keeping the current uppervp.  The passed lowervp must
+ * be referenced; that reference is stored in the node, not released.
+ */
+
+static void
+union_newlower(un, lowervp)
+	struct union_node *un;
+	struct vnode *lowervp;
+{
+	union_updatevp(un, un->un_uppervp, lowervp);
+}
+
+/*
+ * Set a new uppervp, keeping the current lowervp.  The passed uppervp must
+ * be referenced; the reference is stored in the node.  NOTE(review): this
+ * used to say "must be locked", but union_updatevp() asks for an unlocked
+ * vnode and union_copyup() unlocks uvp before calling here -- confirm.
+ */
+
+static void
+union_newupper(un, uppervp)
+	struct union_node *un;
+	struct vnode *uppervp;
+{
+	union_updatevp(un, uppervp, un->un_lowervp);
+}
+
+/*
+ * Keep track of size changes in the underlying vnodes.  A size of
+ * VNOVAL means the caller has no new information for that layer.
+ * If a recorded size changes, call back to the vm layer, giving
+ * priority to the upper layer size.
+ */
+void
+union_newsize(vp, uppersz, lowersz)
+	struct vnode *vp;
+	off_t uppersz, lowersz;
+{
+	struct union_node *un;
+	off_t newsz = VNOVAL;
+	int upper_changed;
+	int lower_changed;
+
+	/* only interested in regular files */
+	if (vp->v_type != VREG)
+		return;
+
+	un = VTOUNION(vp);
+	upper_changed = (uppersz != VNOVAL && un->un_uppersz != uppersz);
+	lower_changed = (lowersz != VNOVAL && un->un_lowersz != lowersz);
+
+	/* record lower first so that a changed upper size wins below */
+	if (lower_changed)
+		newsz = un->un_lowersz = lowersz;
+	if (upper_changed)
+		newsz = un->un_uppersz = uppersz;
+
+	/* neither recorded size changed: nothing to tell the vm layer */
+	if (newsz == VNOVAL)
+		return;
+
+	UDEBUG(("union: %s size now %ld\n",
+		(uppersz != VNOVAL ? "upper" : "lower"), (long)newsz));
+	vnode_pager_setsize(vp, newsz);
+}
+
+/*
+ *	union_allocvp:	allocate a union_node and associate it with a
+ *			parent union_node and one or two vnodes.
+ *
+ *	vpp	Holds the returned vnode locked and referenced if no
+ *		error occurs.
+ *
+ *	mp	Holds the mount point.  mp may or may not be busied.
+ *		allocvp makes no changes to mp.
+ *
+ *	dvp	Holds the parent union_node to the one we wish to create.
+ *		XXX may only be used to traverse an uncopied lowervp-based
+ *		tree?  XXX
+ *
+ *		dvp may or may not be locked.  allocvp makes no changes
+ *		to dvp.
+ *
+ *	upperdvp Holds the parent vnode to uppervp, generally used along
+ *		with path component information to create a shadow of
+ *		lowervp when uppervp does not exist.
+ *
+ *		upperdvp is referenced but unlocked on entry, and will be
+ *		dereferenced on return.
+ *
+ *	uppervp	Holds the new uppervp vnode to be stored in the
+ *		union_node we are allocating.  uppervp is referenced but
+ *		not locked, and will be dereferenced on return.
+ *
+ *	lowervp	Holds the new lowervp vnode to be stored in the
+ *		union_node we are allocating.  lowervp is referenced but
+ *		not locked, and will be dereferenced on return.
+ *
+ *	cnp	Holds path component information to be coupled with
+ *		lowervp and upperdvp to allow unionfs to create an uppervp
+ *		later on.  Only used if lowervp is valid.  The contents
+ *		of cnp are only valid for the duration of the call.
+ *
+ *	docache	Determine whether this node should be entered in the
+ *		cache or whether it should be destroyed as soon as possible.
+ *
+ * all cached union_nodes are kept on one of NHASH hash
+ * chains.  new nodes are only allocated when they cannot
+ * be found on these chains.  entries are taken off their
+ * chain by union_freevp().
+ *
+ * a lock is kept for each hash chain.  this is
+ * needed because the getnewvnode() function can block
+ * waiting for a vnode to become free, in which case there
+ * may be more than one process trying to get the same
+ * vnode.  the chain lock is held while a chain is searched
+ * and, when docache is set, across the call to getnewvnode().
+ *
+ * if an entry is found on the list, then call vget() to
+ * take a reference.  this is done because there may be
+ * zero references to it and so it needs to be removed from
+ * the vnode free list.
+ */
+
+int
+union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache)
+	struct vnode **vpp;
+	struct mount *mp;
+	struct vnode *dvp;		/* parent union vnode */
+	struct vnode *upperdvp;		/* parent vnode of uppervp */
+	struct componentname *cnp;	/* may be null */
+	struct vnode *uppervp;		/* may be null */
+	struct vnode *lowervp;		/* may be null */
+	int docache;
+{
+	int error;
+	struct union_node *un = 0;
+	struct vnode *xlowervp = NULLVP;	/* lowervp set aside on type mismatch */
+	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+	struct proc *p = (cnp) ? cnp->cn_proc : curproc;
+	int hash = 0;
+	int vflag;
+	int try;
+
+	if (uppervp == NULLVP && lowervp == NULLVP)
+		panic("union: unidentifiable allocation");
+
+	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
+		xlowervp = lowervp;
+		lowervp = NULLVP;
+	}
+
+	/* detect the root vnode (and aliases) */
+	vflag = 0;
+	if ((uppervp == um->um_uppervp) &&
+	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
+		if (lowervp == NULLVP) {
+			lowervp = um->um_lowervp;
+			if (lowervp != NULLVP)
+				VREF(lowervp);
+		}
+		vflag = VROOT;
+	}
+
+loop:
+	if (!docache) {
+		un = 0;
+	} else for (try = 0; try < 3; try++) {	/* probe up to three chains */
+		switch (try) {
+		case 0:
+			if (lowervp == NULLVP)
+				continue;
+			hash = UNION_HASH(uppervp, lowervp);
+			break;
+
+		case 1:
+			if (uppervp == NULLVP)
+				continue;
+			hash = UNION_HASH(uppervp, NULLVP);
+			break;
+
+		case 2:
+			if (lowervp == NULLVP)
+				continue;
+			hash = UNION_HASH(NULLVP, lowervp);
+			break;
+		}
+
+		while (union_list_lock(hash))
+			continue;
+
+		for (un = unhead[hash].lh_first; un != 0;
+					un = un->un_cache.le_next) {
+			if ((un->un_lowervp == lowervp ||
+			     un->un_lowervp == NULLVP) &&
+			    (un->un_uppervp == uppervp ||
+			     un->un_uppervp == NULLVP) &&
+			    (UNIONTOV(un)->v_mount == mp)) {
+				if (vget(UNIONTOV(un), 0,
+				    cnp ? cnp->cn_proc : NULL)) {
+					union_list_unlock(hash);
+					goto loop;
+				}
+				break;
+			}
+		}
+
+		union_list_unlock(hash);
+
+		if (un)
+			break;
+	}
+
+	if (un) {
+		/*
+		 * Obtain a lock on the union_node.  Everything is unlocked
+		 * except for dvp, so check that case.  If they match, our
+		 * new un is already locked.  Otherwise we have to lock our
+		 * new un.
+		 *
+		 * A potential deadlock situation occurs when we are holding
+		 * one lock while trying to get another.  We must follow
+		 * strict ordering rules to avoid it.  We try to locate dvp
+		 * by scanning up from un_vnode, since the most likely
+		 * scenario is un being under dvp.
+		 */
+
+		if (dvp && un->un_vnode != dvp) {
+			struct vnode *scan = un->un_vnode;
+
+			do {
+				scan = VTOUNION(scan)->un_pvp;
+			} while (scan && scan->v_tag == VT_UNION && scan != dvp);
+			if (scan != dvp) {
+				/*
+				 * our new un is above dvp (we never saw dvp
+				 * while moving up the tree).
+				 */
+				VREF(dvp);
+				VOP_UNLOCK(dvp, 0, p);
+				error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p);
+				vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
+				vrele(dvp);
+			} else {
+				/*
+				 * our new un is under dvp
+				 */
+				error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p);
+			}
+		} else if (dvp == NULLVP) {
+			/*
+			 * dvp is NULL, we need to lock un.
+			 */
+			error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p);
+		} else {
+			/*
+			 * dvp == un->un_vnode, we are already locked.
+			 */
+			error = 0;
+		}
+
+		if (error)
+			goto loop;	/* NOTE(review): the vget() reference is not dropped here -- confirm */
+
+		/*
+		 * At this point, the union_node is locked and referenced.
+		 *
+		 * uppervp is locked and referenced or NULL, lowervp is
+		 * referenced or NULL.
+		 */
+		UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n",
+			un, un->un_vnode, un->un_uppervp, 
+			(un->un_uppervp ? un->un_uppervp->v_usecount : -99),
+			uppervp,
+			(uppervp ? uppervp->v_usecount : -99)
+		));
+
+		if (uppervp != un->un_uppervp) {
+			KASSERT(uppervp == NULL || uppervp->v_usecount > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", uppervp->v_usecount));
+			union_newupper(un, uppervp);
+		} else if (uppervp) {
+			KASSERT(uppervp->v_usecount > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", uppervp->v_usecount));
+			vrele(uppervp);
+		}
+
+		/*
+		 * Save information about the lower layer.
+		 * This needs to keep track of pathname
+		 * and directory information which union_vn_create
+		 * might need.
+		 */
+		if (lowervp != un->un_lowervp) {
+			union_newlower(un, lowervp);
+			if (cnp && (lowervp != NULLVP)) {
+				un->un_path = malloc(cnp->cn_namelen+1,
+						M_TEMP, M_WAITOK);
+				bcopy(cnp->cn_nameptr, un->un_path,
+						cnp->cn_namelen);
+				un->un_path[cnp->cn_namelen] = '\0';
+			}
+		} else if (lowervp) {
+			vrele(lowervp);
+		}
+
+		/*
+		 * and upperdvp
+		 */
+		if (upperdvp != un->un_dirvp) {
+			if (un->un_dirvp)
+				vrele(un->un_dirvp);
+			un->un_dirvp = upperdvp;
+		} else if (upperdvp) {
+			vrele(upperdvp);
+		}
+
+		*vpp = UNIONTOV(un);
+		return (0);	/* NOTE(review): xlowervp, if set, is not vrele()d on this path -- confirm */
+	}
+
+	if (docache) {
+		/*
+		 * otherwise lock the vp list while we call getnewvnode
+		 * since that can block.
+		 */ 
+		hash = UNION_HASH(uppervp, lowervp);
+
+		if (union_list_lock(hash))
+			goto loop;
+	}
+
+	/*
+	 * Create new node rather than replace old node
+	 */
+
+	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
+	if (error) {
+		/*
+		 * If an error occurs clear out vnodes.
+		 */
+		if (lowervp)
+			vrele(lowervp);
+		if (uppervp) 
+			vrele(uppervp);
+		if (upperdvp)
+			vrele(upperdvp);
+		*vpp = NULL;
+		goto out;
+	}
+
+	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
+		M_TEMP, M_WAITOK);
+
+	(*vpp)->v_flag |= vflag;
+	if (uppervp)
+		(*vpp)->v_type = uppervp->v_type;
+	else
+		(*vpp)->v_type = lowervp->v_type;
+
+	un = VTOUNION(*vpp);
+	bzero(un, sizeof(*un));
+
+	lockinit(&un->un_lock, PVFS, "unlock", 0, 0);
+	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);
+
+	un->un_vnode = *vpp;
+	un->un_uppervp = uppervp;
+	un->un_uppersz = VNOVAL;
+	un->un_lowervp = lowervp;
+	un->un_lowersz = VNOVAL;
+	un->un_dirvp = upperdvp;
+	un->un_pvp = dvp;		/* only parent dir in new allocation */
+	if (dvp != NULLVP)
+		VREF(dvp);
+	un->un_dircache = 0;
+	un->un_openl = 0;
+
+	if (cnp && (lowervp != NULLVP)) {
+		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
+		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
+		un->un_path[cnp->cn_namelen] = '\0';
+	} else {
+		un->un_path = 0;
+		un->un_dirvp = NULL;	/* NOTE(review): forgets upperdvp without vrele() -- leak? */
+	}
+
+	if (docache) {
+		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
+		un->un_flags |= UN_CACHED;
+	}
+
+out:
+	if (xlowervp)
+		vrele(xlowervp);
+
+	if (docache)
+		union_list_unlock(hash);
+
+	return (error);
+}
+
+int
+union_freevp(vp)
+	struct vnode *vp;
+{
+	struct union_node *node = VTOUNION(vp);
+	struct vnode **held[4];
+	int i;
+
+	/* take the node off its hash chain, if it is on one */
+	if ((node->un_flags & UN_CACHED) != 0) {
+		node->un_flags &= ~UN_CACHED;
+		LIST_REMOVE(node, un_cache);
+	}
+
+	/* drop each vnode reference the node holds, in this order: */
+	/* parent, upper, lower, parent directory of upper */
+	held[0] = &node->un_pvp;
+	held[1] = &node->un_uppervp;
+	held[2] = &node->un_lowervp;
+	held[3] = &node->un_dirvp;
+	for (i = 0; i < (int)(sizeof(held) / sizeof(held[0])); i++) {
+		if (*held[i] != NULLVP) {
+			vrele(*held[i]);
+			*held[i] = NULL;
+		}
+	}
+
+	if (node->un_path != NULL) {
+		free(node->un_path, M_TEMP);
+		node->un_path = NULL;
+	}
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+
+	return (0);
+}
+
+/*
+ * copyfile.  copy the contents of the vnode (fvp) to the vnode (tvp)
+ * using a sequence of MAXBSIZE reads and writes starting at offset 0.
+ * returns 0 at end of file, or the first VOP_READ/VOP_WRITE error.
+ *
+ * fvp and tvp are both exclusive locked on call and on return, but
+ * their refcounts haven't been bumped at all.
+ */
+static int
+union_copyfile(fvp, tvp, cred, p)
+	struct vnode *fvp;
+	struct vnode *tvp;
+	struct ucred *cred;
+	struct proc *p;
+{
+	char *buf;
+	struct uio uio;
+	struct iovec iov;
+	int error = 0;
+
+	/*
+	 * strategy:
+	 * allocate a buffer of size MAXBSIZE.
+	 * loop doing reads and writes, keeping track
+	 * of the current uio offset.
+	 * give up at the first sign of trouble.
+	 */
+
+	bzero(&uio, sizeof(uio));
+
+	uio.uio_procp = p;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_offset = 0;
+
+	VOP_LEASE(fvp, p, cred, LEASE_READ);
+	VOP_LEASE(tvp, p, cred, LEASE_WRITE);
+
+	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
+
+	/* ugly loop follows... */
+	do {
+		off_t offset = uio.uio_offset;	/* file offset of this chunk */
+		int count;			/* bytes read into buf */
+		int bufoffset;			/* bytes of buf written so far */
+
+		/*
+		 * Setup for big read
+		 */
+		uio.uio_iov = &iov;
+		uio.uio_iovcnt = 1;
+		iov.iov_base = buf;
+		iov.iov_len = MAXBSIZE;
+		uio.uio_resid = iov.iov_len;
+		uio.uio_rw = UIO_READ;
+
+		if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0)
+			break;
+
+		/*
+		 * Get bytes read, handle read eof case and setup for
+		 * write loop
+		 */
+		if ((count = MAXBSIZE - uio.uio_resid) == 0)
+			break;
+		bufoffset = 0;
+
+		/*
+		 * Write until an error occurs or our buffer has been
+		 * exhausted, then update the offset for the next read.
+		 */
+		while (bufoffset < count) {
+			uio.uio_iov = &iov;
+			uio.uio_iovcnt = 1;
+			iov.iov_base = buf + bufoffset;
+			iov.iov_len = count - bufoffset;
+			uio.uio_offset = offset + bufoffset;
+			uio.uio_rw = UIO_WRITE;
+			uio.uio_resid = iov.iov_len;
+
+			if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0)
+				break;
+			bufoffset += (count - bufoffset) - uio.uio_resid;	/* NOTE(review): a 0-byte write without error would spin here */
+		}
+		uio.uio_offset = offset + bufoffset;
+	} while (error == 0);
+
+	free(buf, M_TEMP);
+	return (error);
+}
+
+/*
+ * union_copyup: create the upper-layer shadow of un's lower file and,
+ * if docopy is set, copy the lower file's contents into it.
+ *
+ * un's vnode is assumed to be locked on entry and remains locked on exit.
+ * The lower vnode is locked only temporarily and is left unlocked on
+ * every return path, including VOP_OPEN()/vfs_object_create() failure.
+ */
+int
+union_copyup(un, docopy, cred, p)
+	struct union_node *un;
+	int docopy;
+	struct ucred *cred;
+	struct proc *p;
+{
+	int error;
+	struct vnode *lvp, *uvp;
+
+	/*
+	 * If the user does not have read permission, the vnode should not
+	 * be copied to upper layer.
+	 */
+	vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, p);
+	error = VOP_ACCESS(un->un_lowervp, VREAD, cred, p);
+	VOP_UNLOCK(un->un_lowervp, 0, p);
+	if (error)
+		return (error);
+
+	error = union_vn_create(&uvp, un, p);
+	if (error)
+		return (error);
+	lvp = un->un_lowervp;
+	KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
+	if (docopy) {
+		/* XX - should not ignore errors from VOP_CLOSE */
+		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p);
+		error = VOP_OPEN(lvp, FREAD, cred, p);
+		if (error == 0) {
+			if (vn_canvmio(lvp) == TRUE)
+				error = vfs_object_create(lvp, p, cred);
+			if (error == 0)
+				error = union_copyfile(lvp, uvp, cred, p);
+			VOP_UNLOCK(lvp, 0, p);
+			(void) VOP_CLOSE(lvp, FREAD, cred, p);	/* balances the open */
+		} else {
+			/* open failed: nothing to close, but drop the lock */
+			VOP_UNLOCK(lvp, 0, p);
+		}
+		if (error == 0)
+			UDEBUG(("union: copied up %s\n", un->un_path));
+	}
+	VOP_UNLOCK(uvp, 0, p);
+	union_newupper(un, uvp);
+	KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
+	union_vn_close(uvp, FWRITE, cred, p);
+	KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
+	/*
+	 * Subsequent IOs will go to the top layer, so
+	 * call close on the lower vnode and open on the
+	 * upper vnode to ensure that the filesystem keeps
+	 * its references counts right.  This doesn't do
+	 * the right thing with (cred) and (FREAD) though.
+	 * Ignoring error returns is not right, either.
+	 */
+	if (error == 0) {
+		int i;
+
+		for (i = 0; i < un->un_openl; i++) {
+			(void) VOP_CLOSE(lvp, FREAD, cred, p);
+			(void) VOP_OPEN(uvp, FREAD, cred, p);
+		}
+		if (un->un_openl) {
+			if (vn_canvmio(uvp) == TRUE)
+				error = vfs_object_create(uvp, p, cred);
+		}
+		un->un_openl = 0;
+	}
+
+	return (error);
+}
+
+/*
+ *	union_relookup:	fill in *cn as a CREATE lookup of the name
+ *	(path, pathlen) and look it up in dvp with relookup().
+ *	dvp should be locked on entry and will be locked on return.  No
+ *	net change in the ref count will occur.
+ *
+ *	If an error is returned, *vpp will be invalid, otherwise it
+ *	will hold a locked, referenced vnode (callers also test it for
+ *	NULL).  If *vpp == dvp then only one exclusive lock is held.
+ */
+
+static int
+union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
+	struct union_mount *um;
+	struct vnode *dvp;
+	struct vnode **vpp;
+	struct componentname *cnp;
+	struct componentname *cn;
+	char *path;
+	int pathlen;
+{
+	int error;
+
+	/*
+	 * A new componentname structure must be faked up because
+	 * there is no way to know where the upper level cnp came
+	 * from or what it is being used for.  This must duplicate
+	 * some of the work done by NDINIT, some of the work done
+	 * by namei, some of the work done by lookup and some of
+	 * the work done by VOP_LOOKUP when given a CREATE flag.
+	 * Conclusion: Horrible.
+	 */
+	cn->cn_namelen = pathlen;
+	cn->cn_pnbuf = zalloc(namei_zone);	/* NOTE(review): pathlen is not checked against the buffer size */
+	bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
+	cn->cn_pnbuf[cn->cn_namelen] = '\0';
+
+	cn->cn_nameiop = CREATE;
+	cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
+	cn->cn_proc = cnp->cn_proc;
+	if (um->um_op == UNMNT_ABOVE)
+		cn->cn_cred = cnp->cn_cred;	/* caller's credentials */
+	else
+		cn->cn_cred = um->um_cred;	/* credentials saved at mount time */
+	cn->cn_nameptr = cn->cn_pnbuf;
+	cn->cn_consume = cnp->cn_consume;
+
+	VREF(dvp);
+	VOP_UNLOCK(dvp, 0, cnp->cn_proc);
+
+	/*
+	 * Pass dvp unlocked and referenced on call to relookup().
+	 *
+	 * If an error occurs, dvp will be returned unlocked and dereferenced.
+	 */
+
+	if ((error = relookup(dvp, vpp, cn)) != 0) {
+		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_proc);
+		return(error);	/* NOTE(review): cn_pnbuf is not freed here or by the visible callers; leak unless relookup() frees it */
+	}
+
+	/*
+	 * If no error occurs, dvp will be returned locked with the reference
+	 * left as before, and vpp will be returned referenced and locked.
+	 *
+	 * We want to return with dvp as it was passed to us, so we get
+	 * rid of our reference.
+	 */
+	vrele(dvp);
+	return (0);
+}
+
+/*
+ * Create a shadow directory in the upper layer.
+ * Returns EEXIST if the name already exists in (dvp).
+ *
+ * (um) points to the union mount structure for access to
+ * the mounting process's credentials and creation mode.
+ * (dvp) is the directory in which to create the shadow directory,
+ * it is locked (but not ref'd) on entry and return.
+ * (cnp) is the componentname to be created.
+ * (vpp) is the returned newly created shadow directory, which
+ * is returned locked and ref'd
+ */
+int
+union_mkshadow(um, dvp, cnp, vpp)
+	struct union_mount *um;
+	struct vnode *dvp;
+	struct componentname *cnp;
+	struct vnode **vpp;
+{
+	int error;
+	struct vattr va;
+	struct proc *p = cnp->cn_proc;
+	struct componentname cn;	/* private copy built by union_relookup() */
+
+	error = union_relookup(um, dvp, vpp, cnp, &cn,
+			cnp->cn_nameptr, cnp->cn_namelen);
+	if (error)
+		return (error);
+
+	if (*vpp) {	/* the name is already present in the upper layer */
+		if (cn.cn_flags & HASBUF) {
+			zfree(namei_zone, cn.cn_pnbuf);
+			cn.cn_flags &= ~HASBUF;
+		}
+		if (dvp == *vpp)
+			vrele(*vpp);	/* same vnode: keep dvp's single lock */
+		else
+			vput(*vpp);
+		*vpp = NULLVP;
+		return (EEXIST);
+	}
+
+	/*
+	 * policy: when creating the shadow directory in the
+	 * upper layer, create it owned by the user who did
+	 * the mount, group from parent directory, and mode
+	 * 777 modified by umask (ie mostly identical to the
+	 * mkdir syscall).  (jsp, kb)
+	 */
+
+	VATTR_NULL(&va);
+	va.va_type = VDIR;
+	va.va_mode = um->um_cmode;
+
+	/* VOP_LEASE: dvp is locked */
+	VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE);
+
+	error = VOP_MKDIR(dvp, vpp, &cn, &va);
+	if (cn.cn_flags & HASBUF) {
+		zfree(namei_zone, cn.cn_pnbuf);
+		cn.cn_flags &= ~HASBUF;
+	}
+	/*vput(dvp);*/
+	return (error);
+}
+
+/*
+ * Create a whiteout entry named (path) in the upper layer.
+ * Returns EEXIST if that name already exists in (dvp).
+ * (um) points to the union mount structure for access to
+ * the mounting process's credentials.
+ * (dvp) is the directory in which to create the whiteout.
+ * it is locked on entry and return.
+ * (cnp) supplies the process and credentials for the lookup.
+ */
+int
+union_mkwhiteout(um, dvp, cnp, path)
+	struct union_mount *um;
+	struct vnode *dvp;
+	struct componentname *cnp;
+	char *path;
+{
+	int error;
+	struct proc *p = cnp->cn_proc;
+	struct vnode *wvp;
+	struct componentname cn;	/* private copy built by union_relookup() */
+
+	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
+	if (error)
+		return (error);
+
+	if (wvp) {	/* the name is already present in the upper layer */
+		if (cn.cn_flags & HASBUF) {
+			zfree(namei_zone, cn.cn_pnbuf);
+			cn.cn_flags &= ~HASBUF;
+		}
+		if (wvp == dvp)
+			vrele(wvp);	/* same vnode: keep dvp's single lock */
+		else
+			vput(wvp);
+		return (EEXIST);
+	}
+
+	/* VOP_LEASE: dvp is locked */
+	VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE);	/* NOTE(review): union_mkshadow() passes cn.cn_cred here */
+
+	error = VOP_WHITEOUT(dvp, &cn, CREATE);
+	if (cn.cn_flags & HASBUF) {
+		zfree(namei_zone, cn.cn_pnbuf);
+		cn.cn_flags &= ~HASBUF;
+	}
+	return (error);
+}
+
+/*
+ * union_vn_create: creates and opens a new shadow file
+ * on the upper union layer.  this function is similar
+ * in spirit to calling vn_open but it avoids calling namei().
+ * the problem with calling namei is that a) it locks too many
+ * things, and b) it doesn't start at the "right" directory,
+ * whereas relookup is told where to start.
+ *
+ * (vpp) receives the new vnode, (un) supplies the name (un_path) and
+ * the directory (un_dirvp) to create it in, (p) is the calling process
+ * whose credentials and umask are used.
+ *
+ * On entry, the vnode associated with un is locked.  It remains locked
+ * on return.
+ *
+ * If no error occurs, *vpp contains a locked referenced vnode for your
+ * use.  If an error occurs *vpp is NULLVP and an errno value is
+ * returned (EEXIST if the name already exists in the directory).
+ */
+static int
+union_vn_create(vpp, un, p)
+	struct vnode **vpp;
+	struct union_node *un;
+	struct proc *p;
+{
+	struct vnode *vp;
+	struct ucred *cred = p->p_ucred;
+	struct vattr vat;
+	struct vattr *vap = &vat;
+	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
+	int error;
+	int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
+	struct componentname cn;
+
+	*vpp = NULLVP;
+
+	/*
+	 * Build a new componentname structure (for the same
+	 * reasons outlined in union_mkshadow).
+	 * The difference here is that the file is owned by
+	 * the current user, rather than by the person who
+	 * did the mount, since the current user needs to be
+	 * able to write the file (that's why it is being
+	 * copied in the first place).
+	 */
+	cn.cn_namelen = strlen(un->un_path);
+	cn.cn_pnbuf = zalloc(namei_zone);
+	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
+	cn.cn_nameiop = CREATE;
+	cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
+	cn.cn_proc = p;
+	cn.cn_cred = p->p_ucred;
+	cn.cn_nameptr = cn.cn_pnbuf;
+	cn.cn_consume = 0;
+
+	/*
+	 * Pass dvp unlocked and referenced on call to relookup().
+	 *
+	 * If an error occurs, dvp will be returned unlocked and dereferenced.
+	 */
+	VREF(un->un_dirvp);
+	error = relookup(un->un_dirvp, &vp, &cn);
+	if (error) {
+		/*
+		 * The pathname buffer was allocated above and is still
+		 * ours; release it here like every other exit path does,
+		 * otherwise it is leaked on a failed lookup.
+		 */
+		if (cn.cn_flags & HASBUF) {
+			zfree(namei_zone, cn.cn_pnbuf);
+			cn.cn_flags &= ~HASBUF;
+		}
+		return (error);
+	}
+
+	/*
+	 * If no error occurs, dvp will be returned locked with the reference
+	 * left as before, and vpp will be returned referenced and locked.
+	 */
+	if (vp) {
+		/* The name already exists: undo the lookup and bail out. */
+		vput(un->un_dirvp);
+		if (cn.cn_flags & HASBUF) {
+			zfree(namei_zone, cn.cn_pnbuf);
+			cn.cn_flags &= ~HASBUF;
+		}
+		if (vp == un->un_dirvp)
+			vrele(vp);
+		else
+			vput(vp);
+		return (EEXIST);
+	}
+
+	/*
+	 * Good - there was no race to create the file
+	 * so go ahead and create it.  The permissions
+	 * on the file will be 0666 modified by the
+	 * current user's umask.  Access to the file, while
+	 * it is unioned, will require access to the top *and*
+	 * bottom files.  Access when not unioned will simply
+	 * require access to the top-level file.
+	 * TODO: confirm choice of access permissions.
+	 */
+	VATTR_NULL(vap);
+	vap->va_type = VREG;
+	vap->va_mode = cmode;
+	VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE);
+	error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap);
+	if (cn.cn_flags & HASBUF) {
+		zfree(namei_zone, cn.cn_pnbuf);
+		cn.cn_flags &= ~HASBUF;
+	}
+	vput(un->un_dirvp);
+	if (error)
+		return (error);
+
+	/* Open the new file for writing and give it a VM object if eligible. */
+	error = VOP_OPEN(vp, fmode, cred, p);
+	if (error == 0 && vn_canvmio(vp) == TRUE)
+		error = vfs_object_create(vp, p, cred);
+	if (error) {
+		vput(vp);
+		return (error);
+	}
+	/* Account for the write open; union_vn_close() drops it again. */
+	vp->v_writecount++;
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * Close a vnode opened by union_vn_create(): give back the write
+ * count taken for a write open, then hand the close to the vnode.
+ * Returns whatever VOP_CLOSE() returns.
+ */
+static int
+union_vn_close(vp, fmode, cred, p)
+	struct vnode *vp;
+	int fmode;
+	struct ucred *cred;
+	struct proc *p;
+{
+	int error;
+
+	if ((fmode & FWRITE) != 0)
+		vp->v_writecount--;
+
+	error = VOP_CLOSE(vp, fmode, cred, p);
+	return (error);
+}
+
+#if 0
+
+/*
+ *	union_removed_upper:
+ *
+ *	called with union_node unlocked. XXX
+ */
+
+/*
+ * NOTE: this function sits inside an "#if 0" region and is not compiled.
+ *
+ * Detach the upper vnode from (un), release every vnode held in its
+ * directory cache and take the node off the union node cache list.
+ */
+void
+union_removed_upper(un)
+	struct union_node *un;
+{
+	struct proc *p = curproc;	/* XXX */
+	struct vnode **vpp;
+
+	/*
+	 * Do not set the uppervp to NULLVP.  If lowervp is NULLVP,
+	 * union node will have neither uppervp nor lowervp.  We remove
+	 * the union node from cache, so that it will not be referenced.
+	 */
+	union_newupper(un, NULLVP);
+	if (un->un_dircache != 0) {
+		/* The cache is a NULLVP-terminated array of referenced vnodes. */
+		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
+			vrele(*vpp);
+		free(un->un_dircache, M_TEMP);
+		un->un_dircache = 0;
+	}
+
+	/* Unhook from the cache list only if the node is currently on it. */
+	if (un->un_flags & UN_CACHED) {
+		un->un_flags &= ~UN_CACHED;
+		LIST_REMOVE(un, un_cache);
+	}
+}
+
+#endif
+
+/*
+ * determine whether a whiteout is needed
+ * during a remove/rmdir operation.
+ */
+int
+union_dowhiteout(un, cred, p)
+	struct union_node *un;
+	struct ucred *cred;
+	struct proc *p;
+{
+	struct vattr va;
+	int error;
+
+	/* A lower object exists, so it has to be masked by a whiteout. */
+	if (un->un_lowervp != NULLVP)
+		return (1);
+
+	/*
+	 * No lower object: a whiteout is only wanted when the upper
+	 * vnode is flagged OPAQUE.  If its attributes cannot be read,
+	 * answer "no".
+	 */
+	error = VOP_GETATTR(un->un_uppervp, &va, cred, p);
+	if (error != 0)
+		return (0);
+
+	return ((va.va_flags & OPAQUE) ? 1 : 0);
+}
+
+/*
+ * Recursive helper for union_dircache().  Walks the stack of layers
+ * below (vp), upper before lower, visiting every non-union vnode.
+ *
+ * With (vppp) NULL this is the counting pass: *cntp is incremented
+ * once per vnode found.  Otherwise it is the fill pass: each vnode is
+ * referenced and appended through *vppp while *cntp counts down the
+ * remaining slots; running out of slots is a panic.
+ */
+static void
+union_dircache_r(vp, vppp, cntp)
+	struct vnode *vp;
+	struct vnode ***vppp;
+	int *cntp;
+{
+	struct union_node *un;
+
+	if (vp->v_op == union_vnodeop_p) {
+		/* A union vnode: descend into its layers. */
+		un = VTOUNION(vp);
+		if (un->un_uppervp != NULLVP)
+			union_dircache_r(un->un_uppervp, vppp, cntp);
+		if (un->un_lowervp != NULLVP)
+			union_dircache_r(un->un_lowervp, vppp, cntp);
+		return;
+	}
+
+	/* Counting pass: just tally this vnode. */
+	if (vppp == NULL) {
+		*cntp += 1;
+		return;
+	}
+
+	/* Fill pass: take a reference and store it in the next slot. */
+	VREF(vp);
+	**vppp = vp;
+	(*vppp)++;
+	*cntp -= 1;
+	if (*cntp == 0)
+		panic("union: dircache table too small");
+}
+
+/*
+ * Return a union vnode for the next directory layer below (vp), or
+ * NULLVP when there is none or the vnode could not be allocated.
+ *
+ * (vp) is locked for the duration of the call and unlocked on return.
+ * The layer list ("dircache") is a NULLVP-terminated array of
+ * referenced vnodes.  It is built on first use and then handed from
+ * (vp) to the returned vnode, so that a later call on that vnode can
+ * continue from the following layer.
+ *
+ * A list built by this call that cannot be handed to a new vnode is
+ * torn down again before returning, so that neither the array nor the
+ * vnode references it holds are leaked.
+ */
+struct vnode *
+union_dircache(vp, p)
+	struct vnode *vp;
+	struct proc *p;
+{
+	int cnt;
+	struct vnode *nvp;
+	struct vnode **vpp;
+	struct vnode **dircache;
+	struct vnode **newdircache;
+	struct union_node *un;
+	int error;
+
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+	dircache = VTOUNION(vp)->un_dircache;
+	newdircache = NULL;
+
+	nvp = NULLVP;
+
+	if (dircache == NULL) {
+		/*
+		 * First pass counts the layers, second pass fills the
+		 * array; one extra slot holds the NULLVP terminator.
+		 */
+		cnt = 0;
+		union_dircache_r(vp, 0, &cnt);
+		cnt++;
+		dircache = malloc(cnt * sizeof(struct vnode *),
+				M_TEMP, M_WAITOK);
+		newdircache = dircache;
+		vpp = dircache;
+		union_dircache_r(vp, &vpp, &cnt);
+		*vpp = NULLVP;
+		/* Entry 0 is the current top layer; start at the next one. */
+		vpp = dircache + 1;
+	} else {
+		/* Step past the entry matching our own upper vnode. */
+		vpp = dircache;
+		do {
+			if (*vpp++ == VTOUNION(vp)->un_uppervp)
+				break;
+		} while (*vpp != NULLVP);
+	}
+
+	if (*vpp == NULLVP)
+		goto out;
+
+	/*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);*/
+	UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? (*vpp)->v_usecount : -99)));
+	VREF(*vpp);
+	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0);
+	UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? (*vpp)->v_usecount : -99)));
+	if (error)
+		goto out;
+
+	/* Hand the list over to the new vnode; it is no longer ours. */
+	VTOUNION(vp)->un_dircache = 0;
+	un = VTOUNION(nvp);
+	un->un_dircache = dircache;
+	newdircache = NULL;
+
+out:
+	/*
+	 * A list allocated above that found no new owner would otherwise
+	 * be lost: drop its vnode references and free the array.
+	 */
+	if (newdircache != NULL) {
+		for (vpp = newdircache; *vpp != NULLVP; vpp++)
+			vrele(*vpp);
+		free(newdircache, M_TEMP);
+	}
+	VOP_UNLOCK(vp, 0, p);
+	return (nvp);
+}
+
+/*
+ * Guarantee coherency with the VM cache by invalidating any clean VM pages
+ * associated with this write and updating any dirty VM pages.  Since our
+ * vnode is locked, other processes will not be able to read the pages in
+ * again until after our write completes.
+ *
+ * We also have to be coherent with reads, by flushing any pending dirty
+ * pages prior to issuing the read.
+ *
+ * (vp) is the vnode being read or written, (uio) describes the transfer
+ * (offset and residual length), and a non-zero (cleanfls) asks for the
+ * pages in the range to be removed after they have been cleaned.
+ *
+ * XXX this is somewhat of a hack at the moment.  To support this properly
+ * we would have to be able to run VOP_READ and VOP_WRITE through the VM
+ * cache.  Then we wouldn't need to worry about coherency.
+ */
+
+void
+union_vm_coherency(struct vnode *vp, struct uio *uio, int cleanfls)
+{
+	vm_object_t object;
+	vm_pindex_t first;
+	vm_pindex_t last;
+	int pgoff;
+
+	/* Nothing to keep coherent when the vnode has no VM object. */
+	object = vp->v_object;
+	if (object == NULL)
+		return;
+
+	/*
+	 * Turn the byte range of the transfer into a page index range:
+	 * start at the page holding the first byte and round the end up
+	 * so that a partially covered last page is included.
+	 */
+	pgoff = uio->uio_offset & PAGE_MASK;
+	first = uio->uio_offset / PAGE_SIZE;
+	last = first + (uio->uio_resid + pgoff + PAGE_MASK) / PAGE_SIZE;
+
+	vm_object_page_clean(object, first, last, OBJPC_SYNC);
+	if (cleanfls != 0)
+		vm_object_page_remove(object, first, last, TRUE);
+}
+
+/*
+ * Module glue to remove #ifdef UNION from vfs_syscalls.c
+ *
+ * Called with the directory vnode (*vp) currently being read through
+ * (fp).  If *vp is a union vnode with a further layer below it, that
+ * layer is opened, installed in (fp) in place of *vp, *vp is closed
+ * and updated to the new vnode, and -1 is returned to tell the caller
+ * to restart the read ("goto unionread").  Lower layers are not shown
+ * when the directory is opaque.
+ *
+ * Returns 0 when there is nothing more to do, -1 as described above,
+ * or an errno value on failure.
+ */
+static int
+union_dircheck(struct proc *p, struct vnode **vp, struct file *fp)
+{
+	int error = 0;
+
+	if ((*vp)->v_op == union_vnodeop_p) {
+		struct vnode *lvp;
+
+		lvp = union_dircache(*vp, p);
+		if (lvp != NULLVP) {
+			struct vattr va;
+
+			/*
+			 * If the directory is opaque,
+			 * then don't show lower entries.
+			 *
+			 * (va) is only meaningful when VOP_GETATTR()
+			 * succeeds; on failure drop the lower vnode as
+			 * well and let the error be returned below
+			 * instead of testing uninitialised flags.
+			 */
+			error = VOP_GETATTR(*vp, &va, fp->f_cred, p);
+			if (error != 0 || (va.va_flags & OPAQUE)) {
+				vput(lvp);
+				lvp = NULLVP;
+			}
+		}
+
+		if (lvp != NULLVP) {
+			error = VOP_OPEN(lvp, FREAD, fp->f_cred, p);
+			if (error == 0 && vn_canvmio(lvp) == TRUE)
+				error = vfs_object_create(lvp, p, fp->f_cred);
+			if (error) {
+				vput(lvp);
+				return (error);
+			}
+			/* Keep the reference, drop the lock, switch the file over. */
+			VOP_UNLOCK(lvp, 0, p);
+			fp->f_data = (caddr_t) lvp;
+			fp->f_offset = 0;
+			error = vn_close(*vp, FREAD, fp->f_cred, p);
+			if (error)
+				return (error);
+			*vp = lvp;
+			return -1;	/* goto unionread */
+		}
+	}
+	return error;
+}
+
+/*
+ * Module event handler: installs union_dircheck() in the
+ * union_dircheckp hook when the module is loaded and clears the hook
+ * again on unload.  All other events are ignored.  Always returns 0.
+ */
+static int
+union_modevent(module_t mod, int type, void *data)
+{
+	if (type == MOD_LOAD)
+		union_dircheckp = union_dircheck;
+	else if (type == MOD_UNLOAD)
+		union_dircheckp = NULL;
+
+	return 0;
+}
+
+/*
+ * Module description handed to DECLARE_MODULE() below: the module is
+ * named "union_dircheck", its events go to union_modevent(), and the
+ * last initializer is NULL.
+ */
+static moduledata_t union_mod = {
+	"union_dircheck",
+	union_modevent,
+	NULL
+};
+
+/* Register the module with the SI_SUB_VFS / SI_ORDER_ANY ordering arguments. */
+DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY);
diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c
new file mode 100644
index 0000000..8b58e55
--- /dev/null
+++ b/sys/fs/unionfs/union_vfsops.c
@@ -0,0 +1,509 @@
+/*
+ * Copyright (c) 1994, 1995 The Regents of the University of California.
+ * Copyright (c) 1994, 1995 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union_vfsops.c	8.20 (Berkeley) 5/20/95
+ * $FreeBSD$
+ */
+
+/*
+ * Union Layer
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/filedesc.h>
+#include <miscfs/union/union.h>
+#include <vm/vm_zone.h>
+
+/* Malloc type for the struct union_mount allocated by union_mount(). */
+static MALLOC_DEFINE(M_UNIONFSMNT, "UNION mount", "UNION mount structure");
+
+/*
+ * union_init() is defined outside this file; the static functions are
+ * the VFS operations implemented below and wired into union_vfsops.
+ */
+extern int	union_init __P((struct vfsconf *));
+static int	union_mount __P((struct mount *mp, char *path, caddr_t data,
+				 struct nameidata *ndp, struct proc *p));
+static int	union_root __P((struct mount *mp, struct vnode **vpp));
+static int	union_statfs __P((struct mount *mp, struct statfs *sbp,
+				  struct proc *p));
+static int	union_unmount __P((struct mount *mp, int mntflags,
+				   struct proc *p));
+
+/*
+ * Mount union filesystem
+ *
+ * (mp) is the mount being set up, (path) the user-space mount point
+ * name, (data) the user-space struct union_args, (ndp) a nameidata the
+ * caller provides for the target lookup and (p) the mounting process.
+ *
+ * The vnode covered by the mount and the vnode named by args.target
+ * become the two layers; which one is upper depends on the
+ * UNMNT_ABOVE / UNMNT_BELOW / UNMNT_REPLACE operation in args.mntflags.
+ *
+ * Returns 0 on success.  On failure every reference taken here is
+ * released again and an errno value is returned: EOPNOTSUPP for an
+ * update mount, EDEADLK for a union mounted onto its own upper layer,
+ * EINVAL for a non-directory target or an unknown operation, or the
+ * error from copyin(), namei() or VOP_WHITEOUT().
+ */
+static int
+union_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error = 0;
+	struct union_args args;
+	struct vnode *lowerrootvp = NULLVP;
+	struct vnode *upperrootvp = NULLVP;
+	struct union_mount *um = 0;
+	char *cp = 0;
+	int len;
+	u_int size;
+
+	UDEBUG(("union_mount(mp = %p)\n", (void *)mp));
+
+	/*
+	 * Disable clustered write, otherwise system becomes unstable.
+	 */
+	mp->mnt_flag |= MNT_NOCLUSTERW;
+
+	/*
+	 * Update is not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		/*
+		 * Need to provide.
+		 * 1. a way to convert between rdonly and rdwr mounts.
+		 * 2. support for nfs exports.
+		 */
+		error = EOPNOTSUPP;
+		goto bad;
+	}
+
+	/*
+	 * Get argument
+	 */
+	error = copyin(data, (caddr_t)&args, sizeof(struct union_args));
+	if (error)
+		goto bad;
+
+	/*
+	 * Obtain lower vnode.  Vnode is stored in mp->mnt_vnodecovered.
+	 * We need to reference it but not lock it.
+	 */
+
+	lowerrootvp = mp->mnt_vnodecovered;
+	VREF(lowerrootvp);
+
+#if 0
+	/*
+	 * Unlock lower node to avoid deadlock.
+	 */
+	if (lowerrootvp->v_op == union_vnodeop_p)
+		VOP_UNLOCK(lowerrootvp, 0, p);
+#endif
+
+	/*
+	 * Obtain upper vnode by calling namei() on the path.  The
+	 * upperrootvp will be returned referenced but not locked.
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT,
+	       UIO_USERSPACE, args.target, p);
+
+	error = namei(ndp);
+
+#if 0
+	if (lowerrootvp->v_op == union_vnodeop_p)
+		vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY, p);
+#endif
+	if (error)
+		goto bad;
+
+	/* Keep the target vnode; the parent and the path buffer are not needed. */
+	NDFREE(ndp, NDF_ONLY_PNBUF);
+	upperrootvp = ndp->ni_vp;
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = NULL;
+
+	UDEBUG(("mount_root UPPERVP %p locked = %d\n", upperrootvp,
+	    VOP_ISLOCKED(upperrootvp, NULL)));
+
+	/*
+	 * Check multi union mount to avoid `lock myself again' panic.
+	 * Also require that it be a directory.
+	 *
+	 * The covered vnode only carries a union_node when it belongs
+	 * to a union mount itself, so VTOUNION() must not be applied
+	 * to it before checking its operations vector.
+	 */
+	if (lowerrootvp->v_op == union_vnodeop_p &&
+	    upperrootvp == VTOUNION(lowerrootvp)->un_uppervp) {
+#ifdef DIAGNOSTIC
+		printf("union_mount: multi union mount?\n");
+#endif
+		error = EDEADLK;
+		goto bad;
+	}
+
+	if (upperrootvp->v_type != VDIR) {
+		error = EINVAL;
+		goto bad;
+	}
+
+	/*
+	 * Allocate our union_mount structure and populate the fields.
+	 * The vnode references are stored in the union_mount as held,
+	 * unlocked references.  Depending on the _BELOW flag, the
+	 * filesystems are viewed in a different order.  In effect this
+	 * is the same as providing a mount-under option to the mount
+	 * syscall.
+	 */
+
+	um = (struct union_mount *) malloc(sizeof(struct union_mount),
+				M_UNIONFSMNT, M_WAITOK);
+
+	bzero(um, sizeof(struct union_mount));
+
+	um->um_op = args.mntflags & UNMNT_OPMASK;
+
+	/*
+	 * Once a reference has been moved into (um) the local pointer is
+	 * cleared, so the error path releases each reference only once.
+	 */
+	switch (um->um_op) {
+	case UNMNT_ABOVE:
+		um->um_lowervp = lowerrootvp;
+		um->um_uppervp = upperrootvp;
+		upperrootvp = NULL;
+		lowerrootvp = NULL;
+		break;
+
+	case UNMNT_BELOW:
+		um->um_lowervp = upperrootvp;
+		um->um_uppervp = lowerrootvp;
+		upperrootvp = NULL;
+		lowerrootvp = NULL;
+		break;
+
+	case UNMNT_REPLACE:
+		vrele(lowerrootvp);
+		lowerrootvp = NULL;
+		um->um_uppervp = upperrootvp;
+		um->um_lowervp = lowerrootvp;
+		upperrootvp = NULL;
+		break;
+
+	default:
+		error = EINVAL;
+		goto bad;
+	}
+
+	/*
+	 * Unless the mount is readonly, ensure that the top layer
+	 * supports whiteout operations
+	 */
+	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+		error = VOP_WHITEOUT(um->um_uppervp, NULL, LOOKUP);
+		if (error)
+			goto bad;
+	}
+
+	/*
+	 * Remember the mounter's credentials and the directory creation
+	 * mode.  Nothing below this point can fail, so the credential
+	 * reference never has to be undone on the error path.
+	 */
+	um->um_cred = p->p_ucred;
+	crhold(um->um_cred);
+	um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask;
+
+	/*
+	 * Depending on what you think the MNT_LOCAL flag might mean,
+	 * you may want the && to be || on the conditional below.
+	 * At the moment it has been defined that the filesystem is
+	 * only local if it is all local, ie the MNT_LOCAL flag implies
+	 * that the entire namespace is local.  If you think the MNT_LOCAL
+	 * flag implies that some of the files might be stored locally
+	 * then you will want to change the conditional.
+	 */
+	if (um->um_op == UNMNT_ABOVE) {
+		if (((um->um_lowervp == NULLVP) ||
+		     (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) &&
+		    (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
+			mp->mnt_flag |= MNT_LOCAL;
+	}
+
+	/*
+	 * Copy in the upper layer's RDONLY flag.  This is for the benefit
+	 * of lookup() which explicitly checks the flag, rather than asking
+	 * the filesystem for its own opinion.  This means, that an update
+	 * mount of the underlying filesystem to go from rdonly to rdwr
+	 * will leave the unioned view as read-only.
+	 */
+	mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY);
+
+	mp->mnt_data = (qaddr_t) um;
+	vfs_getnewfsid(mp);
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+
+	/* Prefix the "mounted from" name with the kind of union mount. */
+	switch (um->um_op) {
+	case UNMNT_ABOVE:
+		cp = "<above>:";
+		break;
+	case UNMNT_BELOW:
+		cp = "<below>:";
+		break;
+	case UNMNT_REPLACE:
+		cp = "";
+		break;
+	}
+	len = strlen(cp);
+	bcopy(cp, mp->mnt_stat.f_mntfromname, len);
+
+	cp = mp->mnt_stat.f_mntfromname + len;
+	len = MNAMELEN - len;
+
+	(void) copyinstr(args.target, cp, len - 1, &size);
+	bzero(cp + size, len - size);
+
+	(void)union_statfs(mp, &mp->mnt_stat, p);
+
+	UDEBUG(("union_mount: from %s, on %s\n",
+		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname));
+	return (0);
+
+bad:
+	/*
+	 * Release whatever was acquired: references already moved into
+	 * (um) are dropped through it, the rest through the locals.
+	 */
+	if (um) {
+		if (um->um_uppervp)
+			vrele(um->um_uppervp);
+		if (um->um_lowervp)
+			vrele(um->um_lowervp);
+		/* XXX other fields */
+		free(um, M_UNIONFSMNT);
+	}
+	if (upperrootvp)
+		vrele(upperrootvp);
+	if (lowerrootvp)
+		vrele(lowerrootvp);
+	return (error);
+}
+
+/*
+ * Free reference to union layer
+ *
+ * Unmount the union filesystem (mp).  MNT_FORCE in (mntflags) is
+ * translated into FORCECLOSE for vflush().  Returns 0 on success,
+ * the error from union_root(), or EBUSY when the root vnode is still
+ * referenced after flushing.
+ */
+static int
+union_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+	struct vnode *um_rootvp;
+	int error;
+	int freeing;
+	int flags = 0;
+
+	UDEBUG(("union_unmount(mp = %p)\n", (void *)mp));
+
+	if (mntflags & MNT_FORCE)
+		flags |= FORCECLOSE;
+
+	/* Get the root vnode; it is passed to vflush() and released below. */
+	if ((error = union_root(mp, &um_rootvp)) != 0)
+		return (error);
+
+	/*
+	 * Keep flushing vnodes from the mount list.
+	 * This is needed because of the un_pvp held
+	 * reference to the parent vnode.
+	 * If more vnodes have been freed on a given pass,
+	 * then try again.  The loop will iterate at most
+	 * (d) times, where (d) is the maximum tree depth
+	 * in the filesystem.
+	 */
+	for (freeing = 0; vflush(mp, um_rootvp, flags) != 0;) {
+		struct vnode *vp;
+		int n;
+
+		/* count #vnodes held on mount list */
+		for (n = 0, vp = mp->mnt_vnodelist.lh_first;
+				vp != NULLVP;
+				vp = vp->v_mntvnodes.le_next)
+			n++;
+
+		/* if this is unchanged then stop */
+		if (n == freeing)
+			break;
+
+		/* otherwise try one more time */
+		freeing = n;
+	}
+
+	/* At this point the root vnode should have a single reference */
+	if (um_rootvp->v_usecount > 1) {
+		vput(um_rootvp);
+		return (EBUSY);
+	}
+
+#ifdef DEBUG
+	vprint("union root", um_rootvp);
+#endif	 
+	/*
+	 * Discard references to upper and lower target vnodes.
+	 */
+	if (um->um_lowervp)
+		vrele(um->um_lowervp);
+	vrele(um->um_uppervp);
+	crfree(um->um_cred);
+	/*
+	 * Release reference on underlying root vnode
+	 */
+	vput(um_rootvp);
+	/*
+	 * And blow it away for future re-use
+	 */
+	vgone(um_rootvp);
+	/*
+	 * Finally, throw away the union_mount structure
+	 */
+	free(mp->mnt_data, M_UNIONFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return (0);
+}
+
+/*
+ * Produce the root vnode of the union mount (mp) in *vpp.
+ * Returns the result of union_allocvp().
+ */
+static int
+union_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct union_mount *um;
+	struct vnode *uppervp;
+	struct vnode *lowervp;
+	int error;
+
+	um = MOUNTTOUNIONMOUNT(mp);
+	uppervp = um->um_uppervp;
+	lowervp = um->um_lowervp;
+
+	/*
+	 * Supply an unlocked reference to um_uppervp and to um_lowervp.  It
+	 * is possible for um_uppervp to be locked without the associated
+	 * root union_node being locked.  We let union_allocvp() deal with
+	 * it.
+	 */
+	UDEBUG(("union_root UPPERVP %p locked = %d\n", uppervp,
+	    VOP_ISLOCKED(uppervp, NULL)));
+
+	VREF(uppervp);
+	if (lowervp != NULLVP)
+		VREF(lowervp);
+
+	error = union_allocvp(vpp, mp, NULLVP, NULLVP, NULL,
+	    uppervp, lowervp, 1);
+	UDEBUG(("error %d\n", error));
+	UDEBUG(("union_root2 UPPERVP %p locked = %d\n", uppervp,
+	    VOP_ISLOCKED(uppervp, NULL)));
+
+	return (error);
+}
+
+/*
+ * Report filesystem statistics for the union mount (mp) in (sbp).
+ *
+ * Totals (f_blocks, f_files) are the sum of the lower and upper layer,
+ * free counts and the flag / size fields come from the upper layer
+ * only.  Returns 0 on success or the error from either layer's
+ * VFS_STATFS(); in the error case (sbp) may be partially updated.
+ */
+static int
+union_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	int error;
+	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+	struct statfs mstat;
+	int lbsize;
+
+	UDEBUG(("union_statfs(mp = %p, lvp = %p, uvp = %p)\n",
+	    (void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp));
+
+	/* With no lower layer the zeroed structure stands in for its stats. */
+	bzero(&mstat, sizeof(mstat));
+
+	if (um->um_lowervp) {
+		error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, p);
+		if (error)
+			return (error);
+	}
+
+	/* now copy across the "interesting" information and fake the rest */
+#if 0
+	sbp->f_type = mstat.f_type;
+	sbp->f_flags = mstat.f_flags;
+	sbp->f_bsize = mstat.f_bsize;
+	sbp->f_iosize = mstat.f_iosize;
+#endif
+	/* Remember the lower block size; mstat is reused for the upper layer. */
+	lbsize = mstat.f_bsize;
+	sbp->f_blocks = mstat.f_blocks;
+	sbp->f_bfree = mstat.f_bfree;
+	sbp->f_bavail = mstat.f_bavail;
+	sbp->f_files = mstat.f_files;
+	sbp->f_ffree = mstat.f_ffree;
+
+	error = VFS_STATFS(um->um_uppervp->v_mount, &mstat, p);
+	if (error)
+		return (error);
+
+	sbp->f_flags = mstat.f_flags;
+	sbp->f_bsize = mstat.f_bsize;
+	sbp->f_iosize = mstat.f_iosize;
+
+	/*
+	 * if the lower and upper blocksizes differ, then frig the
+	 * block counts so that the sizes reported by df make some
+	 * kind of sense.  none of this makes sense though.
+	 */
+
+	if (mstat.f_bsize != lbsize)
+		sbp->f_blocks = ((off_t) sbp->f_blocks * lbsize) / mstat.f_bsize;
+
+	/*
+	 * The "total" fields count total resources in all layers,
+	 * the "free" fields count only those resources which are
+	 * free in the upper layer (since only the upper layer
+	 * is writeable).
+	 */
+	sbp->f_blocks += mstat.f_blocks;
+	sbp->f_bfree = mstat.f_bfree;
+	sbp->f_bavail = mstat.f_bavail;
+	sbp->f_files += mstat.f_files;
+	sbp->f_ffree = mstat.f_ffree;
+
+	/*
+	 * When the caller passed its own buffer rather than the mount's
+	 * cached statfs, fill in the identity fields from the mount.
+	 */
+	if (sbp != &mp->mnt_stat) {
+		sbp->f_type = mp->mnt_vfc->vfc_typenum;
+		bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	}
+	return (0);
+}
+
+/*
+ * VFS operations vector for the union filesystem.  union_mount,
+ * union_unmount, union_root and union_statfs are implemented in this
+ * file, union_init is defined elsewhere, and every other slot is
+ * filled with a vfs_std* routine.
+ */
+static struct vfsops union_vfsops = {
+	union_mount,
+	vfs_stdstart,	/* underlying start already done */
+	union_unmount,
+	union_root,
+	vfs_stdquotactl,
+	union_statfs,
+	vfs_stdsync,    /* XXX assumes no cached data on union level */
+	vfs_stdvget,
+	vfs_stdfhtovp,
+	vfs_stdcheckexp,
+	vfs_stdvptofh,
+	union_init,
+	vfs_stduninit,
+	vfs_stdextattrctl,
+};
+
+/* Register the operations vector under the name "union" with VFCF_LOOPBACK. */
+VFS_SET(union_vfsops, union, VFCF_LOOPBACK);
diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
new file mode 100644
index 0000000..03d41a0
--- /dev/null
+++ b/sys/fs/unionfs/union_vnops.c
@@ -0,0 +1,1981 @@
+/*
+ * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
+ * Copyright (c) 1992, 1993, 1994, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union_vnops.c	8.32 (Berkeley) 6/23/95
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/fcntl.h>
+#include <sys/stat.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/lock.h>
+#include <sys/sysctl.h>
+#include <miscfs/union/union.h>
+
+#include <vm/vm.h>
+#include <vm/vnode_pager.h>
+
+#include <vm/vm_page.h>
+#include <vm/vm_object.h>
+
+int uniondebug = 0;	/* debug knob exported via sysctl below; presumably read by UDEBUG() -- confirm in union.h */
+
+#if UDEBUG_ENABLED	/* knob is writable only when UDEBUG_ENABLED is non-zero */
+SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, "");
+#else	/* otherwise the same node is registered read-only */
+SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, "");
+#endif
+
+static int	union_access __P((struct vop_access_args *ap));	/* forward declarations of this file's static handlers */
+static int	union_advlock __P((struct vop_advlock_args *ap));
+static int	union_bmap __P((struct vop_bmap_args *ap));
+static int	union_close __P((struct vop_close_args *ap));
+static int	union_create __P((struct vop_create_args *ap));
+static int	union_fsync __P((struct vop_fsync_args *ap));
+static int	union_getattr __P((struct vop_getattr_args *ap));
+static int	union_inactive __P((struct vop_inactive_args *ap));
+static int	union_ioctl __P((struct vop_ioctl_args *ap));
+static int	union_lease __P((struct vop_lease_args *ap));
+static int	union_link __P((struct vop_link_args *ap));
+static int	union_lock __P((struct vop_lock_args *ap));
+static int	union_lookup __P((struct vop_lookup_args *ap));
+static int	union_lookup1 __P((struct vnode *udvp, struct vnode **dvp,
+				   struct vnode **vpp,
+				   struct componentname *cnp));	/* 2nd parameter is named pdvp in the definition */
+static int	union_mkdir __P((struct vop_mkdir_args *ap));
+static int	union_mknod __P((struct vop_mknod_args *ap));
+static int	union_mmap __P((struct vop_mmap_args *ap));
+static int	union_open __P((struct vop_open_args *ap));
+static int	union_pathconf __P((struct vop_pathconf_args *ap));
+static int	union_print __P((struct vop_print_args *ap));
+static int	union_read __P((struct vop_read_args *ap));
+static int	union_readdir __P((struct vop_readdir_args *ap));
+static int	union_readlink __P((struct vop_readlink_args *ap));
+static int	union_reclaim __P((struct vop_reclaim_args *ap));
+static int	union_remove __P((struct vop_remove_args *ap));
+static int	union_rename __P((struct vop_rename_args *ap));
+static int	union_revoke __P((struct vop_revoke_args *ap));
+static int	union_rmdir __P((struct vop_rmdir_args *ap));
+static int	union_poll __P((struct vop_poll_args *ap));
+static int	union_setattr __P((struct vop_setattr_args *ap));
+static int	union_strategy __P((struct vop_strategy_args *ap));
+static int	union_getpages __P((struct vop_getpages_args *ap));
+static int	union_putpages __P((struct vop_putpages_args *ap));
+static int	union_symlink __P((struct vop_symlink_args *ap));
+static int	union_unlock __P((struct vop_unlock_args *ap));
+static int	union_whiteout __P((struct vop_whiteout_args *ap));
+static int	union_write __P((struct vop_read_args *ap));	/* sic: declared and defined with vop_read_args, not vop_write_args */
+
+/*
+ *	union_lock_upper:
+ *
+ *	Take a private reference and an exclusive (recursion-permitted)
+ *	lock on the upper-layer vnode of `un', if it has one.  Returns that
+ *	vnode, or NULL when the node has no upper layer.  The caller undoes
+ *	both with union_unlock_upper().
+ */
+static __inline
+struct vnode *
+union_lock_upper(struct union_node *un, struct proc *p)
+{
+	struct vnode *uvp = un->un_uppervp;
+
+	/* No upper layer: nothing to reference or lock. */
+	if (uvp == NULL)
+		return(uvp);
+
+	VREF(uvp);
+	vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, p);
+	KASSERT(uvp->v_usecount > 0, ("uppervp usecount is 0"));
+	return(uvp);
+}
+
+/*
+ *	union_unlock_upper:
+ *
+ *	Counterpart of union_lock_upper(): vput() the vnode it returned.
+ *	`p' is not used here; it is kept so the lock and unlock helpers
+ *	take parallel arguments.
+ */
+static __inline
+void
+union_unlock_upper(struct vnode *uppervp, struct proc *p)
+{
+	vput(uppervp);
+	return;
+}
+
+/*
+ *	union_lock_other:
+ *
+ *	Reference and exclusively lock the vnode backing `un': the upper
+ *	vnode when one is attached, otherwise the lower vnode.  Returns the
+ *	vnode that was locked, or NULL if neither layer is present.
+ *	Release it with union_unlock_other().
+ */
+static __inline
+struct vnode *
+union_lock_other(struct union_node *un, struct proc *p)
+{
+	struct vnode *lvp;
+
+	/* The upper layer wins whenever it exists. */
+	if (un->un_uppervp != NULL)
+		return(union_lock_upper(un, p));
+
+	lvp = un->un_lowervp;
+	if (lvp != NULL) {
+		VREF(lvp);
+		vn_lock(lvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, p);
+	}
+	return(lvp);
+}
+
+/*
+ *	union_unlock_other:
+ *
+ *	Counterpart of union_lock_other(): vput() the vnode it returned.
+ *	`p' is not used here.
+ */
+static __inline
+void
+union_unlock_other(struct vnode *vp, struct proc *p)
+{
+	vput(vp);
+	return;
+}
+
+/*
+ *	union_lookup1:
+ *
+ *	Perform a single-layer lookup of cnp starting at *pdvp, stepping
+ *	across mount points in both directions.  union_lookup() calls this
+ *	once for the upper layer and once for the lower layer.
+ *
+ *	udvp	must be exclusively locked on call and will remain
+ *		exclusively locked on return.  This is the mount point
+ *		for our filesystem (union_lookup() passes the layer's
+ *		um_uppervp or um_lowervp here).
+ *
+ *	pdvp	Our base directory, locked and referenced.
+ *		The passed dvp will be dereferenced and unlocked on return
+ *		and a new dvp will be returned which is locked and
+ *		referenced in the same variable.
+ *
+ *	vpp	is filled in with the result if no error occurred,
+ *		locked and ref'd.
+ *
+ *		If an error is returned, *vpp is set to NULLVP.  If no
+ *		error occurs, *vpp is returned with a reference and an
+ *		exclusive lock.
+ *
+ *	Returns 0 or the error from VOP_LOOKUP() / VFS_ROOT().
+ */
+
+static int
+union_lookup1(udvp, pdvp, vpp, cnp)
+	struct vnode *udvp;
+	struct vnode **pdvp;
+	struct vnode **vpp;
+	struct componentname *cnp;
+{
+	int error;
+	struct proc *p = cnp->cn_proc;
+	struct vnode *dvp = *pdvp;
+	struct vnode *tdvp;
+	struct mount *mp;
+
+	/*
+	 * If stepping up the directory tree, check for going
+	 * back across the mount point, in which case do what
+	 * lookup would do by stepping back down the mount
+	 * hierarchy.
+	 */
+	if (cnp->cn_flags & ISDOTDOT) {
+		while ((dvp != udvp) && (dvp->v_flag & VROOT)) {
+			/*
+			 * Don't do the NOCROSSMOUNT check
+			 * at this level.  By definition,
+			 * union fs deals with namespaces, not
+			 * filesystems.
+			 */
+			tdvp = dvp;
+			dvp = dvp->v_mount->mnt_vnodecovered;
+			VREF(dvp);	/* reference the covered vnode before dropping the old one */
+			vput(tdvp);
+			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
+		}
+	}
+
+	/*
+	 * Set return dvp to be the upperdvp parent directory.
+	 */
+	*pdvp = dvp;
+
+	/*
+	 * If the VOP_LOOKUP call generates an error, tdvp is invalid and no
+	 * changes will have been made to dvp, so we are set to return.
+	 */
+
+        error = VOP_LOOKUP(dvp, &tdvp, cnp);
+	if (error) {
+		UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags));
+		*vpp = NULL;
+		return (error);
+	}
+
+	/*
+	 * The parent directory will have been unlocked, unless lookup
+	 * found the last component or if dvp == tdvp (tdvp must be locked).
+	 *
+	 * We want our dvp to remain locked and ref'd.  We also want tdvp
+	 * to remain locked and ref'd.
+	 */
+	UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags));
+
+	if (dvp != tdvp && (cnp->cn_flags & ISLASTCN) == 0)
+		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
+
+	/*
+	 * Lastly check if the current node is a mount point in
+	 * which case walk up the mount hierarchy making sure not to
+	 * bump into the root of the mount tree (ie. dvp != udvp).
+	 *
+	 * We use dvp as a temporary variable here, it is no longer related
+	 * to the dvp above.  However, we have to ensure that both *pdvp and
+	 * tdvp are locked on return.
+	 */
+
+	dvp = tdvp;
+	while (
+	    dvp != udvp && 
+	    (dvp->v_type == VDIR) &&
+	    (mp = dvp->v_mountedhere)
+	) {
+		int relock_pdvp = 0;
+
+		if (vfs_busy(mp, 0, 0, p))	/* mount could not be busied: re-test the loop condition */
+			continue;
+
+		if (dvp == *pdvp)	/* the vput() below would also unlock the caller's parent */
+			relock_pdvp = 1;
+		vput(dvp);
+		dvp = NULL;
+		error = VFS_ROOT(mp, &dvp);
+
+		vfs_unbusy(mp, p);
+
+		if (relock_pdvp)	/* restore the lock on *pdvp before any return */
+			vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY, p);
+
+		if (error) {
+			*vpp = NULL;
+			return (error);
+		}
+	}
+	*vpp = dvp;
+	return (0);
+}
+
+/*
+ *	union_lookup:
+ *
+ *	Look up ap->a_cnp in the union directory ap->a_dvp.  The upper
+ *	layer is consulted first; unless a whiteout or an opaque upper
+ *	directory hides it, the lower layer is consulted as well.  The two
+ *	results are then combined into a union vnode by union_allocvp().
+ *
+ *	*ap->a_vpp is cleared to NULLVP on entry and is only filled in on
+ *	the success paths below.
+ *
+ *	LOCKPARENT is forced on for the per-layer lookups and restored to
+ *	the caller's setting before returning.  dvp is left locked on
+ *	return when an error other than EJUSTRETURN occurred, when the
+ *	caller asked for LOCKPARENT and this was the last component, or
+ *	when the result is dvp itself; otherwise it is unlocked.
+ *
+ *	Returns 0, EJUSTRETURN, EROFS for a modifying operation on a
+ *	read-only mount, or an error from the layer lookups,
+ *	union_mkshadow() or union_allocvp().
+ */
+static int
+union_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	int error;
+	int uerror, lerror;
+	struct vnode *uppervp, *lowervp;
+	struct vnode *upperdvp, *lowerdvp;
+	struct vnode *dvp = ap->a_dvp;		/* starting dir */
+	struct union_node *dun = VTOUNION(dvp);	/* associated union node */
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	int lockparent = cnp->cn_flags & LOCKPARENT;
+	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
+	struct ucred *saved_cred = NULL;
+	int iswhiteout;
+	struct vattr va;
+
+	*ap->a_vpp = NULLVP;
+
+	/*
+	 * Disallow write attempts to the filesystem mounted read-only.
+	 */
+	if ((cnp->cn_flags & ISLASTCN) &&
+	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
+		return (EROFS);
+	}
+
+	/*
+	 * For any lookups we do, always return with the parent locked.
+	 */
+	cnp->cn_flags |= LOCKPARENT;
+
+	lowerdvp = dun->un_lowervp;
+	uppervp = NULLVP;
+	lowervp = NULLVP;
+	iswhiteout = 0;
+
+	uerror = ENOENT;
+	lerror = ENOENT;
+
+	/*
+	 * Get a private lock on uppervp and a reference, effectively
+	 * taking it out of the union_node's control.
+	 *
+	 * We must lock upperdvp while holding our lock on dvp
+	 * to avoid a deadlock.
+	 */
+	upperdvp = union_lock_upper(dun, p);
+
+	/*
+	 * Do the lookup in the upper level.
+	 * If that level consumes additional pathnames,
+	 * then assume that something special is going
+	 * on and just return that vnode.
+	 */
+	if (upperdvp != NULLVP) {
+		/*
+		 * We do not have to worry about the DOTDOT case, we've
+		 * already unlocked dvp.
+		 */
+		UDEBUG(("A %p\n", upperdvp));
+
+		/*
+		 * Do the lookup.   We must supply a locked and referenced
+		 * upperdvp to the function and will get a new locked and
+		 * referenced upperdvp back with the old having been
+		 * dereferenced.
+		 *
+		 * If an error is returned, uppervp will be NULLVP.  If no
+		 * error occurs, uppervp will be the locked and referenced
+		 * return vnode or possibly NULL, depending on what is being
+		 * requested.  It is possible that the returned uppervp
+		 * will be the same as upperdvp.
+		 */
+		uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp);
+		UDEBUG((
+		    "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n",
+		    uerror,
+		    upperdvp,
+		    upperdvp->v_usecount,
+		    VOP_ISLOCKED(upperdvp, NULL),
+		    uppervp,
+		    (uppervp ? uppervp->v_usecount : -99),
+		    (uppervp ? VOP_ISLOCKED(uppervp, NULL) : -99)
+		));
+
+		/*
+		 * Disallow write attempts to the filesystem mounted read-only.
+		 */
+		if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) &&
+		    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+		    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) {
+			error = EROFS;
+			goto out;
+		}
+
+		/*
+		 * Special case.  If cn_consume != 0 skip out.  The result
+		 * of the lookup is transferred to our return variable.  If
+		 * an error occurred we have to throw away the results.
+		 */
+
+		if (cnp->cn_consume != 0) {
+			if ((error = uerror) == 0) {
+				*ap->a_vpp = uppervp;
+				uppervp = NULL;
+			}
+			goto out;
+		}
+
+		/*
+		 * Calculate whiteout, fall through.  The name is hidden
+		 * either by an explicit whiteout entry or because the
+		 * upper directory carries the OPAQUE flag.
+		 */
+
+		if (uerror == ENOENT || uerror == EJUSTRETURN) {
+			if (cnp->cn_flags & ISWHITEOUT) {
+				iswhiteout = 1;
+			} else if (lowerdvp != NULLVP) {
+				int terror;
+
+				terror = VOP_GETATTR(upperdvp, &va,
+					cnp->cn_cred, cnp->cn_proc);
+				if (terror == 0 && (va.va_flags & OPAQUE))
+					iswhiteout = 1;
+			}
+		}
+	}
+
+	/*
+	 * In a similar way to the upper layer, do the lookup
+	 * in the lower layer.   This time, if there is some
+	 * component magic going on, then vput whatever we got
+	 * back from the upper layer and return the lower vnode
+	 * instead.
+	 */
+
+	if (lowerdvp != NULLVP && !iswhiteout) {
+		int nameiop;
+
+		UDEBUG(("B %p\n", lowerdvp));
+
+		/*
+		 * Force only LOOKUPs on the lower node, since
+		 * we won't be making changes to it anyway.
+		 */
+		nameiop = cnp->cn_nameiop;
+		cnp->cn_nameiop = LOOKUP;
+		if (um->um_op == UNMNT_BELOW) {
+			saved_cred = cnp->cn_cred;
+			cnp->cn_cred = um->um_cred;
+		}
+
+		/*
+		 * We shouldn't have to worry about locking interactions
+		 * between the lower layer and our union layer (w.r.t.
+		 * `..' processing) because we don't futz with lowervp
+		 * locks in the union-node instantiation code path.
+		 *
+		 * union_lookup1() requires lowervp to be locked on entry,
+		 * and it will be unlocked on return.  The ref count will
+		 * not change.  On return lowervp doesn't represent anything
+		 * to us so we NULL it out.
+		 */
+		VREF(lowerdvp);
+		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p);
+		lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp);
+		if (lowerdvp == lowervp)
+			vrele(lowerdvp);
+		else
+			vput(lowerdvp);
+		lowerdvp = NULL;	/* lowerdvp invalid after vput */
+
+		if (um->um_op == UNMNT_BELOW)
+			cnp->cn_cred = saved_cred;
+		cnp->cn_nameiop = nameiop;
+
+		if (cnp->cn_consume != 0 || lerror == EACCES) {
+			if ((error = lerror) == 0) {
+				*ap->a_vpp = lowervp;
+				lowervp = NULL;
+			}
+			goto out;
+		}
+	} else {
+		UDEBUG(("C %p\n", lowerdvp));
+		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
+			if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) {
+				VREF(lowervp);
+				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, p);
+				lerror = 0;
+			}
+		}
+	}
+
+	/*
+	 * Ok.  Now we have uerror, uppervp, upperdvp, lerror, and lowervp.
+	 *
+	 * 1. If both layers returned an error, select the upper layer.
+	 *
+	 * 2. If the upper layer failed and the bottom layer succeeded,
+	 *    two subcases occur:
+	 *
+	 *	a.  The bottom vnode is not a directory, in which case
+	 *	    just return a new union vnode referencing an
+	 *	    empty top layer and the existing bottom layer.
+	 *
+	 *	b.  The bottom vnode is a directory, in which case
+	 *	    create a new directory in the top layer and
+	 *	    fall through to case 3.
+	 *
+	 * 3. If the top layer succeeded then return a new union
+	 *    vnode referencing whatever the new top layer and
+	 *    whatever the bottom layer returned.
+	 */
+
+	/* case 1. */
+	if ((uerror != 0) && (lerror != 0)) {
+		error = uerror;
+		goto out;
+	}
+
+	/* case 2. */
+	if (uerror != 0 /* && (lerror == 0) */ ) {
+		if (lowervp->v_type == VDIR) { /* case 2b. */
+			KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL"));
+			/*
+			 * oops, uppervp has a problem, we may have to shadow.
+			 */
+			uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
+			if (uerror) {
+				error = uerror;
+				goto out;
+			}
+		}
+	}
+
+	/*
+	 * Must call union_allocvp with both the upper and lower vnodes
+	 * referenced and the upper vnode locked.   ap->a_vpp is returned
+	 * referenced and locked.  lowervp, uppervp, and upperdvp are
+	 * absorbed by union_allocvp() whether it succeeds or fails.
+	 *
+	 * upperdvp is the parent directory of uppervp which may be
+	 * different, depending on the path, from dvp->un_uppervp.  That's
+	 * why it is a separate argument.  Note that it must be unlocked.
+	 *
+	 * dvp must be locked on entry to the call and will be locked on
+	 * return.
+	 */
+
+	if (uppervp && uppervp != upperdvp)
+		VOP_UNLOCK(uppervp, 0, p);
+	if (lowervp)
+		VOP_UNLOCK(lowervp, 0, p);
+	if (upperdvp)
+		VOP_UNLOCK(upperdvp, 0, p);
+
+	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
+			      uppervp, lowervp, 1);
+
+	UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp, (*ap->a_vpp) ? ((*ap->a_vpp)->v_usecount) : -99));
+
+	uppervp = NULL;
+	upperdvp = NULL;
+	lowervp = NULL;
+
+	/*
+	 *	Termination Code
+	 *
+	 *	- put away any extra junk laying around.  Note that lowervp
+	 *	  (if not NULL) will never be the same as *ap->a_vpp and
+	 *	  neither will uppervp, because when we set that state we
+	 *	  NULL-out lowervp or uppervp.  On the other hand, upperdvp
+	 *	  may match uppervp or *ap->a_vpp.
+	 *
+	 *	- relock/unlock dvp if appropriate.
+	 */
+
+out:
+	if (upperdvp) {
+		if (upperdvp == uppervp || upperdvp == *ap->a_vpp)
+			vrele(upperdvp);
+		else
+			vput(upperdvp);
+	}
+
+	if (uppervp)
+		vput(uppervp);
+
+	if (lowervp)
+		vput(lowervp);
+
+	/*
+	 * Restore LOCKPARENT state
+	 */
+
+	if (!lockparent)
+		cnp->cn_flags &= ~LOCKPARENT;
+
+	UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp,
+		((*ap->a_vpp) ? (*ap->a_vpp)->v_usecount : -99),
+		lowervp, uppervp));
+
+	/*
+	 * dvp lock state, determine whether to relock dvp.  dvp is expected
+	 * to be locked on return if:
+	 *
+	 *	- there was an error (except not EJUSTRETURN), or
+	 *	- we hit the last component and lockparent is true
+	 *
+	 * dvp_is_locked is the current state of the dvp lock, not counting
+	 * the possibility that *ap->a_vpp == dvp (in which case it is locked
+	 * anyway).  Note that *ap->a_vpp == dvp only if no error occurred.
+	 */
+
+	if (*ap->a_vpp != dvp) {
+		if ((error == 0 || error == EJUSTRETURN) &&
+		    (!lockparent || (cnp->cn_flags & ISLASTCN) == 0)) {
+			VOP_UNLOCK(dvp, 0, p);
+		}
+	}
+
+	/*
+	 * Diagnostics
+	 */
+
+#ifdef DIAGNOSTIC
+	if (cnp->cn_namelen == 1 &&
+	    cnp->cn_nameptr[0] == '.' &&
+	    *ap->a_vpp != dvp) {
+		/*
+		 * Report the vnode we are actually returning, not the
+		 * address of the caller's result slot.
+		 */
+		panic("union_lookup returning . (%p) not same as startdir (%p)", *ap->a_vpp, dvp);
+	}
+#endif
+
+	return (error);
+}
+
+/*
+ * 	union_create:
+ *
+ *	Create the new object in the upper-layer directory with VOP_CREATE()
+ *	and hand the resulting vnode to union_allocvp() to obtain *a_vpp.
+ *
+ *	a_dvp is locked on entry and remains locked on return.  a_vpp is
+ *	returned locked if no error occurs, otherwise it is garbage.
+ *	Returns EROFS when the directory has no upper layer.
+ *
+ *	Arguments (struct vop_create_args): a_dvp, a_vpp, a_cnp, a_vap.
+ */
+
+static int
+union_create(struct vop_create_args *ap)
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	struct vnode *upperdvp;
+	struct vnode *newvp;
+	int error;
+
+	upperdvp = union_lock_upper(VTOUNION(ap->a_dvp), p);
+	if (upperdvp == NULL)
+		return (EROFS);
+
+	error = VOP_CREATE(upperdvp, &newvp, cnp, ap->a_vap);
+	if (error == 0) {
+		/* union_allocvp() is handed the new vnode unlocked. */
+		VOP_UNLOCK(newvp, 0, p);
+		UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n", newvp, newvp->v_usecount));
+		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, NULLVP,
+			NULLVP, cnp, newvp, NULLVP, 1);
+		UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, newvp->v_usecount));
+	}
+	union_unlock_upper(upperdvp, p);
+	return (error);
+}
+
+/*
+ *	union_whiteout:
+ *
+ *	Forward the whiteout request to the upper-layer directory.
+ *	Returns EOPNOTSUPP when the directory has no upper layer,
+ *	otherwise the result of VOP_WHITEOUT().
+ */
+static int
+union_whiteout(ap)
+	struct vop_whiteout_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+		int a_flags;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct componentname *cnp = ap->a_cnp;
+	struct vnode *uppervp;
+	int error = EOPNOTSUPP;
+
+	if ((uppervp = union_lock_upper(un, cnp->cn_proc)) != NULLVP) {
+		/*
+		 * Operate on the vnode we actually referenced and locked
+		 * rather than re-reading un->un_uppervp.
+		 */
+		error = VOP_WHITEOUT(uppervp, cnp, ap->a_flags);
+		union_unlock_upper(uppervp, cnp->cn_proc);
+	}
+	return(error);
+}
+
+/*
+ * 	union_mknod:
+ *
+ *	Pass the request straight to VOP_MKNOD() on the upper-layer
+ *	directory; EROFS is returned when there is no upper layer.
+ *
+ *	a_dvp is locked on entry and should remain locked on return.
+ *	a_vpp is garbage whether an error occurs or not.
+ *
+ *	Arguments (struct vop_mknod_args): a_dvp, a_vpp, a_cnp, a_vap.
+ */
+
+static int
+union_mknod(struct vop_mknod_args *ap)
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct vnode *upperdvp;
+	int error;
+
+	upperdvp = union_lock_upper(VTOUNION(ap->a_dvp), cnp->cn_proc);
+	if (upperdvp == NULL)
+		return (EROFS);
+
+	error = VOP_MKNOD(upperdvp, ap->a_vpp, cnp, ap->a_vap);
+	union_unlock_upper(upperdvp, cnp->cn_proc);
+	return (error);
+}
+
+/*
+ *	union_open:
+ *
+ *	run open VOP.  When opening the underlying vnode we have to mimic
+ *	vn_open.  What we *really* need to do to avoid screwups if the
+ *	open semantics change is to call vn_open().  For example, ufs blows
+ *	up if you open a file but do not vmio it prior to writing.
+ *
+ *	The upper vnode is opened when one exists.  Otherwise a regular
+ *	file opened for writing is first copied up (contents included
+ *	unless O_TRUNC is set) and the new upper vnode is opened; any other
+ *	open goes to the lower vnode and is counted in un_openl, which
+ *	union_close() decrements again.
+ *
+ *	Returns 0 or the error from union_copyup(), VOP_OPEN() or
+ *	vfs_object_create().
+ */
+
+static int
+union_open(ap)
+	struct vop_open_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *tvp;
+	int mode = ap->a_mode;
+	struct ucred *cred = ap->a_cred;
+	struct proc *p = ap->a_p;
+	int error = 0;
+	int tvpisupper = 1;
+
+	/*
+	 * If there is an existing upper vp then simply open that.
+	 * The upper vp takes precedence over the lower vp.  When opening
+	 * a lower vp for writing copy it to the uppervp and then open the
+	 * uppervp.
+	 *
+	 * At the end of this section tvp will be left locked.
+	 */
+	if ((tvp = union_lock_upper(un, p)) == NULLVP) {
+		/*
+		 * If the lower vnode is being opened for writing, then
+		 * copy the file contents to the upper vnode and open that,
+		 * otherwise can simply open the lower vnode.
+		 */
+		tvp = un->un_lowervp;
+		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
+			int docopy = !(mode & O_TRUNC);
+			error = union_copyup(un, docopy, cred, p);
+			/* May be NULLVP if the copyup failed. */
+			tvp = union_lock_upper(un, p);
+		} else {
+			un->un_openl++;
+			VREF(tvp);
+			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p);
+			tvpisupper = 0;
+		}
+	}
+
+	/*
+	 * We are holding the correct vnode, open it
+	 */
+
+	if (error == 0) {
+		error = VOP_OPEN(tvp, mode, cred, p);
+
+		/*
+		 * A failed open of the lower vnode is never followed by a
+		 * close, so take back the count added above; otherwise
+		 * un_openl would overstate the number of lower opens.
+		 */
+		if (error != 0 && !tvpisupper)
+			--un->un_openl;
+	}
+
+	/*
+	 * Absolutely necessary or UFS will blowup
+	 */
+	if (error == 0 && vn_canvmio(tvp) == TRUE) {
+		error = vfs_object_create(tvp, p, cred);
+	}
+
+	/*
+	 * Release any locks held
+	 */
+	if (tvpisupper) {
+		if (tvp)
+			union_unlock_upper(tvp, p);
+	} else {
+		vput(tvp);
+	}
+	return (error);
+}
+
+/*
+ *	union_close:
+ *
+ *	Redirect the close to the upper vnode if there is one, otherwise
+ *	to the lower vnode (dropping one un_openl count first).  The
+ *	argument block is reused with a_vp rewritten to the target.
+ *
+ *	It is unclear whether a_vp is passed locked or unlocked.  Whatever
+ *	the case we do not change it.
+ *
+ *	Arguments (struct vop_close_args): a_vp, a_fflag, a_cred, a_p.
+ */
+
+static int
+union_close(struct vop_close_args *ap)
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *targetvp = un->un_uppervp;
+
+	if (targetvp == NULLVP) {
+#ifdef UNION_DIAGNOSTIC
+		if (un->un_openl <= 0)
+			panic("union: un_openl cnt");
+#endif
+		un->un_openl--;
+		targetvp = un->un_lowervp;
+	}
+
+	ap->a_vp = targetvp;
+	return (VCALL(targetvp, VOFFSET(vop_close), ap));
+}
+
+/*
+ *	union_access:
+ *
+ *	Check access permission on the union vnode.  The check is made
+ *	against the upper vnode when there is one, otherwise against the
+ *	lower vnode, so that no additional file permissions are given
+ *	away simply because the user caused an implicit file copy.
+ *
+ *	Returns EROFS for a write request on a regular file, directory or
+ *	symlink of a read-only mount, EACCES when neither layer is present,
+ *	otherwise the result of the layer's access check.  Note that the
+ *	caller's argument block is rewritten (a_vp, and possibly a_mode and
+ *	a_cred) before being passed down.
+ *
+ *	Arguments (struct vop_access_args): a_desc, a_vp, a_mode, a_cred,
+ *	a_p.
+ */
+static int
+union_access(struct vop_access_args *ap)
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct proc *p = ap->a_p;
+	struct union_mount *um;
+	struct vnode *vp;
+	int error;
+
+	/*
+	 * Refuse writes to file-like objects when we are mounted read-only.
+	 */
+	if ((ap->a_mode & VWRITE) != 0 &&
+	    (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
+		if (ap->a_vp->v_type == VREG ||
+		    ap->a_vp->v_type == VDIR ||
+		    ap->a_vp->v_type == VLNK)
+			return (EROFS);
+	}
+
+	/* Upper layer present: its verdict is final. */
+	vp = union_lock_upper(un, p);
+	if (vp != NULLVP) {
+		ap->a_vp = vp;
+		error = VCALL(vp, VOFFSET(vop_access), ap);
+		union_unlock_upper(vp, p);
+		return(error);
+	}
+
+	vp = un->un_lowervp;
+	if (vp == NULLVP)
+		return(EACCES);
+
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+	ap->a_vp = vp;
+
+	/*
+	 * On a read-write union mount, do not ask the lower layer about
+	 * VWRITE: we want to allow writes and lowervp may be read-only.
+	 */
+	if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0)
+		ap->a_mode &= ~VWRITE;
+
+	error = VCALL(vp, VOFFSET(vop_access), ap);
+	if (error == 0) {
+		um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount);
+
+		/* For UNMNT_BELOW, check again with the mount's own cred. */
+		if (um->um_op == UNMNT_BELOW) {
+			ap->a_cred = um->um_cred;
+			error = VCALL(vp, VOFFSET(vop_access), ap);
+		}
+	}
+	VOP_UNLOCK(vp, 0, p);
+	return(error);
+}
+
+/*
+ *	union_getattr:
+ *
+ *	We handle getattr only to change the fsid and track object sizes.
+ *
+ *	It's not clear whether VOP_GETATTR is to be called with the vnode
+ *	locked or not.  stat() calls it with (vp) locked, and fstat calls
+ *	it with (vp) unlocked.
+ *
+ *	Because of this we cannot use our normal locking functions if we
+ *	do not intend to lock the main a_vp node.  At the moment we run
+ *	without any specific locking at all; take care if locking is ever
+ *	added to this function.
+ *
+ *	Arguments (struct vop_getattr_args): a_vp, a_vap, a_cred, a_p.
+ */
+
+static int
+union_getattr(struct vop_getattr_args *ap)
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *uppervp = un->un_uppervp;
+	struct vnode *lowervp;		/* lower vnode to query, or NULLVP */
+	struct vattr *lvap = ap->a_vap;	/* where the lower attributes go */
+	struct vattr scratch;
+	int error;
+
+	/*
+	 * Some programs walk the filesystem hierarchy by counting
+	 * links to directories to avoid stat'ing all the time.
+	 * This means the link count on directories needs to be "correct".
+	 * The only way to do that is to call getattr on both layers
+	 * and fix up the link count.  The link count will not necessarily
+	 * be accurate but will be large enough to defeat the tree walkers.
+	 */
+
+	if (uppervp != NULLVP) {
+		error = VOP_GETATTR(uppervp, ap->a_vap, ap->a_cred, ap->a_p);
+		if (error)
+			return (error);
+		/* XXX isn't this dangerous without a lock? */
+		union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL);
+
+		/*
+		 * Only an upper directory with a lower counterpart needs
+		 * the second query, and then into a scratch buffer so the
+		 * upper attributes are preserved.
+		 */
+		if (uppervp->v_type == VDIR && un->un_lowervp != NULLVP) {
+			lowervp = un->un_lowervp;
+			lvap = &scratch;
+		} else {
+			lowervp = NULLVP;
+		}
+	} else {
+		/* No upper layer: the lower attributes are the answer. */
+		lowervp = un->un_lowervp;
+	}
+
+	if (lowervp != NULLVP) {
+		error = VOP_GETATTR(lowervp, lvap, ap->a_cred, ap->a_p);
+		if (error)
+			return (error);
+		/* XXX isn't this dangerous without a lock? */
+		union_newsize(ap->a_vp, VNOVAL, lvap->va_size);
+	}
+
+	/* Both layers were queried: fold the lower link count in. */
+	if (lvap != ap->a_vap && lvap->va_type == VDIR)
+		ap->a_vap->va_nlink += lvap->va_nlink;
+	return (0);
+}
+
+/*
+ *	union_setattr:
+ *
+ *	Apply the attribute change to the upper layer.  A lower-only
+ *	regular file is copied up first (without its contents when the
+ *	requested size is 0).
+ *
+ *	Returns EROFS when the mount is read-only and flags, owner, group,
+ *	times or mode are being changed, or when no upper vnode exists
+ *	after the optional copyup; otherwise the error from union_copyup()
+ *	or VOP_SETATTR().
+ */
+static int
+union_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct proc *p = ap->a_p;
+	struct vattr *vap = ap->a_vap;
+	struct vnode *uppervp;
+	int error;
+
+	/*
+	 * Disallow write attempts on filesystems mounted read-only.
+	 */
+	if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) &&
+	    (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
+	     vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
+	     vap->va_mtime.tv_sec != VNOVAL ||
+	     vap->va_mode != (mode_t)VNOVAL)) {
+		return (EROFS);
+	}
+
+	/*
+	 * Handle case of truncating lower object to zero size,
+	 * by creating a zero length upper object.  This is to
+	 * handle the case of open with O_TRUNC and O_CREAT.
+	 */
+	if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) {
+		error = union_copyup(un, (ap->a_vap->va_size != 0),
+			    ap->a_cred, ap->a_p);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Try to set attributes in upper layer,
+	 * otherwise return read-only filesystem error.
+	 */
+	error = EROFS;
+	if ((uppervp = union_lock_upper(un, p)) != NULLVP) {
+		/*
+		 * Use the vnode we just referenced and locked instead of
+		 * re-reading un->un_uppervp.
+		 */
+		error = VOP_SETATTR(uppervp, ap->a_vap,
+					ap->a_cred, ap->a_p);
+		if ((error == 0) && (ap->a_vap->va_size != VNOVAL))
+			union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL);
+		union_unlock_upper(uppervp, p);
+	}
+	return (error);
+}
+
+/*
+ *	union_getpages:
+ *
+ *	Delegate to vnode_pager_generic_getpages() on the union vnode
+ *	itself and return its result.
+ */
+
+static int
+union_getpages(struct vop_getpages_args *ap)
+{
+	return(vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
+		ap->a_count, ap->a_reqpage));
+}
+
+/*
+ *	union_putpages:
+ *
+ *	Delegate to vnode_pager_generic_putpages() on the union vnode
+ *	itself and return its result.
+ */
+
+static int
+union_putpages(struct vop_putpages_args *ap)
+{
+	return(vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
+		ap->a_sync, ap->a_rtvals));
+}
+
+/*
+ *	union_read:
+ *
+ *	Read from whichever layer backs the union vnode (upper if present,
+ *	else lower), after making our VM pages coherent when the vnode has
+ *	VOBJBUF set.  On success the cached layer size is grown if the read
+ *	ended beyond it.
+ *
+ *	Arguments (struct vop_read_args): a_vp, a_uio, a_ioflag, a_cred.
+ */
+static int
+union_read(struct vop_read_args *ap)
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct uio *uio = ap->a_uio;
+	struct proc *p = uio->uio_procp;
+	struct vnode *backvp;
+	off_t endoff;
+	int error;
+
+	backvp = union_lock_other(un, p);
+	KASSERT(backvp != NULL, ("union_read: backing vnode missing!"));
+
+	if (ap->a_vp->v_flag & VOBJBUF)
+		union_vm_coherency(ap->a_vp, uio, 0);
+
+	error = VOP_READ(backvp, uio, ap->a_ioflag, ap->a_cred);
+	union_unlock_other(backvp, p);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * XXX
+	 * perhaps the size of the underlying object has changed under
+	 * our feet.  take advantage of the offset information present
+	 * in the uio structure.  (backvp is only compared here, not
+	 * dereferenced.)
+	 */
+	endoff = uio->uio_offset;
+	if (backvp == un->un_uppervp) {
+		if (endoff > un->un_uppersz)
+			union_newsize(ap->a_vp, endoff, VNOVAL);
+	} else if (endoff > un->un_lowersz) {
+		union_newsize(ap->a_vp, VNOVAL, endoff);
+	}
+	return (0);
+}
+
+/*
+ *	union_write:
+ *
+ *	Write through to the upper vnode; panics if the node has none.
+ *	On success the cached upper size is grown if the write ended
+ *	beyond it.
+ *
+ *	Arguments (declared as struct vop_read_args, matching the
+ *	prototype): a_vp, a_uio, a_ioflag, a_cred.
+ */
+static int
+union_write(struct vop_read_args *ap)
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct uio *uio = ap->a_uio;
+	struct proc *p = uio->uio_procp;
+	struct vnode *uppervp;
+	int error;
+
+	uppervp = union_lock_upper(un, p);
+	if (uppervp == NULLVP)
+		panic("union: missing upper layer in write");
+
+	/*
+	 * Our VM pages hang off the union vnode rather than the real
+	 * vnode, and reads and writes do not go through our own VM cache,
+	 * so there is a VM/VFS coherency problem.  It is solved by
+	 * invalidating or flushing the associated VM pages before a
+	 * normal read or write is allowed to occur.
+	 *
+	 * VM-backed writes (UIO_NOCOPY) have to be converted to normal
+	 * writes because we are not cache-coherent.  Normal writes need
+	 * to be made coherent with our VM-backing store, which we do by
+	 * first flushing any dirty VM pages associated with the write
+	 * range, and then destroying any clean VM pages associated with
+	 * the write range.
+	 */
+
+	if (uio->uio_segflg == UIO_NOCOPY)
+		uio->uio_segflg = UIO_SYSSPACE;
+	else if (ap->a_vp->v_flag & VOBJBUF)
+		union_vm_coherency(ap->a_vp, uio, 1);
+
+	error = VOP_WRITE(uppervp, uio, ap->a_ioflag, ap->a_cred);
+
+	/*
+	 * the size of the underlying object may be changed by the
+	 * write.
+	 */
+	if (error == 0 && uio->uio_offset > un->un_uppersz)
+		union_newsize(ap->a_vp, uio->uio_offset, VNOVAL);
+
+	union_unlock_upper(uppervp, p);
+	return (error);
+}
+
+/*
+ *	union_lease:
+ *
+ *	Bypass: rewrite a_vp to the vnode selected by OTHERVP() and pass
+ *	the same argument block to that vnode's vop_lease.
+ *
+ *	Arguments (struct vop_lease_args): a_vp, a_p, a_cred, a_flag.
+ */
+static int
+union_lease(struct vop_lease_args *ap)
+{
+	struct vnode *targetvp;
+
+	targetvp = OTHERVP(ap->a_vp);
+	ap->a_vp = targetvp;
+	return (VCALL(targetvp, VOFFSET(vop_lease), ap));
+}
+
+/*
+ *	union_ioctl:
+ *
+ *	Bypass: rewrite a_vp to the vnode selected by OTHERVP() and pass
+ *	the same argument block to that vnode's vop_ioctl.
+ *
+ *	Arguments (struct vop_ioctl_args): a_vp, a_command, a_data,
+ *	a_fflag, a_cred, a_p.
+ */
+static int
+union_ioctl(struct vop_ioctl_args *ap)
+{
+	struct vnode *targetvp;
+
+	targetvp = OTHERVP(ap->a_vp);
+	ap->a_vp = targetvp;
+	return (VCALL(targetvp, VOFFSET(vop_ioctl), ap));
+}
+
+/*
+ *	union_poll:
+ *
+ *	Forward the poll request to the vnode selected by OTHERVP(),
+ *	reusing the caller's argument structure.
+ */
+static int
+union_poll(ap)
+	struct vop_poll_args /* {
+		struct vnode *a_vp;
+		int  a_events;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *targetvp;
+
+	/* Retarget the argument block, then call the target's own op. */
+	targetvp = OTHERVP(ap->a_vp);
+	ap->a_vp = targetvp;
+
+	return (VCALL(targetvp, VOFFSET(vop_poll), ap));
+}
+
+/*
+ *	union_revoke:
+ *
+ *	Revoke whichever of the upper and lower vnodes exist, then dispose
+ *	of the union vnode itself with vgone().  Always returns 0.
+ */
+static int
+union_revoke(ap)
+	struct vop_revoke_args /* {
+		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *unionvp = ap->a_vp;
+	struct vnode *layervp;
+
+	/* Upper layer first, then lower; either may be absent. */
+	if ((layervp = UPPERVP(unionvp)) != NULLVP)
+		VOP_REVOKE(layervp, ap->a_flags);
+	if ((layervp = LOWERVP(unionvp)) != NULLVP)
+		VOP_REVOKE(layervp, ap->a_flags);
+
+	vgone(unionvp);
+	return (0);
+}
+
+/*
+ *	union_mmap:
+ *
+ *	Forward the mmap request to the vnode selected by OTHERVP(),
+ *	reusing the caller's argument structure.
+ */
+static int
+union_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *targetvp;
+
+	/* Retarget the argument block, then call the target's own op. */
+	targetvp = OTHERVP(ap->a_vp);
+	ap->a_vp = targetvp;
+
+	return (VCALL(targetvp, VOFFSET(vop_mmap), ap));
+}
+
+/*
+ *	union_fsync:
+ *
+ *	Sync the vnode handed back by union_lock_other().  When that returns
+ *	NULLVP there is nothing to sync and 0 is returned; otherwise the
+ *	result of VOP_FSYNC is returned.
+ */
+static int
+union_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int  a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct proc *p = ap->a_p;
+	struct vnode *lockedvp;
+	int error;
+
+	lockedvp = union_lock_other(un, p);
+	if (lockedvp == NULLVP)
+		return (0);
+
+	error = VOP_FSYNC(lockedvp, ap->a_cred, ap->a_waitfor, p);
+	union_unlock_other(lockedvp, p);
+
+	return (error);
+}
+
+/*
+ *	union_remove:
+ *
+ *	Remove the entry named by cnp.  Both dvp and vp arrive locked and
+ *	are still locked when we return.
+ *
+ *	When the file has an upper vnode, VOP_REMOVE is issued on the upper
+ *	directory (with DOWHITEOUT set if union_dowhiteout() asks for it).
+ *	When it has none, union_mkwhiteout() is called instead.  Panics if
+ *	the directory has no upper vnode.
+ */
+
+static int
+union_remove(ap)
+	struct vop_remove_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	struct union_node *dirun = VTOUNION(ap->a_dvp);
+	struct union_node *fileun = VTOUNION(ap->a_vp);
+	struct vnode *upperdvp;
+	struct vnode *uppervp;
+	int error;
+
+	upperdvp = union_lock_upper(dirun, p);
+	if (upperdvp == NULLVP)
+		panic("union remove: null upper vnode");
+
+	uppervp = union_lock_upper(fileun, p);
+	if (uppervp == NULLVP) {
+		/* No upper vnode for the file: fall back to a whiteout. */
+		error = union_mkwhiteout(
+			    MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
+			    upperdvp, cnp, fileun->un_path);
+	} else {
+		if (union_dowhiteout(fileun, cnp->cn_cred, p))
+			cnp->cn_flags |= DOWHITEOUT;
+		error = VOP_REMOVE(upperdvp, uppervp, cnp);
+#if 0
+		/* XXX */
+		if (!error)
+			union_removed_upper(fileun);
+#endif
+		union_unlock_upper(uppervp, p);
+	}
+
+	union_unlock_upper(upperdvp, p);
+	return (error);
+}
+
+/*
+ *	union_link:
+ *
+ *	tdvp will be locked on entry, vp will not be locked on entry.
+ *	tdvp should remain locked on return and vp should remain unlocked
+ *	on return.
+ *
+ *	The link itself is made by VOP_LINK on the upper vnode of tdvp.
+ *	Returns the union_copyup() error if copying the source up fails,
+ *	EROFS if tdvp has no upper vnode, and otherwise the VOP_LINK result.
+ */
+
+static int
+union_link(ap)
+	struct vop_link_args /* {
+		struct vnode *a_tdvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	struct union_node *dun = VTOUNION(ap->a_tdvp);
+	struct vnode *vp;
+	struct vnode *tdvp;
+	int error = 0;
+
+	/*
+	 * Pick the vnode to link to.  If a_vp does not share a_tdvp's
+	 * vnode operations vector it is used as-is; otherwise it is treated
+	 * as a union node and its upper vnode is used, copying the file up
+	 * first when no upper vnode exists yet.
+	 */
+	if (ap->a_tdvp->v_op != ap->a_vp->v_op) {
+		vp = ap->a_vp;
+	} else {
+		struct union_node *tun = VTOUNION(ap->a_vp);
+
+		if (tun->un_uppervp == NULLVP) {
+			/* a_vp is locked only for the duration of the copyup. */
+			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p);
+#if 0
+			if (dun->un_uppervp == tun->un_dirvp) {
+				if (dun->un_flags & UN_ULOCK) {
+					dun->un_flags &= ~UN_ULOCK;
+					VOP_UNLOCK(dun->un_uppervp, 0, p);
+				}
+			}
+#endif
+			error = union_copyup(tun, 1, cnp->cn_cred, p);
+#if 0
+			if (dun->un_uppervp == tun->un_dirvp) {
+				vn_lock(dun->un_uppervp,
+					    LK_EXCLUSIVE | LK_RETRY, p);
+				dun->un_flags |= UN_ULOCK;
+			}
+#endif
+			VOP_UNLOCK(ap->a_vp, 0, p);
+		}
+		vp = tun->un_uppervp;
+	}
+
+	/* Only union_copyup() can have set error at this point. */
+	if (error)
+		return (error);
+
+	/*
+	 * Make sure upper is locked, then unlock the union directory we were 
+	 * called with to avoid a deadlock while we are calling VOP_LINK on 
+	 * the upper (with tdvp locked and vp not locked).  Our ap->a_tdvp
+	 * is expected to be locked on return.
+	 */
+
+	if ((tdvp = union_lock_upper(dun, p)) == NULLVP)
+		return (EROFS);
+
+	VOP_UNLOCK(ap->a_tdvp, 0, p);		/* unlock calling node */
+	error = VOP_LINK(tdvp, vp, cnp);	/* call link on upper */
+
+	/*
+	 * We have to unlock tdvp prior to relocking our calling node in
+	 * order to avoid a deadlock.
+	 */
+	union_unlock_upper(tdvp, p);
+	vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY, p);
+	return (error);
+}
+
+/*
+ *	union_rename:
+ *
+ *	Replace each union vnode argument (fdvp, fvp, tdvp, tvp) with the
+ *	corresponding upper vnode and hand the rename to VOP_RENAME.  A
+ *	regular source file with no upper vnode is copied up first.
+ *
+ *	Returns EXDEV when the source or target directory has no upper
+ *	vnode, or when the source has no upper vnode and is not a regular
+ *	file; returns the union_copyup() error if the copy-up fails;
+ *	otherwise returns the VOP_RENAME result.  On the error paths the
+ *	vnodes are released at the "bad" label.
+ */
+static int
+union_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	int error;
+	struct vnode *fdvp = ap->a_fdvp;
+	struct vnode *fvp = ap->a_fvp;
+	struct vnode *tdvp = ap->a_tdvp;
+	struct vnode *tvp = ap->a_tvp;
+
+	/*
+	 * Figure out what fdvp to pass to our upper or lower vnode.  If we
+	 * replace the fdvp, release the original one and ref the new one.
+	 */
+
+	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
+		struct union_node *un = VTOUNION(fdvp);
+		if (un->un_uppervp == NULLVP) {
+			/*
+			 * this should never happen in normal
+			 * operation but might if there was
+			 * a problem creating the top-level shadow
+			 * directory.
+			 */
+			error = EXDEV;
+			goto bad;
+		}
+		fdvp = un->un_uppervp;
+		VREF(fdvp);
+		vrele(ap->a_fdvp);
+	}
+
+	/*
+	 * Figure out what fvp to pass to our upper or lower vnode.  If we
+	 * replace the fvp, release the original one and ref the new one.
+	 */
+
+	if (fvp->v_op == union_vnodeop_p) {	/* always true */
+		struct union_node *un = VTOUNION(fvp);
+#if 0
+		struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount);
+#endif
+
+		if (un->un_uppervp == NULLVP) {
+			switch(fvp->v_type) {
+			case VREG:
+				/* The union vnode is locked only around the copyup. */
+				vn_lock(un->un_vnode, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_proc);
+				error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_proc);
+				VOP_UNLOCK(un->un_vnode, 0, ap->a_fcnp->cn_proc);
+				if (error)
+					goto bad;
+				break;
+			case VDIR:
+				/*
+				 * XXX not yet.
+				 *
+				 * There is only one way to rename a directory
+				 * based in the lowervp, and that is to copy
+				 * the entire directory hierarchy.  Otherwise
+				 * it would not last across a reboot.
+				 *
+				 * With the code below compiled out, VDIR
+				 * falls through to the default case (EXDEV).
+				 */
+#if 0
+				vrele(fvp);
+				fvp = NULL;
+				vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_proc);
+				error = union_mkshadow(um, fdvp, 
+					    ap->a_fcnp, &un->un_uppervp);
+				VOP_UNLOCK(fdvp, 0, ap->a_fcnp->cn_proc);
+				if (un->un_uppervp)
+					VOP_UNLOCK(un->un_uppervp, 0, ap->a_fcnp->cn_proc);
+				if (error)
+					goto bad;
+				break;
+#endif
+			default:
+				error = EXDEV;
+				goto bad;
+			}
+		}
+
+		/* Request a whiteout when the source also has a lower vnode. */
+		if (un->un_lowervp != NULLVP)
+			ap->a_fcnp->cn_flags |= DOWHITEOUT;
+		fvp = un->un_uppervp;
+		VREF(fvp);
+		vrele(ap->a_fvp);
+	}
+
+	/*
+	 * Figure out what tdvp (destination directory) to pass to the
+	 * lower level.  If we replace it with uppervp, we need to vput the 
+	 * old one.  The exclusive lock is transferred to what we will pass
+	 * down in the VOP_RENAME and we replace uppervp with a simple
+	 * reference.
+	 */
+
+	if (tdvp->v_op == union_vnodeop_p) {
+		struct union_node *un = VTOUNION(tdvp);
+
+		if (un->un_uppervp == NULLVP) {
+			/*
+			 * this should never happen in normal
+			 * operation but might if there was
+			 * a problem creating the top-level shadow
+			 * directory.
+			 */
+			error = EXDEV;
+			goto bad;
+		}
+
+		/*
+		 * new tdvp is a lock and reference on uppervp, put away
+		 * the old tdvp.
+		 */
+		tdvp = union_lock_upper(un, ap->a_tcnp->cn_proc);
+		vput(ap->a_tdvp);
+	}
+
+	/*
+	 * Figure out what tvp (destination file) to pass to the
+	 * lower level.
+	 *
+	 * If the uppervp file does not exist put away the (wrong)
+	 * file and change tvp to NULL.
+	 */
+
+	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
+		struct union_node *un = VTOUNION(tvp);
+
+		tvp = union_lock_upper(un, ap->a_tcnp->cn_proc);
+		vput(ap->a_tvp);
+		/* note: tvp may be NULL */
+	}
+
+	/*
+	 * VOP_RENAME releases/vputs prior to returning, so we have no
+	 * cleanup to do.
+	 */
+
+	return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));
+
+	/*
+	 * Error.  We still have to release / vput the various elements.
+	 * Every jump here happens before tdvp and tvp are replaced, so they
+	 * are still the caller's vnodes: fdvp and fvp are vrele'd, tdvp is
+	 * vput, and tvp is vput unless it is the same vnode as tdvp, in
+	 * which case it is only vrele'd.
+	 */
+
+bad:
+	vrele(fdvp);
+	if (fvp)
+		vrele(fvp);
+	vput(tdvp);
+	if (tvp != NULLVP) {
+		if (tvp != tdvp)
+			vput(tvp);
+		else
+			vrele(tvp);
+	}
+	return (error);
+}
+
+/*
+ *	union_mkdir:
+ *
+ *	Create the directory with VOP_MKDIR on the parent's upper vnode,
+ *	then build the union vnode returned in *a_vpp with union_allocvp().
+ *	Returns EROFS when the parent has no upper vnode.
+ */
+static int
+union_mkdir(ap)
+	struct vop_mkdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct vnode *upperdvp;
+	struct vnode *newvp;
+	int error;
+
+	upperdvp = union_lock_upper(dun, p);
+	if (upperdvp == NULLVP)
+		return (EROFS);
+
+	error = VOP_MKDIR(upperdvp, &newvp, cnp, ap->a_vap);
+	union_unlock_upper(upperdvp, p);
+	if (error != 0)
+		return (error);
+
+	/* The new upper directory is unlocked before it is wrapped. */
+	VOP_UNLOCK(newvp, 0, p);
+	UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n", newvp, newvp->v_usecount));
+	error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount,
+		ap->a_dvp, NULLVP, cnp, newvp, NULLVP, 1);
+	UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, newvp->v_usecount));
+
+	return (error);
+}
+
+/*
+ *	union_rmdir:
+ *
+ *	Remove the directory named by cnp.  When the directory has an upper
+ *	vnode, VOP_RMDIR is issued on the parent's upper vnode (with
+ *	DOWHITEOUT set if union_dowhiteout() asks for it).  When it has
+ *	none, union_mkwhiteout() is called instead.  Panics if the parent
+ *	has no upper vnode.
+ */
+static int
+union_rmdir(ap)
+	struct vop_rmdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	struct vnode *upperdvp;
+	struct vnode *uppervp;
+	int error;
+
+	if ((upperdvp = union_lock_upper(dun, p)) == NULLVP)
+		panic("union rmdir: null upper vnode");
+
+	if ((uppervp = union_lock_upper(un, p)) != NULLVP) {
+		if (union_dowhiteout(un, cnp->cn_cred, p))
+			cnp->cn_flags |= DOWHITEOUT;
+		error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp);
+		union_unlock_upper(uppervp, p);
+	} else {
+		/*
+		 * Use the vnode we locked above rather than re-reading
+		 * dun->un_uppervp, matching union_remove().
+		 */
+		error = union_mkwhiteout(
+			    MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
+			    upperdvp, ap->a_cnp, un->un_path);
+	}
+	union_unlock_upper(upperdvp, p);
+	return (error);
+}
+
+/*
+ *	union_symlink:
+ *
+ *	dvp arrives locked and is still locked when we return.  a_vpp is
+ *	garbage (unused).  The symlink is created by VOP_SYMLINK on the
+ *	directory's upper vnode; EROFS is returned when there is none.
+ */
+
+static int
+union_symlink(ap)
+	struct vop_symlink_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+		char *a_target;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct vnode *upperdvp;
+	int error;
+
+	upperdvp = union_lock_upper(dun, p);
+	if (upperdvp == NULLVP)
+		return (EROFS);
+
+	error = VOP_SYMLINK(upperdvp, ap->a_vpp, cnp, ap->a_vap,
+		    ap->a_target);
+	union_unlock_upper(upperdvp, p);
+
+	return (error);
+}
+
+/*
+ * union_readdir cooperates with getdirentries and readdir(3) to
+ * list the entries of the unioned directories.  Walking down the
+ * union stack is left to getdirentries, and removing duplicate
+ * names from the returned stream is left to readdir(3).
+ *
+ * Only the upper vnode is read here; when there is none, 0 is
+ * returned without touching the uio.
+ */
+static int
+union_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+		int *a_eofflag;
+		u_long *a_cookies;
+		int a_ncookies;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct proc *p = ap->a_uio->uio_procp;
+	struct vnode *uppervp;
+	int error;
+
+	uppervp = union_lock_upper(un, p);
+	if (uppervp == NULLVP)
+		return (0);
+
+	/* Retarget the argument block at the upper vnode and bypass. */
+	ap->a_vp = uppervp;
+	error = VCALL(uppervp, VOFFSET(vop_readdir), ap);
+	union_unlock_upper(uppervp, p);
+
+	return (error);
+}
+
+/*
+ *	union_readlink:
+ *
+ *	Read the link through the vnode handed back by union_lock_other(),
+ *	reusing the caller's argument structure.  A missing vnode is only
+ *	caught by the KASSERT.
+ */
+static int
+union_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct proc *p = ap->a_uio->uio_procp;
+	struct vnode *targetvp;
+	int error;
+
+	targetvp = union_lock_other(un, p);
+	KASSERT(targetvp != NULL, ("union_readlink: backing vnode missing!"));
+
+	/* Retarget the argument block, then call the target's own op. */
+	ap->a_vp = targetvp;
+	error = VCALL(targetvp, VOFFSET(vop_readlink), ap);
+	union_unlock_other(targetvp, p);
+
+	return (error);
+}
+
+/*
+ *	union_inactive:
+ *
+ *	Called with the vnode locked; we unlock it before returning.
+ *	Releases the directory cache, and calls vgone() on the vnode unless
+ *	the union node is flagged UN_CACHED.  Always returns 0.
+ */
+
+static int
+union_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct proc *p = ap->a_p;
+	struct union_node *un = VTOUNION(vp);
+	struct vnode **cachep;
+
+	/*
+	 * Nothing is passed down to the layers here (deliberately no
+	 * bypass).  The lowervp is not vrele'd until reclaim, so the
+	 * union_node stays in the cache and can be reused until then.
+	 *
+	 * NEEDSWORK: Someday, consider inactive'ing the lowervp and
+	 * then trying to reactivate it with capabilities (v_id), as the
+	 * name lookup cache code does.  That's too much work for now.
+	 */
+
+	/* Drop every reference in the NULLVP-terminated dircache. */
+	if (un->un_dircache != NULL) {
+		cachep = un->un_dircache;
+		while (*cachep != NULLVP) {
+			vrele(*cachep);
+			cachep++;
+		}
+		free(un->un_dircache, M_TEMP);
+		un->un_dircache = NULL;
+	}
+
+#if 0
+	if ((un->un_flags & UN_ULOCK) && un->un_uppervp) {
+		un->un_flags &= ~UN_ULOCK;
+		VOP_UNLOCK(un->un_uppervp, 0, p);
+	}
+#endif
+
+	VOP_UNLOCK(vp, 0, p);
+
+	if (!(un->un_flags & UN_CACHED))
+		vgone(vp);
+
+	return (0);
+}
+
+/*
+ *	union_reclaim:
+ *
+ *	Hand the vnode to union_freevp().  Always returns 0.
+ */
+static int
+union_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	union_freevp(vp);
+	return (0);
+}
+
+/*
+ *	union_lock:
+ *
+ *	Lock the union vnode with vop_stdlock() and return its result.
+ *	The code that would additionally lock the upper vnode on exclusive
+ *	requests is compiled out (#if 0).
+ */
+static int
+union_lock(ap)
+	struct vop_lock_args *ap;
+{
+#if 0
+	struct vnode *vp = ap->a_vp;
+	struct proc *p = ap->a_p;
+	int flags = ap->a_flags;
+	struct union_node *un;
+#endif
+	int error;
+
+	error = vop_stdlock(ap);
+#if 0
+	un = VTOUNION(vp);
+
+	if (error == 0) {
+		/*
+		 * Lock the upper if it exists and this is an exclusive lock
+		 * request.
+		 */
+		if (un->un_uppervp != NULLVP && 
+		    (flags & LK_TYPE_MASK) == LK_EXCLUSIVE) {
+			if ((un->un_flags & UN_ULOCK) == 0 && vp->v_usecount) {
+				error = vn_lock(un->un_uppervp, flags, p);
+				if (error) {
+					/* Back out the union lock taken above. */
+					struct vop_unlock_args uap = { 0 };
+					uap.a_vp = ap->a_vp;
+					uap.a_flags = ap->a_flags;
+					uap.a_p = ap->a_p;
+					vop_stdunlock(&uap);
+					return (error);
+				}
+				un->un_flags |= UN_ULOCK;
+			}
+		}
+	}
+#endif
+	return (error);
+}
+
+/*
+ *	union_unlock:
+ *
+ *	Unlock our union node with vop_stdunlock() and return its result.
+ *	The code that would also unlock uppervp is compiled out (#if 0),
+ *	so uppervp is not touched here.
+ */
+static int
+union_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	int error;
+
+	/* Sanity check: an existing uppervp must still be referenced. */
+	KASSERT((un->un_uppervp == NULL || un->un_uppervp->v_usecount > 0), ("uppervp usecount is 0"));
+
+	error = vop_stdunlock(ap);
+#if 0
+
+	/*
+	 * If no exclusive locks remain and we are holding an uppervp lock,
+	 * remove the uppervp lock.
+	 *
+	 * NOTE(review): this disabled block uses `p', which is not declared
+	 * in this function; it would need ap->a_p if re-enabled.
+	 */
+
+	if ((un->un_flags & UN_ULOCK) && 
+	    lockstatus(&un->un_lock, NULL) != LK_EXCLUSIVE) {
+		un->un_flags &= ~UN_ULOCK;
+		VOP_UNLOCK(un->un_uppervp, LK_EXCLUSIVE, p);
+	}
+#endif
+	return(error);
+}
+
+/*
+ *	union_bmap:
+ *
+ *	Not supported; always returns EOPNOTSUPP.
+ *
+ *	We cannot push through to the real vnode to reach the underlying
+ *	device, because that would bypass data cached by the real vnode.
+ *	Returning the 'real' vnode does not work either: for some reason it
+ *	seems to blow up memory maps.
+ */
+
+static int
+union_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+		int *a_runb;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ *	union_print:
+ *
+ *	Print the union vnode together with its upper and lower vnode
+ *	pointers, then vprint() each layer that exists.  Always returns 0.
+ */
+static int
+union_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vnode *uvp = UPPERVP(vp);
+	struct vnode *lvp = LOWERVP(vp);
+
+	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
+			vp, uvp, lvp);
+
+	if (uvp != NULLVP)
+		vprint("union: upper", uvp);
+	if (lvp != NULLVP)
+		vprint("union: lower", lvp);
+
+	return (0);
+}
+
+/*
+ *	union_pathconf:
+ *
+ *	Answer the pathconf query through the vnode handed back by
+ *	union_lock_other(), reusing the caller's argument structure.
+ *	A missing vnode is only caught by the KASSERT.
+ */
+static int
+union_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct proc *p = curproc;		/* XXX */
+	struct vnode *targetvp;
+	int error;
+
+	targetvp = union_lock_other(un, p);
+	KASSERT(targetvp != NULL, ("union_pathconf: backing vnode missing!"));
+
+	/* Retarget the argument block, then call the target's own op. */
+	ap->a_vp = targetvp;
+	error = VCALL(targetvp, VOFFSET(vop_pathconf), ap);
+	union_unlock_other(targetvp, p);
+
+	return (error);
+}
+
+/*
+ *	union_advlock:
+ *
+ *	Forward the advisory lock request to the vnode selected by
+ *	OTHERVP(), reusing the caller's argument structure.
+ */
+static int
+union_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+	struct vnode *targetvp;
+
+	/* Retarget the argument block, then call the target's own op. */
+	targetvp = OTHERVP(ap->a_vp);
+	ap->a_vp = targetvp;
+
+	return (VCALL(targetvp, VOFFSET(vop_advlock), ap));
+}
+
+
+/*
+ * XXX - vop_strategy must be hand coded because it has no
+ * vnode in its arguments.
+ * YYY - and it is not coherent with anything.
+ *
+ * This goes away with a merged VM/buffer cache.
+ */
+static int
+union_strategy(ap)
+	struct vop_strategy_args /* {
+		struct vnode *a_vp;
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *targetvp;
+
+	/* The target is derived from the buffer's vnode, not ap->a_vp. */
+	targetvp = OTHERVP(bp->b_vp);
+
+#ifdef DIAGNOSTIC
+	if (targetvp == NULLVP)
+		panic("union_strategy: nil vp");
+	/* A non-read request must never be aimed at the lower vnode. */
+	if (!(bp->b_flags & B_READ) && targetvp == LOWERVP(bp->b_vp))
+		panic("union_strategy: writing to lowervp");
+#endif
+
+	return (VOP_STRATEGY(targetvp, bp));
+}
+
+/*
+ * Global vfs data structures
+ *
+ * union_vnodeop_p is the operations vector that union_rename() compares
+ * v_op against to recognize union vnodes.  union_vnodeop_entries pairs
+ * each vnode operation descriptor with the union implementation of it;
+ * the table is terminated by a { NULL, NULL } entry.
+ */
+vop_t **union_vnodeop_p;
+static struct vnodeopv_entry_desc union_vnodeop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) vop_defaultop },	/* presumably used for ops not listed below -- confirm */
+	{ &vop_access_desc,		(vop_t *) union_access },
+	{ &vop_advlock_desc,		(vop_t *) union_advlock },
+	{ &vop_bmap_desc,		(vop_t *) union_bmap },
+	{ &vop_close_desc,		(vop_t *) union_close },
+	{ &vop_create_desc,		(vop_t *) union_create },
+	{ &vop_fsync_desc,		(vop_t *) union_fsync },
+	{ &vop_getpages_desc,		(vop_t *) union_getpages },
+	{ &vop_putpages_desc,		(vop_t *) union_putpages },
+	{ &vop_getattr_desc,		(vop_t *) union_getattr },
+	{ &vop_inactive_desc,		(vop_t *) union_inactive },
+	{ &vop_ioctl_desc,		(vop_t *) union_ioctl },
+	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },	/* standard routine, no union wrapper */
+	{ &vop_lease_desc,		(vop_t *) union_lease },
+	{ &vop_link_desc,		(vop_t *) union_link },
+	{ &vop_lock_desc,		(vop_t *) union_lock },
+	{ &vop_lookup_desc,		(vop_t *) union_lookup },
+	{ &vop_mkdir_desc,		(vop_t *) union_mkdir },
+	{ &vop_mknod_desc,		(vop_t *) union_mknod },
+	{ &vop_mmap_desc,		(vop_t *) union_mmap },
+	{ &vop_open_desc,		(vop_t *) union_open },
+	{ &vop_pathconf_desc,		(vop_t *) union_pathconf },
+	{ &vop_poll_desc,		(vop_t *) union_poll },
+	{ &vop_print_desc,		(vop_t *) union_print },
+	{ &vop_read_desc,		(vop_t *) union_read },
+	{ &vop_readdir_desc,		(vop_t *) union_readdir },
+	{ &vop_readlink_desc,		(vop_t *) union_readlink },
+	{ &vop_reclaim_desc,		(vop_t *) union_reclaim },
+	{ &vop_remove_desc,		(vop_t *) union_remove },
+	{ &vop_rename_desc,		(vop_t *) union_rename },
+	{ &vop_revoke_desc,		(vop_t *) union_revoke },
+	{ &vop_rmdir_desc,		(vop_t *) union_rmdir },
+	{ &vop_setattr_desc,		(vop_t *) union_setattr },
+	{ &vop_strategy_desc,		(vop_t *) union_strategy },
+	{ &vop_symlink_desc,		(vop_t *) union_symlink },
+	{ &vop_unlock_desc,		(vop_t *) union_unlock },
+	{ &vop_whiteout_desc,		(vop_t *) union_whiteout },
+	{ &vop_write_desc,		(vop_t *) union_write },
+	{ NULL, NULL }
+};
+/* Ties the operations vector pointer to the entry table above. */
+static struct vnodeopv_desc union_vnodeop_opv_desc =
+	{ &union_vnodeop_p, union_vnodeop_entries };
+
+VNODEOP_SET(union_vnodeop_opv_desc);