diff options
Diffstat (limited to 'sys/fs/unionfs')
-rw-r--r-- | sys/fs/unionfs/union.h | 155 | ||||
-rw-r--r-- | sys/fs/unionfs/union_subr.c | 1373 | ||||
-rw-r--r-- | sys/fs/unionfs/union_vfsops.c | 509 | ||||
-rw-r--r-- | sys/fs/unionfs/union_vnops.c | 1981 |
4 files changed, 4018 insertions, 0 deletions
diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h new file mode 100644 index 0000000..3fb0b6e --- /dev/null +++ b/sys/fs/unionfs/union.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) 1994 The Regents of the University of California. + * Copyright (c) 1994 Jan-Simon Pendry. + * All rights reserved. + * + * This code is derived from software donated to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)union.h 8.9 (Berkeley) 12/10/94 + * $FreeBSD$ + */ + +struct union_args { + char *target; /* Target of loopback */ + int mntflags; /* Options on the mount */ +}; + +#define UNMNT_ABOVE 0x0001 /* Target appears below mount point */ +#define UNMNT_BELOW 0x0002 /* Target appears below mount point */ +#define UNMNT_REPLACE 0x0003 /* Target replaces mount point */ +#define UNMNT_OPMASK 0x0003 + +struct union_mount { + struct vnode *um_uppervp; /* UN_ULOCK holds locking state */ + struct vnode *um_lowervp; /* Left unlocked */ + struct ucred *um_cred; /* Credentials of user calling mount */ + int um_cmode; /* cmask from mount process */ + int um_op; /* Operation mode */ +}; + +#ifdef KERNEL + +#ifndef DIAGNOSTIC +#define DIAGNOSTIC +#endif + +/* + * DEFDIRMODE is the mode bits used to create a shadow directory. + */ +#define VRWXMODE (VREAD|VWRITE|VEXEC) +#define VRWMODE (VREAD|VWRITE) +#define UN_DIRMODE ((VRWXMODE)|(VRWXMODE>>3)|(VRWXMODE>>6)) +#define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6)) + +/* + * A cache of vnode references (hangs off v_data) + * + * Placing un_lock as the first elements theoretically allows us to + * use the vop_stdlock functions. However, we need to make sure of + * certain side effects so we will still punch in our own code. + */ +struct union_node { + struct lock un_lock; + LIST_ENTRY(union_node) un_cache; /* Hash chain */ + struct vnode *un_vnode; /* Back pointer */ + struct vnode *un_uppervp; /* overlaying object */ + struct vnode *un_lowervp; /* underlying object */ + struct vnode *un_dirvp; /* Parent dir of uppervp */ + struct vnode *un_pvp; /* Parent vnode */ + char *un_path; /* saved component name */ + int un_openl; /* # of opens on lowervp */ + int un_exclcnt; /* exclusive count */ + unsigned int un_flags; + struct vnode **un_dircache; /* cached union stack */ + off_t un_uppersz; /* size of upper object */ + off_t un_lowersz; /* size of lower object */ +#ifdef DIAGNOSTIC + pid_t un_pid; +#endif +}; + +/* + * XXX UN_ULOCK - indicates that the uppervp is locked + * + * UN_CACHED - node is in the union cache + */ + +/*#define UN_ULOCK 0x04*/ /* Upper node is locked */ +#define UN_CACHED 0x10 /* In union cache */ + +/* + * Hash table locking flags + */ + +#define UNVP_WANT 0x01 +#define UNVP_LOCKED 0x02 + +extern int union_allocvp __P((struct vnode **, struct mount *, + struct vnode *, + struct vnode *, + struct componentname *, struct vnode *, + struct vnode *, int)); +extern int union_freevp __P((struct vnode *)); +extern struct vnode *union_dircache __P((struct vnode *, struct proc *)); +extern int union_copyup __P((struct union_node *, int, struct ucred *, + struct proc *)); +extern int union_dowhiteout __P((struct union_node *, struct ucred *, + struct proc *)); +extern int union_mkshadow __P((struct union_mount *, struct vnode *, + struct componentname *, struct vnode **)); +extern int union_mkwhiteout __P((struct union_mount *, struct vnode *, + struct componentname *, char *)); +extern int union_cn_close __P((struct vnode *, int, struct ucred *, + struct proc *)); +extern void union_removed_upper __P((struct union_node *un)); +extern struct vnode *union_lowervp __P((struct vnode *)); +extern void union_newsize __P((struct vnode *, off_t, off_t)); +extern void union_vm_coherency __P((struct vnode *, struct uio *, int)); + +extern int (*union_dircheckp) __P((struct proc *, struct vnode **, + struct file *)); + +#define MOUNTTOUNIONMOUNT(mp) ((struct union_mount *)((mp)->mnt_data)) +#define VTOUNION(vp) ((struct union_node *)(vp)->v_data) +#define UNIONTOV(un) ((un)->un_vnode) +#define LOWERVP(vp) (VTOUNION(vp)->un_lowervp) +#define UPPERVP(vp) (VTOUNION(vp)->un_uppervp) +#define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp)) + +#define UDEBUG(x) if (uniondebug) printf x +#define UDEBUG_ENABLED 1 + +extern vop_t **union_vnodeop_p; +extern struct vfsops union_vfsops; +extern int uniondebug; + +#endif /* KERNEL */ diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c new file mode 100644 index 0000000..6b88bef --- /dev/null +++ b/sys/fs/unionfs/union_subr.c @@ -0,0 +1,1373 @@ +/* + * Copyright (c) 1994 Jan-Simon Pendry + * Copyright (c) 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)union_subr.c 8.20 (Berkeley) 5/20/95 + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/vnode.h> +#include <sys/namei.h> +#include <sys/malloc.h> +#include <sys/fcntl.h> +#include <sys/file.h> +#include <sys/filedesc.h> +#include <sys/module.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> /* for vnode_pager_setsize */ +#include <vm/vm_zone.h> +#include <vm/vm_object.h> /* for vm cache coherency */ +#include <miscfs/union/union.h> + +#include <sys/proc.h> + +extern int union_init __P((void)); + +/* must be power of two, otherwise change UNION_HASH() */ +#define NHASH 32 + +/* unsigned int ... */ +#define UNION_HASH(u, l) \ + (((((uintptr_t) (u)) + ((uintptr_t) l)) >> 8) & (NHASH-1)) + +static LIST_HEAD(unhead, union_node) unhead[NHASH]; +static int unvplock[NHASH]; + +static void union_dircache_r __P((struct vnode *vp, struct vnode ***vppp, + int *cntp)); +static int union_list_lock __P((int ix)); +static void union_list_unlock __P((int ix)); +static int union_relookup __P((struct union_mount *um, struct vnode *dvp, + struct vnode **vpp, + struct componentname *cnp, + struct componentname *cn, char *path, + int pathlen)); +static void union_updatevp __P((struct union_node *un, + struct vnode *uppervp, + struct vnode *lowervp)); +static void union_newlower __P((struct union_node *, struct vnode *)); +static void union_newupper __P((struct union_node *, struct vnode *)); +static int union_copyfile __P((struct vnode *, struct vnode *, + struct ucred *, struct proc *)); +static int union_vn_create __P((struct vnode **, struct union_node *, + struct proc *)); +static int union_vn_close __P((struct vnode *, int, struct ucred *, + struct proc *)); + +int +union_init() +{ + int i; + + for (i = 0; i < NHASH; i++) + LIST_INIT(&unhead[i]); + bzero((caddr_t)unvplock, sizeof(unvplock)); + return (0); +} + +static int +union_list_lock(ix) + int ix; +{ + if (unvplock[ix] & UNVP_LOCKED) { + unvplock[ix] |= UNVP_WANT; + (void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0); + return (1); + } + unvplock[ix] |= UNVP_LOCKED; + return (0); +} + +static void +union_list_unlock(ix) + int ix; +{ + unvplock[ix] &= ~UNVP_LOCKED; + + if (unvplock[ix] & UNVP_WANT) { + unvplock[ix] &= ~UNVP_WANT; + wakeup((caddr_t) &unvplock[ix]); + } +} + +/* + * union_updatevp: + * + * The uppervp, if not NULL, must be referenced and not locked by us + * The lowervp, if not NULL, must be referenced. + * + * if uppervp and lowervp match pointers already installed, nothing + * happens. The passed vp's (when matching) are not adjusted. This + * routine may only be called by union_newupper() and union_newlower(). + */ + +static void +union_updatevp(un, uppervp, lowervp) + struct union_node *un; + struct vnode *uppervp; + struct vnode *lowervp; +{ + int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp); + int nhash = UNION_HASH(uppervp, lowervp); + int docache = (lowervp != NULLVP || uppervp != NULLVP); + int lhash, uhash; + + /* + * Ensure locking is ordered from lower to higher + * to avoid deadlocks. + */ + if (nhash < ohash) { + lhash = nhash; + uhash = ohash; + } else { + lhash = ohash; + uhash = nhash; + } + + if (lhash != uhash) { + while (union_list_lock(lhash)) + continue; + } + + while (union_list_lock(uhash)) + continue; + + if (ohash != nhash || !docache) { + if (un->un_flags & UN_CACHED) { + un->un_flags &= ~UN_CACHED; + LIST_REMOVE(un, un_cache); + } + } + + if (ohash != nhash) + union_list_unlock(ohash); + + if (un->un_lowervp != lowervp) { + if (un->un_lowervp) { + vrele(un->un_lowervp); + if (un->un_path) { + free(un->un_path, M_TEMP); + un->un_path = 0; + } + } + un->un_lowervp = lowervp; + un->un_lowersz = VNOVAL; + } + + if (un->un_uppervp != uppervp) { + if (un->un_uppervp) + vrele(un->un_uppervp); + un->un_uppervp = uppervp; + un->un_uppersz = VNOVAL; + } + + if (docache && (ohash != nhash)) { + LIST_INSERT_HEAD(&unhead[nhash], un, un_cache); + un->un_flags |= UN_CACHED; + } + + union_list_unlock(nhash); +} + +/* + * Set a new lowervp. The passed lowervp must be referenced and will be + * stored in the vp in a referenced state. + */ + +static void +union_newlower(un, lowervp) + struct union_node *un; + struct vnode *lowervp; +{ + union_updatevp(un, un->un_uppervp, lowervp); +} + +/* + * Set a new uppervp. The passed uppervp must be locked and will be + * stored in the vp in a locked state. The caller should not unlock + * uppervp. + */ + +static void +union_newupper(un, uppervp) + struct union_node *un; + struct vnode *uppervp; +{ + union_updatevp(un, uppervp, un->un_lowervp); +} + +/* + * Keep track of size changes in the underlying vnodes. + * If the size changes, then callback to the vm layer + * giving priority to the upper layer size. + */ +void +union_newsize(vp, uppersz, lowersz) + struct vnode *vp; + off_t uppersz, lowersz; +{ + struct union_node *un; + off_t sz; + + /* only interested in regular files */ + if (vp->v_type != VREG) + return; + + un = VTOUNION(vp); + sz = VNOVAL; + + if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) { + un->un_uppersz = uppersz; + if (sz == VNOVAL) + sz = un->un_uppersz; + } + + if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) { + un->un_lowersz = lowersz; + if (sz == VNOVAL) + sz = un->un_lowersz; + } + + if (sz != VNOVAL) { + UDEBUG(("union: %s size now %ld\n", + (uppersz != VNOVAL ? "upper" : "lower"), (long)sz)); + vnode_pager_setsize(vp, sz); + } +} + +/* + * union_allocvp: allocate a union_node and associate it with a + * parent union_node and one or two vnodes. + * + * vpp Holds the returned vnode locked and referenced if no + * error occurs. + * + * mp Holds the mount point. mp may or may not be busied. + * allocvp makes no changes to mp. + * + * dvp Holds the parent union_node to the one we wish to create. + * XXX may only be used to traverse an uncopied lowervp-based + * tree? XXX + * + * dvp may or may not be locked. allocvp makes no changes + * to dvp. + * + * upperdvp Holds the parent vnode to uppervp, generally used along + * with path component information to create a shadow of + * lowervp when uppervp does not exist. + * + * upperdvp is referenced but unlocked on entry, and will be + * dereferenced on return. + * + * uppervp Holds the new uppervp vnode to be stored in the + * union_node we are allocating. uppervp is referenced but + * not locked, and will be dereferenced on return. + * + * lowervp Holds the new lowervp vnode to be stored in the + * union_node we are allocating. uppervp is referenced but + * not locked, and will be dereferenced on return. + * + * cnp Holds path component information to be coupled with + * lowervp and upperdvp to allow unionfs to create an uppervp + * later on. Only used if lowervp is valid. The conents + * of cnp is only valid for the duration of the call. + * + * docache Determine whether this node should be entered in the + * cache or whether it should be destroyed as soon as possible. + * + * all union_nodes are maintained on a singly-linked + * list. new nodes are only allocated when they cannot + * be found on this list. entries on the list are + * removed when the vfs reclaim entry is called. + * + * a single lock is kept for the entire list. this is + * needed because the getnewvnode() function can block + * waiting for a vnode to become free, in which case there + * may be more than one process trying to get the same + * vnode. this lock is only taken if we are going to + * call getnewvnode, since the kernel itself is single-threaded. + * + * if an entry is found on the list, then call vget() to + * take a reference. this is done because there may be + * zero references to it and so it needs to removed from + * the vnode free list. + */ + +int +union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache) + struct vnode **vpp; + struct mount *mp; + struct vnode *dvp; /* parent union vnode */ + struct vnode *upperdvp; /* parent vnode of uppervp */ + struct componentname *cnp; /* may be null */ + struct vnode *uppervp; /* may be null */ + struct vnode *lowervp; /* may be null */ + int docache; +{ + int error; + struct union_node *un = 0; + struct vnode *xlowervp = NULLVP; + struct union_mount *um = MOUNTTOUNIONMOUNT(mp); + struct proc *p = (cnp) ? cnp->cn_proc : curproc; + int hash = 0; + int vflag; + int try; + + if (uppervp == NULLVP && lowervp == NULLVP) + panic("union: unidentifiable allocation"); + + if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) { + xlowervp = lowervp; + lowervp = NULLVP; + } + + /* detect the root vnode (and aliases) */ + vflag = 0; + if ((uppervp == um->um_uppervp) && + ((lowervp == NULLVP) || lowervp == um->um_lowervp)) { + if (lowervp == NULLVP) { + lowervp = um->um_lowervp; + if (lowervp != NULLVP) + VREF(lowervp); + } + vflag = VROOT; + } + +loop: + if (!docache) { + un = 0; + } else for (try = 0; try < 3; try++) { + switch (try) { + case 0: + if (lowervp == NULLVP) + continue; + hash = UNION_HASH(uppervp, lowervp); + break; + + case 1: + if (uppervp == NULLVP) + continue; + hash = UNION_HASH(uppervp, NULLVP); + break; + + case 2: + if (lowervp == NULLVP) + continue; + hash = UNION_HASH(NULLVP, lowervp); + break; + } + + while (union_list_lock(hash)) + continue; + + for (un = unhead[hash].lh_first; un != 0; + un = un->un_cache.le_next) { + if ((un->un_lowervp == lowervp || + un->un_lowervp == NULLVP) && + (un->un_uppervp == uppervp || + un->un_uppervp == NULLVP) && + (UNIONTOV(un)->v_mount == mp)) { + if (vget(UNIONTOV(un), 0, + cnp ? cnp->cn_proc : NULL)) { + union_list_unlock(hash); + goto loop; + } + break; + } + } + + union_list_unlock(hash); + + if (un) + break; + } + + if (un) { + /* + * Obtain a lock on the union_node. Everything is unlocked + * except for dvp, so check that case. If they match, our + * new un is already locked. Otherwise we have to lock our + * new un. + * + * A potential deadlock situation occurs when we are holding + * one lock while trying to get another. We must follow + * strict ordering rules to avoid it. We try to locate dvp + * by scanning up from un_vnode, since the most likely + * scenario is un being under dvp. + */ + + if (dvp && un->un_vnode != dvp) { + struct vnode *scan = un->un_vnode; + + do { + scan = VTOUNION(scan)->un_pvp; + } while (scan && scan->v_tag == VT_UNION && scan != dvp); + if (scan != dvp) { + /* + * our new un is above dvp (we never saw dvp + * while moving up the tree). + */ + VREF(dvp); + VOP_UNLOCK(dvp, 0, p); + error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + vrele(dvp); + } else { + /* + * our new un is under dvp + */ + error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); + } + } else if (dvp == NULLVP) { + /* + * dvp is NULL, we need to lock un. + */ + error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); + } else { + /* + * dvp == un->un_vnode, we are already locked. + */ + error = 0; + } + + if (error) + goto loop; + + /* + * At this point, the union_node is locked and referenced. + * + * uppervp is locked and referenced or NULL, lowervp is + * referenced or NULL. + */ + UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n", + un, un->un_vnode, un->un_uppervp, + (un->un_uppervp ? un->un_uppervp->v_usecount : -99), + uppervp, + (uppervp ? uppervp->v_usecount : -99) + )); + + if (uppervp != un->un_uppervp) { + KASSERT(uppervp == NULL || uppervp->v_usecount > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", uppervp->v_usecount)); + union_newupper(un, uppervp); + } else if (uppervp) { + KASSERT(uppervp->v_usecount > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", uppervp->v_usecount)); + vrele(uppervp); + } + + /* + * Save information about the lower layer. + * This needs to keep track of pathname + * and directory information which union_vn_create + * might need. + */ + if (lowervp != un->un_lowervp) { + union_newlower(un, lowervp); + if (cnp && (lowervp != NULLVP)) { + un->un_path = malloc(cnp->cn_namelen+1, + M_TEMP, M_WAITOK); + bcopy(cnp->cn_nameptr, un->un_path, + cnp->cn_namelen); + un->un_path[cnp->cn_namelen] = '\0'; + } + } else if (lowervp) { + vrele(lowervp); + } + + /* + * and upperdvp + */ + if (upperdvp != un->un_dirvp) { + if (un->un_dirvp) + vrele(un->un_dirvp); + un->un_dirvp = upperdvp; + } else if (upperdvp) { + vrele(upperdvp); + } + + *vpp = UNIONTOV(un); + return (0); + } + + if (docache) { + /* + * otherwise lock the vp list while we call getnewvnode + * since that can block. + */ + hash = UNION_HASH(uppervp, lowervp); + + if (union_list_lock(hash)) + goto loop; + } + + /* + * Create new node rather then replace old node + */ + + error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp); + if (error) { + /* + * If an error occurs clear out vnodes. + */ + if (lowervp) + vrele(lowervp); + if (uppervp) + vrele(uppervp); + if (upperdvp) + vrele(upperdvp); + *vpp = NULL; + goto out; + } + + MALLOC((*vpp)->v_data, void *, sizeof(struct union_node), + M_TEMP, M_WAITOK); + + (*vpp)->v_flag |= vflag; + if (uppervp) + (*vpp)->v_type = uppervp->v_type; + else + (*vpp)->v_type = lowervp->v_type; + + un = VTOUNION(*vpp); + bzero(un, sizeof(*un)); + + lockinit(&un->un_lock, PVFS, "unlock", 0, 0); + vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p); + + un->un_vnode = *vpp; + un->un_uppervp = uppervp; + un->un_uppersz = VNOVAL; + un->un_lowervp = lowervp; + un->un_lowersz = VNOVAL; + un->un_dirvp = upperdvp; + un->un_pvp = dvp; /* only parent dir in new allocation */ + if (dvp != NULLVP) + VREF(dvp); + un->un_dircache = 0; + un->un_openl = 0; + + if (cnp && (lowervp != NULLVP)) { + un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK); + bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); + un->un_path[cnp->cn_namelen] = '\0'; + } else { + un->un_path = 0; + un->un_dirvp = NULL; + } + + if (docache) { + LIST_INSERT_HEAD(&unhead[hash], un, un_cache); + un->un_flags |= UN_CACHED; + } + +out: + if (xlowervp) + vrele(xlowervp); + + if (docache) + union_list_unlock(hash); + + return (error); +} + +int +union_freevp(vp) + struct vnode *vp; +{ + struct union_node *un = VTOUNION(vp); + + if (un->un_flags & UN_CACHED) { + un->un_flags &= ~UN_CACHED; + LIST_REMOVE(un, un_cache); + } + + if (un->un_pvp != NULLVP) { + vrele(un->un_pvp); + un->un_pvp = NULL; + } + if (un->un_uppervp != NULLVP) { + vrele(un->un_uppervp); + un->un_uppervp = NULL; + } + if (un->un_lowervp != NULLVP) { + vrele(un->un_lowervp); + un->un_lowervp = NULL; + } + if (un->un_dirvp != NULLVP) { + vrele(un->un_dirvp); + un->un_dirvp = NULL; + } + if (un->un_path) { + free(un->un_path, M_TEMP); + un->un_path = NULL; + } + + FREE(vp->v_data, M_TEMP); + vp->v_data = 0; + + return (0); +} + +/* + * copyfile. copy the vnode (fvp) to the vnode (tvp) + * using a sequence of reads and writes. both (fvp) + * and (tvp) are locked on entry and exit. + * + * fvp and tvp are both exclusive locked on call, but their refcount's + * haven't been bumped at all. + */ +static int +union_copyfile(fvp, tvp, cred, p) + struct vnode *fvp; + struct vnode *tvp; + struct ucred *cred; + struct proc *p; +{ + char *buf; + struct uio uio; + struct iovec iov; + int error = 0; + + /* + * strategy: + * allocate a buffer of size MAXBSIZE. + * loop doing reads and writes, keeping track + * of the current uio offset. + * give up at the first sign of trouble. + */ + + bzero(&uio, sizeof(uio)); + + uio.uio_procp = p; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_offset = 0; + + VOP_LEASE(fvp, p, cred, LEASE_READ); + VOP_LEASE(tvp, p, cred, LEASE_WRITE); + + buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); + + /* ugly loop follows... */ + do { + off_t offset = uio.uio_offset; + int count; + int bufoffset; + + /* + * Setup for big read + */ + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + iov.iov_base = buf; + iov.iov_len = MAXBSIZE; + uio.uio_resid = iov.iov_len; + uio.uio_rw = UIO_READ; + + if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0) + break; + + /* + * Get bytes read, handle read eof case and setup for + * write loop + */ + if ((count = MAXBSIZE - uio.uio_resid) == 0) + break; + bufoffset = 0; + + /* + * Write until an error occurs or our buffer has been + * exhausted, then update the offset for the next read. + */ + while (bufoffset < count) { + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + iov.iov_base = buf + bufoffset; + iov.iov_len = count - bufoffset; + uio.uio_offset = offset + bufoffset; + uio.uio_rw = UIO_WRITE; + uio.uio_resid = iov.iov_len; + + if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0) + break; + bufoffset += (count - bufoffset) - uio.uio_resid; + } + uio.uio_offset = offset + bufoffset; + } while (error == 0); + + free(buf, M_TEMP); + return (error); +} + +/* + * + * un's vnode is assumed to be locked on entry and remains locked on exit. + */ + +int +union_copyup(un, docopy, cred, p) + struct union_node *un; + int docopy; + struct ucred *cred; + struct proc *p; +{ + int error; + struct vnode *lvp, *uvp; + + /* + * If the user does not have read permission, the vnode should not + * be copied to upper layer. + */ + vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_ACCESS(un->un_lowervp, VREAD, cred, p); + VOP_UNLOCK(un->un_lowervp, 0, p); + if (error) + return (error); + + error = union_vn_create(&uvp, un, p); + if (error) + return (error); + + lvp = un->un_lowervp; + + KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); + if (docopy) { + /* + * XX - should not ignore errors + * from VOP_CLOSE + */ + vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_OPEN(lvp, FREAD, cred, p); + if (error == 0 && vn_canvmio(lvp) == TRUE) + error = vfs_object_create(lvp, p, cred); + if (error == 0) { + error = union_copyfile(lvp, uvp, cred, p); + VOP_UNLOCK(lvp, 0, p); + (void) VOP_CLOSE(lvp, FREAD, cred, p); + } + if (error == 0) + UDEBUG(("union: copied up %s\n", un->un_path)); + + } + VOP_UNLOCK(uvp, 0, p); + union_newupper(un, uvp); + KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); + union_vn_close(uvp, FWRITE, cred, p); + KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); + /* + * Subsequent IOs will go to the top layer, so + * call close on the lower vnode and open on the + * upper vnode to ensure that the filesystem keeps + * its references counts right. This doesn't do + * the right thing with (cred) and (FREAD) though. + * Ignoring error returns is not right, either. + */ + if (error == 0) { + int i; + + for (i = 0; i < un->un_openl; i++) { + (void) VOP_CLOSE(lvp, FREAD, cred, p); + (void) VOP_OPEN(uvp, FREAD, cred, p); + } + if (un->un_openl) { + if (vn_canvmio(uvp) == TRUE) + error = vfs_object_create(uvp, p, cred); + } + un->un_openl = 0; + } + + return (error); + +} + +/* + * union_relookup: + * + * dvp should be locked on entry and will be locked on return. No + * net change in the ref count will occur. + * + * If an error is returned, *vpp will be invalid, otherwise it + * will hold a locked, referenced vnode. If *vpp == dvp then + * remember that only one exclusive lock is held. + */ + +static int +union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) + struct union_mount *um; + struct vnode *dvp; + struct vnode **vpp; + struct componentname *cnp; + struct componentname *cn; + char *path; + int pathlen; +{ + int error; + + /* + * A new componentname structure must be faked up because + * there is no way to know where the upper level cnp came + * from or what it is being used for. This must duplicate + * some of the work done by NDINIT, some of the work done + * by namei, some of the work done by lookup and some of + * the work done by VOP_LOOKUP when given a CREATE flag. + * Conclusion: Horrible. + */ + cn->cn_namelen = pathlen; + cn->cn_pnbuf = zalloc(namei_zone); + bcopy(path, cn->cn_pnbuf, cn->cn_namelen); + cn->cn_pnbuf[cn->cn_namelen] = '\0'; + + cn->cn_nameiop = CREATE; + cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); + cn->cn_proc = cnp->cn_proc; + if (um->um_op == UNMNT_ABOVE) + cn->cn_cred = cnp->cn_cred; + else + cn->cn_cred = um->um_cred; + cn->cn_nameptr = cn->cn_pnbuf; + cn->cn_consume = cnp->cn_consume; + + VREF(dvp); + VOP_UNLOCK(dvp, 0, cnp->cn_proc); + + /* + * Pass dvp unlocked and referenced on call to relookup(). + * + * If an error occurs, dvp will be returned unlocked and dereferenced. + */ + + if ((error = relookup(dvp, vpp, cn)) != 0) { + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_proc); + return(error); + } + + /* + * If no error occurs, dvp will be returned locked with the reference + * left as before, and vpp will be returned referenced and locked. + * + * We want to return with dvp as it was passed to us, so we get + * rid of our reference. + */ + vrele(dvp); + return (0); +} + +/* + * Create a shadow directory in the upper layer. + * The new vnode is returned locked. + * + * (um) points to the union mount structure for access to the + * the mounting process's credentials. + * (dvp) is the directory in which to create the shadow directory, + * it is locked (but not ref'd) on entry and return. + * (cnp) is the componentname to be created. + * (vpp) is the returned newly created shadow directory, which + * is returned locked and ref'd + */ +int +union_mkshadow(um, dvp, cnp, vpp) + struct union_mount *um; + struct vnode *dvp; + struct componentname *cnp; + struct vnode **vpp; +{ + int error; + struct vattr va; + struct proc *p = cnp->cn_proc; + struct componentname cn; + + error = union_relookup(um, dvp, vpp, cnp, &cn, + cnp->cn_nameptr, cnp->cn_namelen); + if (error) + return (error); + + if (*vpp) { + if (cn.cn_flags & HASBUF) { + zfree(namei_zone, cn.cn_pnbuf); + cn.cn_flags &= ~HASBUF; + } + if (dvp == *vpp) + vrele(*vpp); + else + vput(*vpp); + *vpp = NULLVP; + return (EEXIST); + } + + /* + * policy: when creating the shadow directory in the + * upper layer, create it owned by the user who did + * the mount, group from parent directory, and mode + * 777 modified by umask (ie mostly identical to the + * mkdir syscall). (jsp, kb) + */ + + VATTR_NULL(&va); + va.va_type = VDIR; + va.va_mode = um->um_cmode; + + /* VOP_LEASE: dvp is locked */ + VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE); + + error = VOP_MKDIR(dvp, vpp, &cn, &va); + if (cn.cn_flags & HASBUF) { + zfree(namei_zone, cn.cn_pnbuf); + cn.cn_flags &= ~HASBUF; + } + /*vput(dvp);*/ + return (error); +} + +/* + * Create a whiteout entry in the upper layer. + * + * (um) points to the union mount structure for access to the + * the mounting process's credentials. + * (dvp) is the directory in which to create the whiteout. + * it is locked on entry and return. + * (cnp) is the componentname to be created. + */ +int +union_mkwhiteout(um, dvp, cnp, path) + struct union_mount *um; + struct vnode *dvp; + struct componentname *cnp; + char *path; +{ + int error; + struct proc *p = cnp->cn_proc; + struct vnode *wvp; + struct componentname cn; + + error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path)); + if (error) + return (error); + + if (wvp) { + if (cn.cn_flags & HASBUF) { + zfree(namei_zone, cn.cn_pnbuf); + cn.cn_flags &= ~HASBUF; + } + if (wvp == dvp) + vrele(wvp); + else + vput(wvp); + return (EEXIST); + } + + /* VOP_LEASE: dvp is locked */ + VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE); + + error = VOP_WHITEOUT(dvp, &cn, CREATE); + if (cn.cn_flags & HASBUF) { + zfree(namei_zone, cn.cn_pnbuf); + cn.cn_flags &= ~HASBUF; + } + return (error); +} + +/* + * union_vn_create: creates and opens a new shadow file + * on the upper union layer. this function is similar + * in spirit to calling vn_open but it avoids calling namei(). + * the problem with calling namei is that a) it locks too many + * things, and b) it doesn't start at the "right" directory, + * whereas relookup is told where to start. + * + * On entry, the vnode associated with un is locked. It remains locked + * on return. + * + * If no error occurs, *vpp contains a locked referenced vnode for your + * use. If an error occurs *vpp iis undefined. + */ +static int +union_vn_create(vpp, un, p) + struct vnode **vpp; + struct union_node *un; + struct proc *p; +{ + struct vnode *vp; + struct ucred *cred = p->p_ucred; + struct vattr vat; + struct vattr *vap = &vat; + int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL); + int error; + int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask; + struct componentname cn; + + *vpp = NULLVP; + + /* + * Build a new componentname structure (for the same + * reasons outlines in union_mkshadow). + * The difference here is that the file is owned by + * the current user, rather than by the person who + * did the mount, since the current user needs to be + * able to write the file (that's why it is being + * copied in the first place). + */ + cn.cn_namelen = strlen(un->un_path); + cn.cn_pnbuf = zalloc(namei_zone); + bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1); + cn.cn_nameiop = CREATE; + cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); + cn.cn_proc = p; + cn.cn_cred = p->p_ucred; + cn.cn_nameptr = cn.cn_pnbuf; + cn.cn_consume = 0; + + /* + * Pass dvp unlocked and referenced on call to relookup(). + * + * If an error occurs, dvp will be returned unlocked and dereferenced. + */ + VREF(un->un_dirvp); + error = relookup(un->un_dirvp, &vp, &cn); + if (error) + return (error); + + /* + * If no error occurs, dvp will be returned locked with the reference + * left as before, and vpp will be returned referenced and locked. + */ + if (vp) { + vput(un->un_dirvp); + if (cn.cn_flags & HASBUF) { + zfree(namei_zone, cn.cn_pnbuf); + cn.cn_flags &= ~HASBUF; + } + if (vp == un->un_dirvp) + vrele(vp); + else + vput(vp); + return (EEXIST); + } + + /* + * Good - there was no race to create the file + * so go ahead and create it. The permissions + * on the file will be 0666 modified by the + * current user's umask. Access to the file, while + * it is unioned, will require access to the top *and* + * bottom files. Access when not unioned will simply + * require access to the top-level file. + * TODO: confirm choice of access permissions. + */ + VATTR_NULL(vap); + vap->va_type = VREG; + vap->va_mode = cmode; + VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE); + error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap); + if (cn.cn_flags & HASBUF) { + zfree(namei_zone, cn.cn_pnbuf); + cn.cn_flags &= ~HASBUF; + } + vput(un->un_dirvp); + if (error) + return (error); + + error = VOP_OPEN(vp, fmode, cred, p); + if (error == 0 && vn_canvmio(vp) == TRUE) + error = vfs_object_create(vp, p, cred); + if (error) { + vput(vp); + return (error); + } + vp->v_writecount++; + *vpp = vp; + return (0); +} + +static int +union_vn_close(vp, fmode, cred, p) + struct vnode *vp; + int fmode; + struct ucred *cred; + struct proc *p; +{ + + if (fmode & FWRITE) + --vp->v_writecount; + return (VOP_CLOSE(vp, fmode, cred, p)); +} + +#if 0 + +/* + * union_removed_upper: + * + * called with union_node unlocked. XXX + */ + +void +union_removed_upper(un) + struct union_node *un; +{ + struct proc *p = curproc; /* XXX */ + struct vnode **vpp; + + /* + * Do not set the uppervp to NULLVP. If lowervp is NULLVP, + * union node will have neither uppervp nor lowervp. We remove + * the union node from cache, so that it will not be referrenced. + */ + union_newupper(un, NULLVP); + if (un->un_dircache != 0) { + for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) + vrele(*vpp); + free(un->un_dircache, M_TEMP); + un->un_dircache = 0; + } + + if (un->un_flags & UN_CACHED) { + un->un_flags &= ~UN_CACHED; + LIST_REMOVE(un, un_cache); + } +} + +#endif + +/* + * determine whether a whiteout is needed + * during a remove/rmdir operation. + */ +int +union_dowhiteout(un, cred, p) + struct union_node *un; + struct ucred *cred; + struct proc *p; +{ + struct vattr va; + + if (un->un_lowervp != NULLVP) + return (1); + + if (VOP_GETATTR(un->un_uppervp, &va, cred, p) == 0 && + (va.va_flags & OPAQUE)) + return (1); + + return (0); +} + +static void +union_dircache_r(vp, vppp, cntp) + struct vnode *vp; + struct vnode ***vppp; + int *cntp; +{ + struct union_node *un; + + if (vp->v_op != union_vnodeop_p) { + if (vppp) { + VREF(vp); + *(*vppp)++ = vp; + if (--(*cntp) == 0) + panic("union: dircache table too small"); + } else { + (*cntp)++; + } + + return; + } + + un = VTOUNION(vp); + if (un->un_uppervp != NULLVP) + union_dircache_r(un->un_uppervp, vppp, cntp); + if (un->un_lowervp != NULLVP) + union_dircache_r(un->un_lowervp, vppp, cntp); +} + +struct vnode * +union_dircache(vp, p) + struct vnode *vp; + struct proc *p; +{ + int cnt; + struct vnode *nvp; + struct vnode **vpp; + struct vnode **dircache; + struct union_node *un; + int error; + + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + dircache = VTOUNION(vp)->un_dircache; + + nvp = NULLVP; + + if (dircache == NULL) { + cnt = 0; + union_dircache_r(vp, 0, &cnt); + cnt++; + dircache = malloc(cnt * sizeof(struct vnode *), + M_TEMP, M_WAITOK); + vpp = dircache; + union_dircache_r(vp, &vpp, &cnt); + *vpp = NULLVP; + vpp = dircache + 1; + } else { + vpp = dircache; + do { + if (*vpp++ == VTOUNION(vp)->un_uppervp) + break; + } while (*vpp != NULLVP); + } + + if (*vpp == NULLVP) + goto out; + + /*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);*/ + UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? (*vpp)->v_usecount : -99))); + VREF(*vpp); + error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0); + UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? (*vpp)->v_usecount : -99))); + if (error) + goto out; + + VTOUNION(vp)->un_dircache = 0; + un = VTOUNION(nvp); + un->un_dircache = dircache; + +out: + VOP_UNLOCK(vp, 0, p); + return (nvp); +} + +/* + * Guarentee coherency with the VM cache by invalidating any clean VM pages + * associated with this write and updating any dirty VM pages. Since our + * vnode is locked, other processes will not be able to read the pages in + * again until after our write completes. + * + * We also have to be coherent with reads, by flushing any pending dirty + * pages prior to issuing the read. + * + * XXX this is somewhat of a hack at the moment. To support this properly + * we would have to be able to run VOP_READ and VOP_WRITE through the VM + * cache. Then we wouldn't need to worry about coherency. + */ + +void +union_vm_coherency(struct vnode *vp, struct uio *uio, int cleanfls) +{ + vm_object_t object; + vm_pindex_t pstart; + vm_pindex_t pend; + int pgoff; + + if ((object = vp->v_object) == NULL) + return; + + pgoff = uio->uio_offset & PAGE_MASK; + pstart = uio->uio_offset / PAGE_SIZE; + pend = pstart + (uio->uio_resid + pgoff + PAGE_MASK) / PAGE_SIZE; + + vm_object_page_clean(object, pstart, pend, OBJPC_SYNC); + if (cleanfls) + vm_object_page_remove(object, pstart, pend, TRUE); +} + +/* + * Module glue to remove #ifdef UNION from vfs_syscalls.c + */ +static int +union_dircheck(struct proc *p, struct vnode **vp, struct file *fp) +{ + int error = 0; + + if ((*vp)->v_op == union_vnodeop_p) { + struct vnode *lvp; + + lvp = union_dircache(*vp, p); + if (lvp != NULLVP) { + struct vattr va; + + /* + * If the directory is opaque, + * then don't show lower entries + */ + error = VOP_GETATTR(*vp, &va, fp->f_cred, p); + if (va.va_flags & OPAQUE) { + vput(lvp); + lvp = NULL; + } + } + + if (lvp != NULLVP) { + error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); + if (error == 0 && vn_canvmio(lvp) == TRUE) + error = vfs_object_create(lvp, p, fp->f_cred); + if (error) { + vput(lvp); + return (error); + } + VOP_UNLOCK(lvp, 0, p); + fp->f_data = (caddr_t) lvp; + fp->f_offset = 0; + error = vn_close(*vp, FREAD, fp->f_cred, p); + if (error) + return (error); + *vp = lvp; + return -1; /* goto unionread */ + } + } + return error; +} + +static int +union_modevent(module_t mod, int type, void *data) +{ + switch (type) { + case MOD_LOAD: + union_dircheckp = union_dircheck; + break; + case MOD_UNLOAD: + union_dircheckp = NULL; + break; + default: + break; + } + return 0; +} + +static moduledata_t union_mod = { + "union_dircheck", + union_modevent, + NULL +}; + +DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY); diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c new file mode 100644 index 0000000..8b58e55 --- /dev/null +++ b/sys/fs/unionfs/union_vfsops.c @@ -0,0 +1,509 @@ +/* + * Copyright (c) 1994, 1995 The Regents of the University of California. + * Copyright (c) 1994, 1995 Jan-Simon Pendry. + * All rights reserved. + * + * This code is derived from software donated to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)union_vfsops.c 8.20 (Berkeley) 5/20/95 + * $FreeBSD$ + */ + +/* + * Union Layer + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/namei.h> +#include <sys/malloc.h> +#include <sys/filedesc.h> +#include <miscfs/union/union.h> +#include <vm/vm_zone.h> + +static MALLOC_DEFINE(M_UNIONFSMNT, "UNION mount", "UNION mount structure"); + +extern int union_init __P((struct vfsconf *)); +static int union_mount __P((struct mount *mp, char *path, caddr_t data, + struct nameidata *ndp, struct proc *p)); +static int union_root __P((struct mount *mp, struct vnode **vpp)); +static int union_statfs __P((struct mount *mp, struct statfs *sbp, + struct proc *p)); +static int union_unmount __P((struct mount *mp, int mntflags, + struct proc *p)); + +/* + * Mount union filesystem + */ +static int +union_mount(mp, path, data, ndp, p) + struct mount *mp; + char *path; + caddr_t data; + struct nameidata *ndp; + struct proc *p; +{ + int error = 0; + struct union_args args; + struct vnode *lowerrootvp = NULLVP; + struct vnode *upperrootvp = NULLVP; + struct union_mount *um = 0; + struct ucred *cred = 0; + char *cp = 0; + int len; + u_int size; + + UDEBUG(("union_mount(mp = %p)\n", (void *)mp)); + + /* + * Disable clustered write, otherwise system becomes unstable. + */ + mp->mnt_flag |= MNT_NOCLUSTERW; + + /* + * Update is a no-op + */ + if (mp->mnt_flag & MNT_UPDATE) { + /* + * Need to provide. + * 1. a way to convert between rdonly and rdwr mounts. + * 2. support for nfs exports. + */ + error = EOPNOTSUPP; + goto bad; + } + + /* + * Get argument + */ + error = copyin(data, (caddr_t)&args, sizeof(struct union_args)); + if (error) + goto bad; + + /* + * Obtain lower vnode. Vnode is stored in mp->mnt_vnodecovered. + * We need to reference it but not lock it. + */ + + lowerrootvp = mp->mnt_vnodecovered; + VREF(lowerrootvp); + +#if 0 + /* + * Unlock lower node to avoid deadlock. + */ + if (lowerrootvp->v_op == union_vnodeop_p) + VOP_UNLOCK(lowerrootvp, 0, p); +#endif + + /* + * Obtain upper vnode by calling namei() on the path. The + * upperrootvp will be turned referenced but not locked. + */ + NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT, + UIO_USERSPACE, args.target, p); + + error = namei(ndp); + +#if 0 + if (lowerrootvp->v_op == union_vnodeop_p) + vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY, p); +#endif + if (error) + goto bad; + + NDFREE(ndp, NDF_ONLY_PNBUF); + upperrootvp = ndp->ni_vp; + vrele(ndp->ni_dvp); + ndp->ni_dvp = NULL; + + UDEBUG(("mount_root UPPERVP %p locked = %d\n", upperrootvp, + VOP_ISLOCKED(upperrootvp, NULL))); + + /* + * Check multi union mount to avoid `lock myself again' panic. + * Also require that it be a directory. + */ + if (upperrootvp == VTOUNION(lowerrootvp)->un_uppervp) { +#ifdef DIAGNOSTIC + printf("union_mount: multi union mount?\n"); +#endif + error = EDEADLK; + goto bad; + } + + if (upperrootvp->v_type != VDIR) { + error = EINVAL; + goto bad; + } + + /* + * Allocate our union_mount structure and populate the fields. + * The vnode references are stored in the union_mount as held, + * unlocked references. Depending on the _BELOW flag, the + * filesystems are viewed in a different order. In effect this + * is the same as providing a mount-under option to the mount + * syscall. + */ + + um = (struct union_mount *) malloc(sizeof(struct union_mount), + M_UNIONFSMNT, M_WAITOK); + + bzero(um, sizeof(struct union_mount)); + + um->um_op = args.mntflags & UNMNT_OPMASK; + + switch (um->um_op) { + case UNMNT_ABOVE: + um->um_lowervp = lowerrootvp; + um->um_uppervp = upperrootvp; + upperrootvp = NULL; + lowerrootvp = NULL; + break; + + case UNMNT_BELOW: + um->um_lowervp = upperrootvp; + um->um_uppervp = lowerrootvp; + upperrootvp = NULL; + lowerrootvp = NULL; + break; + + case UNMNT_REPLACE: + vrele(lowerrootvp); + lowerrootvp = NULL; + um->um_uppervp = upperrootvp; + um->um_lowervp = lowerrootvp; + upperrootvp = NULL; + break; + + default: + error = EINVAL; + goto bad; + } + + /* + * Unless the mount is readonly, ensure that the top layer + * supports whiteout operations + */ + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + error = VOP_WHITEOUT(um->um_uppervp, NULL, LOOKUP); + if (error) + goto bad; + } + + um->um_cred = p->p_ucred; + crhold(um->um_cred); + um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask; + + /* + * Depending on what you think the MNT_LOCAL flag might mean, + * you may want the && to be || on the conditional below. + * At the moment it has been defined that the filesystem is + * only local if it is all local, ie the MNT_LOCAL flag implies + * that the entire namespace is local. If you think the MNT_LOCAL + * flag implies that some of the files might be stored locally + * then you will want to change the conditional. + */ + if (um->um_op == UNMNT_ABOVE) { + if (((um->um_lowervp == NULLVP) || + (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) && + (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL)) + mp->mnt_flag |= MNT_LOCAL; + } + + /* + * Copy in the upper layer's RDONLY flag. This is for the benefit + * of lookup() which explicitly checks the flag, rather than asking + * the filesystem for its own opinion. This means, that an update + * mount of the underlying filesystem to go from rdonly to rdwr + * will leave the unioned view as read-only. + */ + mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY); + + mp->mnt_data = (qaddr_t) um; + vfs_getnewfsid(mp); + + (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); + bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); + + switch (um->um_op) { + case UNMNT_ABOVE: + cp = "<above>:"; + break; + case UNMNT_BELOW: + cp = "<below>:"; + break; + case UNMNT_REPLACE: + cp = ""; + break; + } + len = strlen(cp); + bcopy(cp, mp->mnt_stat.f_mntfromname, len); + + cp = mp->mnt_stat.f_mntfromname + len; + len = MNAMELEN - len; + + (void) copyinstr(args.target, cp, len - 1, &size); + bzero(cp + size, len - size); + + (void)union_statfs(mp, &mp->mnt_stat, p); + + UDEBUG(("union_mount: from %s, on %s\n", + mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname)); + return (0); + +bad: + if (um) { + if (um->um_uppervp) + vrele(um->um_uppervp); + if (um->um_lowervp) + vrele(um->um_lowervp); + /* XXX other fields */ + free(um, M_UNIONFSMNT); + } + if (cred) + crfree(cred); + if (upperrootvp) + vrele(upperrootvp); + if (lowerrootvp) + vrele(lowerrootvp); + return (error); +} + +/* + * Free reference to union layer + */ +static int +union_unmount(mp, mntflags, p) + struct mount *mp; + int mntflags; + struct proc *p; +{ + struct union_mount *um = MOUNTTOUNIONMOUNT(mp); + struct vnode *um_rootvp; + int error; + int freeing; + int flags = 0; + + UDEBUG(("union_unmount(mp = %p)\n", (void *)mp)); + + if (mntflags & MNT_FORCE) + flags |= FORCECLOSE; + + if ((error = union_root(mp, &um_rootvp)) != 0) + return (error); + + /* + * Keep flushing vnodes from the mount list. + * This is needed because of the un_pvp held + * reference to the parent vnode. + * If more vnodes have been freed on a given pass, + * the try again. The loop will iterate at most + * (d) times, where (d) is the maximum tree depth + * in the filesystem. + */ + for (freeing = 0; vflush(mp, um_rootvp, flags) != 0;) { + struct vnode *vp; + int n; + + /* count #vnodes held on mount list */ + for (n = 0, vp = mp->mnt_vnodelist.lh_first; + vp != NULLVP; + vp = vp->v_mntvnodes.le_next) + n++; + + /* if this is unchanged then stop */ + if (n == freeing) + break; + + /* otherwise try once more time */ + freeing = n; + } + + /* At this point the root vnode should have a single reference */ + if (um_rootvp->v_usecount > 1) { + vput(um_rootvp); + return (EBUSY); + } + +#ifdef DEBUG + vprint("union root", um_rootvp); +#endif + /* + * Discard references to upper and lower target vnodes. + */ + if (um->um_lowervp) + vrele(um->um_lowervp); + vrele(um->um_uppervp); + crfree(um->um_cred); + /* + * Release reference on underlying root vnode + */ + vput(um_rootvp); + /* + * And blow it away for future re-use + */ + vgone(um_rootvp); + /* + * Finally, throw away the union_mount structure + */ + free(mp->mnt_data, M_UNIONFSMNT); /* XXX */ + mp->mnt_data = 0; + return (0); +} + +static int +union_root(mp, vpp) + struct mount *mp; + struct vnode **vpp; +{ + struct union_mount *um = MOUNTTOUNIONMOUNT(mp); + int error; + + /* + * Supply an unlocked reference to um_uppervp and to um_lowervp. It + * is possible for um_uppervp to be locked without the associated + * root union_node being locked. We let union_allocvp() deal with + * it. + */ + UDEBUG(("union_root UPPERVP %p locked = %d\n", um->um_uppervp, + VOP_ISLOCKED(um->um_uppervp, NULL))); + + VREF(um->um_uppervp); + if (um->um_lowervp) + VREF(um->um_lowervp); + + error = union_allocvp(vpp, mp, NULLVP, NULLVP, NULL, + um->um_uppervp, um->um_lowervp, 1); + UDEBUG(("error %d\n", error)); + UDEBUG(("union_root2 UPPERVP %p locked = %d\n", um->um_uppervp, + VOP_ISLOCKED(um->um_uppervp, NULL))); + + return (error); +} + +static int +union_statfs(mp, sbp, p) + struct mount *mp; + struct statfs *sbp; + struct proc *p; +{ + int error; + struct union_mount *um = MOUNTTOUNIONMOUNT(mp); + struct statfs mstat; + int lbsize; + + UDEBUG(("union_statfs(mp = %p, lvp = %p, uvp = %p)\n", + (void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp)); + + bzero(&mstat, sizeof(mstat)); + + if (um->um_lowervp) { + error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, p); + if (error) + return (error); + } + + /* now copy across the "interesting" information and fake the rest */ +#if 0 + sbp->f_type = mstat.f_type; + sbp->f_flags = mstat.f_flags; + sbp->f_bsize = mstat.f_bsize; + sbp->f_iosize = mstat.f_iosize; +#endif + lbsize = mstat.f_bsize; + sbp->f_blocks = mstat.f_blocks; + sbp->f_bfree = mstat.f_bfree; + sbp->f_bavail = mstat.f_bavail; + sbp->f_files = mstat.f_files; + sbp->f_ffree = mstat.f_ffree; + + error = VFS_STATFS(um->um_uppervp->v_mount, &mstat, p); + if (error) + return (error); + + sbp->f_flags = mstat.f_flags; + sbp->f_bsize = mstat.f_bsize; + sbp->f_iosize = mstat.f_iosize; + + /* + * if the lower and upper blocksizes differ, then frig the + * block counts so that the sizes reported by df make some + * kind of sense. none of this makes sense though. + */ + + if (mstat.f_bsize != lbsize) + sbp->f_blocks = ((off_t) sbp->f_blocks * lbsize) / mstat.f_bsize; + + /* + * The "total" fields count total resources in all layers, + * the "free" fields count only those resources which are + * free in the upper layer (since only the upper layer + * is writeable). + */ + sbp->f_blocks += mstat.f_blocks; + sbp->f_bfree = mstat.f_bfree; + sbp->f_bavail = mstat.f_bavail; + sbp->f_files += mstat.f_files; + sbp->f_ffree = mstat.f_ffree; + + if (sbp != &mp->mnt_stat) { + sbp->f_type = mp->mnt_vfc->vfc_typenum; + bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); + bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); + bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); + } + return (0); +} + +static struct vfsops union_vfsops = { + union_mount, + vfs_stdstart, /* underlying start already done */ + union_unmount, + union_root, + vfs_stdquotactl, + union_statfs, + vfs_stdsync, /* XXX assumes no cached data on union level */ + vfs_stdvget, + vfs_stdfhtovp, + vfs_stdcheckexp, + vfs_stdvptofh, + union_init, + vfs_stduninit, + vfs_stdextattrctl, +}; + +VFS_SET(union_vfsops, union, VFCF_LOOPBACK); diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c new file mode 100644 index 0000000..03d41a0 --- /dev/null +++ b/sys/fs/unionfs/union_vnops.c @@ -0,0 +1,1981 @@ +/* + * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry. + * Copyright (c) 1992, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)union_vnops.c 8.32 (Berkeley) 6/23/95 + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/fcntl.h> +#include <sys/stat.h> +#include <sys/kernel.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/namei.h> +#include <sys/malloc.h> +#include <sys/buf.h> +#include <sys/lock.h> +#include <sys/sysctl.h> +#include <miscfs/union/union.h> + +#include <vm/vm.h> +#include <vm/vnode_pager.h> + +#include <vm/vm_page.h> +#include <vm/vm_object.h> + +int uniondebug = 0; + +#if UDEBUG_ENABLED +SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, ""); +#else +SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, ""); +#endif + +static int union_access __P((struct vop_access_args *ap)); +static int union_advlock __P((struct vop_advlock_args *ap)); +static int union_bmap __P((struct vop_bmap_args *ap)); +static int union_close __P((struct vop_close_args *ap)); +static int union_create __P((struct vop_create_args *ap)); +static int union_fsync __P((struct vop_fsync_args *ap)); +static int union_getattr __P((struct vop_getattr_args *ap)); +static int union_inactive __P((struct vop_inactive_args *ap)); +static int union_ioctl __P((struct vop_ioctl_args *ap)); +static int union_lease __P((struct vop_lease_args *ap)); +static int union_link __P((struct vop_link_args *ap)); +static int union_lock __P((struct vop_lock_args *ap)); +static int union_lookup __P((struct vop_lookup_args *ap)); +static int union_lookup1 __P((struct vnode *udvp, struct vnode **dvp, + struct vnode **vpp, + struct componentname *cnp)); +static int union_mkdir __P((struct vop_mkdir_args *ap)); +static int union_mknod __P((struct vop_mknod_args *ap)); +static int union_mmap __P((struct vop_mmap_args *ap)); +static int union_open __P((struct vop_open_args *ap)); +static int union_pathconf __P((struct vop_pathconf_args *ap)); +static int union_print __P((struct vop_print_args *ap)); +static int union_read __P((struct vop_read_args *ap)); +static int union_readdir __P((struct vop_readdir_args *ap)); +static int union_readlink __P((struct vop_readlink_args *ap)); +static int union_reclaim __P((struct vop_reclaim_args *ap)); +static int union_remove __P((struct vop_remove_args *ap)); +static int union_rename __P((struct vop_rename_args *ap)); +static int union_revoke __P((struct vop_revoke_args *ap)); +static int union_rmdir __P((struct vop_rmdir_args *ap)); +static int union_poll __P((struct vop_poll_args *ap)); +static int union_setattr __P((struct vop_setattr_args *ap)); +static int union_strategy __P((struct vop_strategy_args *ap)); +static int union_getpages __P((struct vop_getpages_args *ap)); +static int union_putpages __P((struct vop_putpages_args *ap)); +static int union_symlink __P((struct vop_symlink_args *ap)); +static int union_unlock __P((struct vop_unlock_args *ap)); +static int union_whiteout __P((struct vop_whiteout_args *ap)); +static int union_write __P((struct vop_read_args *ap)); + +static __inline +struct vnode * +union_lock_upper(struct union_node *un, struct proc *p) +{ + struct vnode *uppervp; + + if ((uppervp = un->un_uppervp) != NULL) { + VREF(uppervp); + vn_lock(uppervp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, p); + } + KASSERT((uppervp == NULL || uppervp->v_usecount > 0), ("uppervp usecount is 0")); + return(uppervp); +} + +static __inline +void +union_unlock_upper(struct vnode *uppervp, struct proc *p) +{ + vput(uppervp); +} + +static __inline +struct vnode * +union_lock_other(struct union_node *un, struct proc *p) +{ + struct vnode *vp; + + if (un->un_uppervp != NULL) { + vp = union_lock_upper(un, p); + } else if ((vp = un->un_lowervp) != NULL) { + VREF(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, p); + } + return(vp); +} + +static __inline +void +union_unlock_other(struct vnode *vp, struct proc *p) +{ + vput(vp); +} + +/* + * union_lookup: + * + * udvp must be exclusively locked on call and will remain + * exclusively locked on return. This is the mount point + * for out filesystem. + * + * dvp Our base directory, locked and referenced. + * The passed dvp will be dereferenced and unlocked on return + * and a new dvp will be returned which is locked and + * referenced in the same variable. + * + * vpp is filled in with the result if no error occured, + * locked and ref'd. + * + * If an error is returned, *vpp is set to NULLVP. If no + * error occurs, *vpp is returned with a reference and an + * exclusive lock. + */ + +static int +union_lookup1(udvp, pdvp, vpp, cnp) + struct vnode *udvp; + struct vnode **pdvp; + struct vnode **vpp; + struct componentname *cnp; +{ + int error; + struct proc *p = cnp->cn_proc; + struct vnode *dvp = *pdvp; + struct vnode *tdvp; + struct mount *mp; + + /* + * If stepping up the directory tree, check for going + * back across the mount point, in which case do what + * lookup would do by stepping back down the mount + * hierarchy. + */ + if (cnp->cn_flags & ISDOTDOT) { + while ((dvp != udvp) && (dvp->v_flag & VROOT)) { + /* + * Don't do the NOCROSSMOUNT check + * at this level. By definition, + * union fs deals with namespaces, not + * filesystems. + */ + tdvp = dvp; + dvp = dvp->v_mount->mnt_vnodecovered; + VREF(dvp); + vput(tdvp); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + } + } + + /* + * Set return dvp to be the upperdvp 'parent directory. + */ + *pdvp = dvp; + + /* + * If the VOP_LOOKUP call generates an error, tdvp is invalid and no + * changes will have been made to dvp, so we are set to return. + */ + + error = VOP_LOOKUP(dvp, &tdvp, cnp); + if (error) { + UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags)); + *vpp = NULL; + return (error); + } + + /* + * The parent directory will have been unlocked, unless lookup + * found the last component or if dvp == tdvp (tdvp must be locked). + * + * We want our dvp to remain locked and ref'd. We also want tdvp + * to remain locked and ref'd. + */ + UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags)); + + if (dvp != tdvp && (cnp->cn_flags & ISLASTCN) == 0) + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + + /* + * Lastly check if the current node is a mount point in + * which case walk up the mount hierarchy making sure not to + * bump into the root of the mount tree (ie. dvp != udvp). + * + * We use dvp as a temporary variable here, it is no longer related + * to the dvp above. However, we have to ensure that both *pdvp and + * tdvp are locked on return. + */ + + dvp = tdvp; + while ( + dvp != udvp && + (dvp->v_type == VDIR) && + (mp = dvp->v_mountedhere) + ) { + int relock_pdvp = 0; + + if (vfs_busy(mp, 0, 0, p)) + continue; + + if (dvp == *pdvp) + relock_pdvp = 1; + vput(dvp); + dvp = NULL; + error = VFS_ROOT(mp, &dvp); + + vfs_unbusy(mp, p); + + if (relock_pdvp) + vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY, p); + + if (error) { + *vpp = NULL; + return (error); + } + } + *vpp = dvp; + return (0); +} + +static int +union_lookup(ap) + struct vop_lookup_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + } */ *ap; +{ + int error; + int uerror, lerror; + struct vnode *uppervp, *lowervp; + struct vnode *upperdvp, *lowerdvp; + struct vnode *dvp = ap->a_dvp; /* starting dir */ + struct union_node *dun = VTOUNION(dvp); /* associated union node */ + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + int lockparent = cnp->cn_flags & LOCKPARENT; + struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount); + struct ucred *saved_cred = NULL; + int iswhiteout; + struct vattr va; + + *ap->a_vpp = NULLVP; + + /* + * Disallow write attemps to the filesystem mounted read-only. + */ + if ((cnp->cn_flags & ISLASTCN) && + (dvp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { + return (EROFS); + } + + /* + * For any lookup's we do, always return with the parent locked + */ + cnp->cn_flags |= LOCKPARENT; + + lowerdvp = dun->un_lowervp; + uppervp = NULLVP; + lowervp = NULLVP; + iswhiteout = 0; + + uerror = ENOENT; + lerror = ENOENT; + + /* + * Get a private lock on uppervp and a reference, effectively + * taking it out of the union_node's control. + * + * We must lock upperdvp while holding our lock on dvp + * to avoid a deadlock. + */ + upperdvp = union_lock_upper(dun, p); + + /* + * do the lookup in the upper level. + * if that level comsumes additional pathnames, + * then assume that something special is going + * on and just return that vnode. + */ + if (upperdvp != NULLVP) { + /* + * We do not have to worry about the DOTDOT case, we've + * already unlocked dvp. + */ + UDEBUG(("A %p\n", upperdvp)); + + /* + * Do the lookup. We must supply a locked and referenced + * upperdvp to the function and will get a new locked and + * referenced upperdvp back with the old having been + * dereferenced. + * + * If an error is returned, uppervp will be NULLVP. If no + * error occurs, uppervp will be the locked and referenced + * return vnode or possibly NULL, depending on what is being + * requested. It is possible that the returned uppervp + * will be the same as upperdvp. + */ + uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp); + UDEBUG(( + "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n", + uerror, + upperdvp, + upperdvp->v_usecount, + VOP_ISLOCKED(upperdvp, NULL), + uppervp, + (uppervp ? uppervp->v_usecount : -99), + (uppervp ? VOP_ISLOCKED(uppervp, NULL) : -99) + )); + + /* + * Disallow write attemps to the filesystem mounted read-only. + */ + if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) && + (dvp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) { + error = EROFS; + goto out; + } + + /* + * Special case. If cn_consume != 0 skip out. The result + * of the lookup is transfered to our return variable. If + * an error occured we have to throw away the results. + */ + + if (cnp->cn_consume != 0) { + if ((error = uerror) == 0) { + *ap->a_vpp = uppervp; + uppervp = NULL; + } + goto out; + } + + /* + * Calculate whiteout, fall through + */ + + if (uerror == ENOENT || uerror == EJUSTRETURN) { + if (cnp->cn_flags & ISWHITEOUT) { + iswhiteout = 1; + } else if (lowerdvp != NULLVP) { + int terror; + + terror = VOP_GETATTR(upperdvp, &va, + cnp->cn_cred, cnp->cn_proc); + if (terror == 0 && (va.va_flags & OPAQUE)) + iswhiteout = 1; + } + } + } + + /* + * in a similar way to the upper layer, do the lookup + * in the lower layer. this time, if there is some + * component magic going on, then vput whatever we got + * back from the upper layer and return the lower vnode + * instead. + */ + + if (lowerdvp != NULLVP && !iswhiteout) { + int nameiop; + + UDEBUG(("B %p\n", lowerdvp)); + + /* + * Force only LOOKUPs on the lower node, since + * we won't be making changes to it anyway. + */ + nameiop = cnp->cn_nameiop; + cnp->cn_nameiop = LOOKUP; + if (um->um_op == UNMNT_BELOW) { + saved_cred = cnp->cn_cred; + cnp->cn_cred = um->um_cred; + } + + /* + * We shouldn't have to worry about locking interactions + * between the lower layer and our union layer (w.r.t. + * `..' processing) because we don't futz with lowervp + * locks in the union-node instantiation code path. + * + * union_lookup1() requires lowervp to be locked on entry, + * and it will be unlocked on return. The ref count will + * not change. On return lowervp doesn't represent anything + * to us so we NULL it out. + */ + VREF(lowerdvp); + vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p); + lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp); + if (lowerdvp == lowervp) + vrele(lowerdvp); + else + vput(lowerdvp); + lowerdvp = NULL; /* lowerdvp invalid after vput */ + + if (um->um_op == UNMNT_BELOW) + cnp->cn_cred = saved_cred; + cnp->cn_nameiop = nameiop; + + if (cnp->cn_consume != 0 || lerror == EACCES) { + if ((error = lerror) == 0) { + *ap->a_vpp = lowervp; + lowervp = NULL; + } + goto out; + } + } else { + UDEBUG(("C %p\n", lowerdvp)); + if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) { + if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) { + VREF(lowervp); + vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, p); + lerror = 0; + } + } + } + + /* + * Ok. Now we have uerror, uppervp, upperdvp, lerror, and lowervp. + * + * 1. If both layers returned an error, select the upper layer. + * + * 2. If the upper layer faile and the bottom layer succeeded, + * two subcases occur: + * + * a. The bottom vnode is not a directory, in which case + * just return a new union vnode referencing an + * empty top layer and the existing bottom layer. + * + * b. The button vnode is a directory, in which case + * create a new directory in the top layer and + * and fall through to case 3. + * + * 3. If the top layer succeeded then return a new union + * vnode referencing whatever the new top layer and + * whatever the bottom layer returned. + */ + + /* case 1. */ + if ((uerror != 0) && (lerror != 0)) { + error = uerror; + goto out; + } + + /* case 2. */ + if (uerror != 0 /* && (lerror == 0) */ ) { + if (lowervp->v_type == VDIR) { /* case 2b. */ + KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL")); + /* + * oops, uppervp has a problem, we may have to shadow. + */ + uerror = union_mkshadow(um, upperdvp, cnp, &uppervp); + if (uerror) { + error = uerror; + goto out; + } + } + } + + /* + * Must call union_allocvp with both the upper and lower vnodes + * referenced and the upper vnode locked. ap->a_vpp is returned + * referenced and locked. lowervp, uppervp, and upperdvp are + * absorbed by union_allocvp() whether it succeeds or fails. + * + * upperdvp is the parent directory of uppervp which may be + * different, depending on the path, from dvp->un_uppervp. That's + * why it is a separate argument. Note that it must be unlocked. + * + * dvp must be locked on entry to the call and will be locked on + * return. + */ + + if (uppervp && uppervp != upperdvp) + VOP_UNLOCK(uppervp, 0, p); + if (lowervp) + VOP_UNLOCK(lowervp, 0, p); + if (upperdvp) + VOP_UNLOCK(upperdvp, 0, p); + + error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp, + uppervp, lowervp, 1); + + UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp, (*ap->a_vpp) ? ((*ap->a_vpp)->v_usecount) : -99)); + + uppervp = NULL; + upperdvp = NULL; + lowervp = NULL; + + /* + * Termination Code + * + * - put away any extra junk laying around. Note that lowervp + * (if not NULL) will never be the same as *ap->a_vp and + * neither will uppervp, because when we set that state we + * NULL-out lowervp or uppervp. On the otherhand, upperdvp + * may match uppervp or *ap->a_vpp. + * + * - relock/unlock dvp if appropriate. + */ + +out: + if (upperdvp) { + if (upperdvp == uppervp || upperdvp == *ap->a_vpp) + vrele(upperdvp); + else + vput(upperdvp); + } + + if (uppervp) + vput(uppervp); + + if (lowervp) + vput(lowervp); + + /* + * Restore LOCKPARENT state + */ + + if (!lockparent) + cnp->cn_flags &= ~LOCKPARENT; + + UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp, + ((*ap->a_vpp) ? (*ap->a_vpp)->v_usecount : -99), + lowervp, uppervp)); + + /* + * dvp lock state, determine whether to relock dvp. dvp is expected + * to be locked on return if: + * + * - there was an error (except not EJUSTRETURN), or + * - we hit the last component and lockparent is true + * + * dvp_is_locked is the current state of the dvp lock, not counting + * the possibility that *ap->a_vpp == dvp (in which case it is locked + * anyway). Note that *ap->a_vpp == dvp only if no error occured. + */ + + if (*ap->a_vpp != dvp) { + if ((error == 0 || error == EJUSTRETURN) && + (!lockparent || (cnp->cn_flags & ISLASTCN) == 0)) { + VOP_UNLOCK(dvp, 0, p); + } + } + + /* + * Diagnostics + */ + +#ifdef DIAGNOSTIC + if (cnp->cn_namelen == 1 && + cnp->cn_nameptr[0] == '.' && + *ap->a_vpp != dvp) { + panic("union_lookup returning . (%p) not same as startdir (%p)", ap->a_vpp, dvp); + } +#endif + + return (error); +} + +/* + * union_create: + * + * a_dvp is locked on entry and remains locked on return. a_vpp is returned + * locked if no error occurs, otherwise it is garbage. + */ + +static int +union_create(ap) + struct vop_create_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + struct union_node *dun = VTOUNION(ap->a_dvp); + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + struct vnode *dvp; + int error = EROFS; + + if ((dvp = union_lock_upper(dun, p)) != NULL) { + struct vnode *vp; + struct mount *mp; + + error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap); + if (error == 0) { + mp = ap->a_dvp->v_mount; + VOP_UNLOCK(vp, 0, p); + UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n", vp, vp->v_usecount)); + error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, + cnp, vp, NULLVP, 1); + UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vp->v_usecount)); + } + union_unlock_upper(dvp, p); + } + return (error); +} + +static int +union_whiteout(ap) + struct vop_whiteout_args /* { + struct vnode *a_dvp; + struct componentname *a_cnp; + int a_flags; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_dvp); + struct componentname *cnp = ap->a_cnp; + struct vnode *uppervp; + int error = EOPNOTSUPP; + + if ((uppervp = union_lock_upper(un, cnp->cn_proc)) != NULLVP) { + error = VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags); + union_unlock_upper(uppervp, cnp->cn_proc); + } + return(error); +} + +/* + * union_mknod: + * + * a_dvp is locked on entry and should remain locked on return. + * a_vpp is garbagre whether an error occurs or not. + */ + +static int +union_mknod(ap) + struct vop_mknod_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + struct union_node *dun = VTOUNION(ap->a_dvp); + struct componentname *cnp = ap->a_cnp; + struct vnode *dvp; + int error = EROFS; + + if ((dvp = union_lock_upper(dun, cnp->cn_proc)) != NULL) { + error = VOP_MKNOD(dvp, ap->a_vpp, cnp, ap->a_vap); + union_unlock_upper(dvp, cnp->cn_proc); + } + return (error); +} + +/* + * union_open: + * + * run open VOP. When opening the underlying vnode we have to mimic + * vn_open. What we *really* need to do to avoid screwups if the + * open semantics change is to call vn_open(). For example, ufs blows + * up if you open a file but do not vmio it prior to writing. + */ + +static int +union_open(ap) + struct vop_open_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_vp); + struct vnode *tvp; + int mode = ap->a_mode; + struct ucred *cred = ap->a_cred; + struct proc *p = ap->a_p; + int error = 0; + int tvpisupper = 1; + + /* + * If there is an existing upper vp then simply open that. + * The upper vp takes precedence over the lower vp. When opening + * a lower vp for writing copy it to the uppervp and then open the + * uppervp. + * + * At the end of this section tvp will be left locked. + */ + if ((tvp = union_lock_upper(un, p)) == NULLVP) { + /* + * If the lower vnode is being opened for writing, then + * copy the file contents to the upper vnode and open that, + * otherwise can simply open the lower vnode. + */ + tvp = un->un_lowervp; + if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) { + int docopy = !(mode & O_TRUNC); + error = union_copyup(un, docopy, cred, p); + tvp = union_lock_upper(un, p); + } else { + un->un_openl++; + VREF(tvp); + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); + tvpisupper = 0; + } + } + + /* + * We are holding the correct vnode, open it + */ + + if (error == 0) + error = VOP_OPEN(tvp, mode, cred, p); + + /* + * Absolutely necessary or UFS will blowup + */ + if (error == 0 && vn_canvmio(tvp) == TRUE) { + error = vfs_object_create(tvp, p, cred); + } + + /* + * Release any locks held + */ + if (tvpisupper) { + if (tvp) + union_unlock_upper(tvp, p); + } else { + vput(tvp); + } + return (error); +} + +/* + * union_close: + * + * It is unclear whether a_vp is passed locked or unlocked. Whatever + * the case we do not change it. + */ + +static int +union_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_vp); + struct vnode *vp; + + if ((vp = un->un_uppervp) == NULLVP) { +#ifdef UNION_DIAGNOSTIC + if (un->un_openl <= 0) + panic("union: un_openl cnt"); +#endif + --un->un_openl; + vp = un->un_lowervp; + } + ap->a_vp = vp; + return (VCALL(vp, VOFFSET(vop_close), ap)); +} + +/* + * Check access permission on the union vnode. + * The access check being enforced is to check + * against both the underlying vnode, and any + * copied vnode. This ensures that no additional + * file permissions are given away simply because + * the user caused an implicit file copy. + */ +static int +union_access(ap) + struct vop_access_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_vp); + struct proc *p = ap->a_p; + int error = EACCES; + struct vnode *vp; + + /* + * Disallow write attempts on filesystems mounted read-only. + */ + if ((ap->a_mode & VWRITE) && + (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) { + switch (ap->a_vp->v_type) { + case VREG: + case VDIR: + case VLNK: + return (EROFS); + default: + break; + } + } + + if ((vp = union_lock_upper(un, p)) != NULLVP) { + ap->a_vp = vp; + error = VCALL(vp, VOFFSET(vop_access), ap); + union_unlock_upper(vp, p); + return(error); + } + + if ((vp = un->un_lowervp) != NULLVP) { + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + ap->a_vp = vp; + + /* + * Remove VWRITE from a_mode if our mount point is RW, because + * we want to allow writes and lowervp may be read-only. + */ + if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0) + ap->a_mode &= ~VWRITE; + + error = VCALL(vp, VOFFSET(vop_access), ap); + if (error == 0) { + struct union_mount *um; + + um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount); + + if (um->um_op == UNMNT_BELOW) { + ap->a_cred = um->um_cred; + error = VCALL(vp, VOFFSET(vop_access), ap); + } + } + VOP_UNLOCK(vp, 0, p); + } + return(error); +} + +/* + * We handle getattr only to change the fsid and + * track object sizes + * + * It's not clear whether VOP_GETATTR is to be + * called with the vnode locked or not. stat() calls + * it with (vp) locked, and fstat calls it with + * (vp) unlocked. + * + * Because of this we cannot use our normal locking functions + * if we do not intend to lock the main a_vp node. At the moment + * we are running without any specific locking at all, but beware + * to any programmer that care must be taken if locking is added + * to this function. + */ + +static int +union_getattr(ap) + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + int error; + struct union_node *un = VTOUNION(ap->a_vp); + struct vnode *vp; + struct vattr *vap; + struct vattr va; + + /* + * Some programs walk the filesystem hierarchy by counting + * links to directories to avoid stat'ing all the time. + * This means the link count on directories needs to be "correct". + * The only way to do that is to call getattr on both layers + * and fix up the link count. The link count will not necessarily + * be accurate but will be large enough to defeat the tree walkers. + */ + + vap = ap->a_vap; + + if ((vp = un->un_uppervp) != NULLVP) { + error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); + if (error) + return (error); + /* XXX isn't this dangerouso without a lock? */ + union_newsize(ap->a_vp, vap->va_size, VNOVAL); + } + + if (vp == NULLVP) { + vp = un->un_lowervp; + } else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) { + vp = un->un_lowervp; + vap = &va; + } else { + vp = NULLVP; + } + + if (vp != NULLVP) { + error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); + if (error) + return (error); + /* XXX isn't this dangerous without a lock? */ + union_newsize(ap->a_vp, VNOVAL, vap->va_size); + } + + if ((vap != ap->a_vap) && (vap->va_type == VDIR)) + ap->a_vap->va_nlink += vap->va_nlink; + return (0); +} + +static int +union_setattr(ap) + struct vop_setattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_vp); + struct proc *p = ap->a_p; + struct vattr *vap = ap->a_vap; + struct vnode *uppervp; + int error; + + /* + * Disallow write attempts on filesystems mounted read-only. + */ + if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) && + (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || + vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || + vap->va_mtime.tv_sec != VNOVAL || + vap->va_mode != (mode_t)VNOVAL)) { + return (EROFS); + } + + /* + * Handle case of truncating lower object to zero size, + * by creating a zero length upper object. This is to + * handle the case of open with O_TRUNC and O_CREAT. + */ + if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) { + error = union_copyup(un, (ap->a_vap->va_size != 0), + ap->a_cred, ap->a_p); + if (error) + return (error); + } + + /* + * Try to set attributes in upper layer, + * otherwise return read-only filesystem error. + */ + error = EROFS; + if ((uppervp = union_lock_upper(un, p)) != NULLVP) { + error = VOP_SETATTR(un->un_uppervp, ap->a_vap, + ap->a_cred, ap->a_p); + if ((error == 0) && (ap->a_vap->va_size != VNOVAL)) + union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL); + union_unlock_upper(uppervp, p); + } + return (error); +} + +/* + * union_getpages: + */ + +static int +union_getpages(struct vop_getpages_args *ap) +{ + int r; + + r = vnode_pager_generic_getpages(ap->a_vp, ap->a_m, + ap->a_count, ap->a_reqpage); + return(r); +} + +/* + * union_putpages: + */ + +static int +union_putpages(struct vop_putpages_args *ap) +{ + int r; + + r = vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_sync, ap->a_rtvals); + return(r); +} + +static int +union_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_vp); + struct proc *p = ap->a_uio->uio_procp; + struct vnode *uvp; + int error; + + uvp = union_lock_other(un, p); + KASSERT(uvp != NULL, ("union_read: backing vnode missing!")); + + if (ap->a_vp->v_flag & VOBJBUF) + union_vm_coherency(ap->a_vp, ap->a_uio, 0); + + error = VOP_READ(uvp, ap->a_uio, ap->a_ioflag, ap->a_cred); + union_unlock_other(uvp, p); + + /* + * XXX + * perhaps the size of the underlying object has changed under + * our feet. take advantage of the offset information present + * in the uio structure. + */ + if (error == 0) { + struct union_node *un = VTOUNION(ap->a_vp); + off_t cur = ap->a_uio->uio_offset; + + if (uvp == un->un_uppervp) { + if (cur > un->un_uppersz) + union_newsize(ap->a_vp, cur, VNOVAL); + } else { + if (cur > un->un_lowersz) + union_newsize(ap->a_vp, VNOVAL, cur); + } + } + return (error); +} + +static int +union_write(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_vp); + struct proc *p = ap->a_uio->uio_procp; + struct vnode *uppervp; + int error; + + if ((uppervp = union_lock_upper(un, p)) == NULLVP) + panic("union: missing upper layer in write"); + + /* + * Since our VM pages are associated with our vnode rather then + * the real vnode, and since we do not run our reads and writes + * through our own VM cache, we have a VM/VFS coherency problem. + * We solve them by invalidating or flushing the associated VM + * pages prior to allowing a normal read or write to occur. + * + * VM-backed writes (UIO_NOCOPY) have to be converted to normal + * writes because we are not cache-coherent. Normal writes need + * to be made coherent with our VM-backing store, which we do by + * first flushing any dirty VM pages associated with the write + * range, and then destroying any clean VM pages associated with + * the write range. + */ + + if (ap->a_uio->uio_segflg == UIO_NOCOPY) { + ap->a_uio->uio_segflg = UIO_SYSSPACE; + } else if (ap->a_vp->v_flag & VOBJBUF) { + union_vm_coherency(ap->a_vp, ap->a_uio, 1); + } + + error = VOP_WRITE(uppervp, ap->a_uio, ap->a_ioflag, ap->a_cred); + + /* + * the size of the underlying object may be changed by the + * write. + */ + if (error == 0) { + off_t cur = ap->a_uio->uio_offset; + + if (cur > un->un_uppersz) + union_newsize(ap->a_vp, cur, VNOVAL); + } + union_unlock_upper(uppervp, p); + return (error); +} + +static int +union_lease(ap) + struct vop_lease_args /* { + struct vnode *a_vp; + struct proc *a_p; + struct ucred *a_cred; + int a_flag; + } */ *ap; +{ + struct vnode *ovp = OTHERVP(ap->a_vp); + + ap->a_vp = ovp; + return (VCALL(ovp, VOFFSET(vop_lease), ap)); +} + +static int +union_ioctl(ap) + struct vop_ioctl_args /* { + struct vnode *a_vp; + int a_command; + caddr_t a_data; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct vnode *ovp = OTHERVP(ap->a_vp); + + ap->a_vp = ovp; + return (VCALL(ovp, VOFFSET(vop_ioctl), ap)); +} + +static int +union_poll(ap) + struct vop_poll_args /* { + struct vnode *a_vp; + int a_events; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct vnode *ovp = OTHERVP(ap->a_vp); + + ap->a_vp = ovp; + return (VCALL(ovp, VOFFSET(vop_poll), ap)); +} + +static int +union_revoke(ap) + struct vop_revoke_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + if (UPPERVP(vp)) + VOP_REVOKE(UPPERVP(vp), ap->a_flags); + if (LOWERVP(vp)) + VOP_REVOKE(LOWERVP(vp), ap->a_flags); + vgone(vp); + return (0); +} + +static int +union_mmap(ap) + struct vop_mmap_args /* { + struct vnode *a_vp; + int a_fflags; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct vnode *ovp = OTHERVP(ap->a_vp); + + ap->a_vp = ovp; + return (VCALL(ovp, VOFFSET(vop_mmap), ap)); +} + +static int +union_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap; +{ + int error = 0; + struct proc *p = ap->a_p; + struct vnode *targetvp; + struct union_node *un = VTOUNION(ap->a_vp); + + if ((targetvp = union_lock_other(un, p)) != NULLVP) { + error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, p); + union_unlock_other(targetvp, p); + } + + return (error); +} + +/* + * union_remove: + * + * Remove the specified cnp. The dvp and vp are passed to us locked + * and must remain locked on return. + */ + +static int +union_remove(ap) + struct vop_remove_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + struct union_node *dun = VTOUNION(ap->a_dvp); + struct union_node *un = VTOUNION(ap->a_vp); + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + struct vnode *uppervp; + struct vnode *upperdvp; + int error; + + if ((upperdvp = union_lock_upper(dun, p)) == NULLVP) + panic("union remove: null upper vnode"); + + if ((uppervp = union_lock_upper(un, p)) != NULLVP) { + if (union_dowhiteout(un, cnp->cn_cred, p)) + cnp->cn_flags |= DOWHITEOUT; + error = VOP_REMOVE(upperdvp, uppervp, cnp); +#if 0 + /* XXX */ + if (!error) + union_removed_upper(un); +#endif + union_unlock_upper(uppervp, p); + } else { + error = union_mkwhiteout( + MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount), + upperdvp, ap->a_cnp, un->un_path); + } + union_unlock_upper(upperdvp, p); + return (error); +} + +/* + * union_link: + * + * tdvp will be locked on entry, vp will not be locked on entry. + * tdvp should remain locked on return and vp should remain unlocked + * on return. + */ + +static int +union_link(ap) + struct vop_link_args /* { + struct vnode *a_tdvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + struct union_node *dun = VTOUNION(ap->a_tdvp); + struct vnode *vp; + struct vnode *tdvp; + int error = 0; + + if (ap->a_tdvp->v_op != ap->a_vp->v_op) { + vp = ap->a_vp; + } else { + struct union_node *tun = VTOUNION(ap->a_vp); + + if (tun->un_uppervp == NULLVP) { + vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); +#if 0 + if (dun->un_uppervp == tun->un_dirvp) { + if (dun->un_flags & UN_ULOCK) { + dun->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(dun->un_uppervp, 0, p); + } + } +#endif + error = union_copyup(tun, 1, cnp->cn_cred, p); +#if 0 + if (dun->un_uppervp == tun->un_dirvp) { + vn_lock(dun->un_uppervp, + LK_EXCLUSIVE | LK_RETRY, p); + dun->un_flags |= UN_ULOCK; + } +#endif + VOP_UNLOCK(ap->a_vp, 0, p); + } + vp = tun->un_uppervp; + } + + if (error) + return (error); + + /* + * Make sure upper is locked, then unlock the union directory we were + * called with to avoid a deadlock while we are calling VOP_LINK on + * the upper (with tdvp locked and vp not locked). Our ap->a_tdvp + * is expected to be locked on return. + */ + + if ((tdvp = union_lock_upper(dun, p)) == NULLVP) + return (EROFS); + + VOP_UNLOCK(ap->a_tdvp, 0, p); /* unlock calling node */ + error = VOP_LINK(tdvp, vp, cnp); /* call link on upper */ + + /* + * We have to unlock tdvp prior to relocking our calling node in + * order to avoid a deadlock. + */ + union_unlock_upper(tdvp, p); + vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY, p); + return (error); +} + +static int +union_rename(ap) + struct vop_rename_args /* { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + } */ *ap; +{ + int error; + struct vnode *fdvp = ap->a_fdvp; + struct vnode *fvp = ap->a_fvp; + struct vnode *tdvp = ap->a_tdvp; + struct vnode *tvp = ap->a_tvp; + + /* + * Figure out what fdvp to pass to our upper or lower vnode. If we + * replace the fdvp, release the original one and ref the new one. + */ + + if (fdvp->v_op == union_vnodeop_p) { /* always true */ + struct union_node *un = VTOUNION(fdvp); + if (un->un_uppervp == NULLVP) { + /* + * this should never happen in normal + * operation but might if there was + * a problem creating the top-level shadow + * directory. + */ + error = EXDEV; + goto bad; + } + fdvp = un->un_uppervp; + VREF(fdvp); + vrele(ap->a_fdvp); + } + + /* + * Figure out what fvp to pass to our upper or lower vnode. If we + * replace the fvp, release the original one and ref the new one. + */ + + if (fvp->v_op == union_vnodeop_p) { /* always true */ + struct union_node *un = VTOUNION(fvp); +#if 0 + struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount); +#endif + + if (un->un_uppervp == NULLVP) { + switch(fvp->v_type) { + case VREG: + vn_lock(un->un_vnode, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_proc); + error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_proc); + VOP_UNLOCK(un->un_vnode, 0, ap->a_fcnp->cn_proc); + if (error) + goto bad; + break; + case VDIR: + /* + * XXX not yet. + * + * There is only one way to rename a directory + * based in the lowervp, and that is to copy + * the entire directory hierarchy. Otherwise + * it would not last across a reboot. + */ +#if 0 + vrele(fvp); + fvp = NULL; + vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_proc); + error = union_mkshadow(um, fdvp, + ap->a_fcnp, &un->un_uppervp); + VOP_UNLOCK(fdvp, 0, ap->a_fcnp->cn_proc); + if (un->un_uppervp) + VOP_UNLOCK(un->un_uppervp, 0, ap->a_fcnp->cn_proc); + if (error) + goto bad; + break; +#endif + default: + error = EXDEV; + goto bad; + } + } + + if (un->un_lowervp != NULLVP) + ap->a_fcnp->cn_flags |= DOWHITEOUT; + fvp = un->un_uppervp; + VREF(fvp); + vrele(ap->a_fvp); + } + + /* + * Figure out what tdvp (destination directory) to pass to the + * lower level. If we replace it with uppervp, we need to vput the + * old one. The exclusive lock is transfered to what we will pass + * down in the VOP_RENAME and we replace uppervp with a simple + * reference. + */ + + if (tdvp->v_op == union_vnodeop_p) { + struct union_node *un = VTOUNION(tdvp); + + if (un->un_uppervp == NULLVP) { + /* + * this should never happen in normal + * operation but might if there was + * a problem creating the top-level shadow + * directory. + */ + error = EXDEV; + goto bad; + } + + /* + * new tdvp is a lock and reference on uppervp, put away + * the old tdvp. + */ + tdvp = union_lock_upper(un, ap->a_tcnp->cn_proc); + vput(ap->a_tdvp); + } + + /* + * Figure out what tvp (destination file) to pass to the + * lower level. + * + * If the uppervp file does not exist put away the (wrong) + * file and change tvp to NULL. + */ + + if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) { + struct union_node *un = VTOUNION(tvp); + + tvp = union_lock_upper(un, ap->a_tcnp->cn_proc); + vput(ap->a_tvp); + /* note: tvp may be NULL */ + } + + /* + * VOP_RENAME releases/vputs prior to returning, so we have no + * cleanup to do. + */ + + return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp)); + + /* + * Error. We still have to release / vput the various elements. + */ + +bad: + vrele(fdvp); + if (fvp) + vrele(fvp); + vput(tdvp); + if (tvp != NULLVP) { + if (tvp != tdvp) + vput(tvp); + else + vrele(tvp); + } + return (error); +} + +static int +union_mkdir(ap) + struct vop_mkdir_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + struct union_node *dun = VTOUNION(ap->a_dvp); + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + struct vnode *upperdvp; + int error = EROFS; + + if ((upperdvp = union_lock_upper(dun, p)) != NULLVP) { + struct vnode *vp; + + error = VOP_MKDIR(upperdvp, &vp, cnp, ap->a_vap); + union_unlock_upper(upperdvp, p); + + if (error == 0) { + VOP_UNLOCK(vp, 0, p); + UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n", vp, vp->v_usecount)); + error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, + ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1); + UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vp->v_usecount)); + } + } + return (error); +} + +static int +union_rmdir(ap) + struct vop_rmdir_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + struct union_node *dun = VTOUNION(ap->a_dvp); + struct union_node *un = VTOUNION(ap->a_vp); + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + struct vnode *upperdvp; + struct vnode *uppervp; + int error; + + if ((upperdvp = union_lock_upper(dun, p)) == NULLVP) + panic("union rmdir: null upper vnode"); + + if ((uppervp = union_lock_upper(un, p)) != NULLVP) { + if (union_dowhiteout(un, cnp->cn_cred, p)) + cnp->cn_flags |= DOWHITEOUT; + error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp); + union_unlock_upper(uppervp, p); + } else { + error = union_mkwhiteout( + MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount), + dun->un_uppervp, ap->a_cnp, un->un_path); + } + union_unlock_upper(upperdvp, p); + return (error); +} + +/* + * union_symlink: + * + * dvp is locked on entry and remains locked on return. a_vpp is garbage + * (unused). + */ + +static int +union_symlink(ap) + struct vop_symlink_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + char *a_target; + } */ *ap; +{ + struct union_node *dun = VTOUNION(ap->a_dvp); + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + struct vnode *dvp; + int error = EROFS; + + if ((dvp = union_lock_upper(dun, p)) != NULLVP) { + error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap, + ap->a_target); + union_unlock_upper(dvp, p); + } + return (error); +} + +/* + * union_readdir works in concert with getdirentries and + * readdir(3) to provide a list of entries in the unioned + * directories. getdirentries is responsible for walking + * down the union stack. readdir(3) is responsible for + * eliminating duplicate names from the returned data stream. + */ +static int +union_readdir(ap) + struct vop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + int *a_eofflag; + u_long *a_cookies; + int a_ncookies; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_vp); + struct proc *p = ap->a_uio->uio_procp; + struct vnode *uvp; + int error = 0; + + if ((uvp = union_lock_upper(un, p)) != NULLVP) { + ap->a_vp = uvp; + error = VCALL(uvp, VOFFSET(vop_readdir), ap); + union_unlock_upper(uvp, p); + } + return(error); +} + +static int +union_readlink(ap) + struct vop_readlink_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + } */ *ap; +{ + int error; + struct union_node *un = VTOUNION(ap->a_vp); + struct uio *uio = ap->a_uio; + struct proc *p = uio->uio_procp; + struct vnode *vp; + + vp = union_lock_other(un, p); + KASSERT(vp != NULL, ("union_readlink: backing vnode missing!")); + + ap->a_vp = vp; + error = VCALL(vp, VOFFSET(vop_readlink), ap); + union_unlock_other(vp, p); + + return (error); +} + +/* + * union_inactive: + * + * Called with the vnode locked. We are expected to unlock the vnode. + */ + +static int +union_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct proc *p = ap->a_p; + struct union_node *un = VTOUNION(vp); + struct vnode **vpp; + + /* + * Do nothing (and _don't_ bypass). + * Wait to vrele lowervp until reclaim, + * so that until then our union_node is in the + * cache and reusable. + * + * NEEDSWORK: Someday, consider inactive'ing + * the lowervp and then trying to reactivate it + * with capabilities (v_id) + * like they do in the name lookup cache code. + * That's too much work for now. + */ + + if (un->un_dircache != 0) { + for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) + vrele(*vpp); + free (un->un_dircache, M_TEMP); + un->un_dircache = 0; + } + +#if 0 + if ((un->un_flags & UN_ULOCK) && un->un_uppervp) { + un->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(un->un_uppervp, 0, p); + } +#endif + + VOP_UNLOCK(vp, 0, p); + + if ((un->un_flags & UN_CACHED) == 0) + vgone(vp); + + return (0); +} + +static int +union_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap; +{ + union_freevp(ap->a_vp); + + return (0); +} + +static int +union_lock(ap) + struct vop_lock_args *ap; +{ +#if 0 + struct vnode *vp = ap->a_vp; + struct proc *p = ap->a_p; + int flags = ap->a_flags; + struct union_node *un; +#endif + int error; + + error = vop_stdlock(ap); +#if 0 + un = VTOUNION(vp); + + if (error == 0) { + /* + * Lock the upper if it exists and this is an exclusive lock + * request. + */ + if (un->un_uppervp != NULLVP && + (flags & LK_TYPE_MASK) == LK_EXCLUSIVE) { + if ((un->un_flags & UN_ULOCK) == 0 && vp->v_usecount) { + error = vn_lock(un->un_uppervp, flags, p); + if (error) { + struct vop_unlock_args uap = { 0 }; + uap.a_vp = ap->a_vp; + uap.a_flags = ap->a_flags; + uap.a_p = ap->a_p; + vop_stdunlock(&uap); + return (error); + } + un->un_flags |= UN_ULOCK; + } + } + } +#endif + return (error); +} + +/* + * union_unlock: + * + * Unlock our union node. This also unlocks uppervp. + */ +static int +union_unlock(ap) + struct vop_unlock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_vp); + int error; + + KASSERT((un->un_uppervp == NULL || un->un_uppervp->v_usecount > 0), ("uppervp usecount is 0")); + + error = vop_stdunlock(ap); +#if 0 + + /* + * If no exclusive locks remain and we are holding an uppervp lock, + * remove the uppervp lock. + */ + + if ((un->un_flags & UN_ULOCK) && + lockstatus(&un->un_lock, NULL) != LK_EXCLUSIVE) { + un->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(un->un_uppervp, LK_EXCLUSIVE, p); + } +#endif + return(error); +} + +/* + * union_bmap: + * + * There isn't much we can do. We cannot push through to the real vnode + * to get to the underlying device because this will bypass data + * cached by the real vnode. + * + * For some reason we cannot return the 'real' vnode either, it seems + * to blow up memory maps. + */ + +static int +union_bmap(ap) + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct vnode **a_vpp; + daddr_t *a_bnp; + int *a_runp; + int *a_runb; + } */ *ap; +{ + return(EOPNOTSUPP); +} + +static int +union_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n", + vp, UPPERVP(vp), LOWERVP(vp)); + if (UPPERVP(vp) != NULLVP) + vprint("union: upper", UPPERVP(vp)); + if (LOWERVP(vp) != NULLVP) + vprint("union: lower", LOWERVP(vp)); + + return (0); +} + +static int +union_pathconf(ap) + struct vop_pathconf_args /* { + struct vnode *a_vp; + int a_name; + int *a_retval; + } */ *ap; +{ + int error; + struct proc *p = curproc; /* XXX */ + struct union_node *un = VTOUNION(ap->a_vp); + struct vnode *vp; + + vp = union_lock_other(un, p); + KASSERT(vp != NULL, ("union_pathconf: backing vnode missing!")); + + ap->a_vp = vp; + error = VCALL(vp, VOFFSET(vop_pathconf), ap); + union_unlock_other(vp, p); + + return (error); +} + +static int +union_advlock(ap) + struct vop_advlock_args /* { + struct vnode *a_vp; + caddr_t a_id; + int a_op; + struct flock *a_fl; + int a_flags; + } */ *ap; +{ + register struct vnode *ovp = OTHERVP(ap->a_vp); + + ap->a_vp = ovp; + return (VCALL(ovp, VOFFSET(vop_advlock), ap)); +} + + +/* + * XXX - vop_strategy must be hand coded because it has no + * YYY - and it is not coherent with anything + * + * vnode in its arguments. + * This goes away with a merged VM/buffer cache. + */ +static int +union_strategy(ap) + struct vop_strategy_args /* { + struct vnode *a_vp; + struct buf *a_bp; + } */ *ap; +{ + struct buf *bp = ap->a_bp; + struct vnode *othervp = OTHERVP(bp->b_vp); + +#ifdef DIAGNOSTIC + if (othervp == NULLVP) + panic("union_strategy: nil vp"); + if (((bp->b_flags & B_READ) == 0) && + (othervp == LOWERVP(bp->b_vp))) + panic("union_strategy: writing to lowervp"); +#endif + return (VOP_STRATEGY(othervp, bp)); +} + +/* + * Global vfs data structures + */ +vop_t **union_vnodeop_p; +static struct vnodeopv_entry_desc union_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) vop_defaultop }, + { &vop_access_desc, (vop_t *) union_access }, + { &vop_advlock_desc, (vop_t *) union_advlock }, + { &vop_bmap_desc, (vop_t *) union_bmap }, + { &vop_close_desc, (vop_t *) union_close }, + { &vop_create_desc, (vop_t *) union_create }, + { &vop_fsync_desc, (vop_t *) union_fsync }, + { &vop_getpages_desc, (vop_t *) union_getpages }, + { &vop_putpages_desc, (vop_t *) union_putpages }, + { &vop_getattr_desc, (vop_t *) union_getattr }, + { &vop_inactive_desc, (vop_t *) union_inactive }, + { &vop_ioctl_desc, (vop_t *) union_ioctl }, + { &vop_islocked_desc, (vop_t *) vop_stdislocked }, + { &vop_lease_desc, (vop_t *) union_lease }, + { &vop_link_desc, (vop_t *) union_link }, + { &vop_lock_desc, (vop_t *) union_lock }, + { &vop_lookup_desc, (vop_t *) union_lookup }, + { &vop_mkdir_desc, (vop_t *) union_mkdir }, + { &vop_mknod_desc, (vop_t *) union_mknod }, + { &vop_mmap_desc, (vop_t *) union_mmap }, + { &vop_open_desc, (vop_t *) union_open }, + { &vop_pathconf_desc, (vop_t *) union_pathconf }, + { &vop_poll_desc, (vop_t *) union_poll }, + { &vop_print_desc, (vop_t *) union_print }, + { &vop_read_desc, (vop_t *) union_read }, + { &vop_readdir_desc, (vop_t *) union_readdir }, + { &vop_readlink_desc, (vop_t *) union_readlink }, + { &vop_reclaim_desc, (vop_t *) union_reclaim }, + { &vop_remove_desc, (vop_t *) union_remove }, + { &vop_rename_desc, (vop_t *) union_rename }, + { &vop_revoke_desc, (vop_t *) union_revoke }, + { &vop_rmdir_desc, (vop_t *) union_rmdir }, + { &vop_setattr_desc, (vop_t *) union_setattr }, + { &vop_strategy_desc, (vop_t *) union_strategy }, + { &vop_symlink_desc, (vop_t *) union_symlink }, + { &vop_unlock_desc, (vop_t *) union_unlock }, + { &vop_whiteout_desc, (vop_t *) union_whiteout }, + { &vop_write_desc, (vop_t *) union_write }, + { NULL, NULL } +}; +static struct vnodeopv_desc union_vnodeop_opv_desc = + { &union_vnodeop_p, union_vnodeop_entries }; + +VNODEOP_SET(union_vnodeop_opv_desc); |