diff options
-rw-r--r-- | sys/fs/nullfs/null_subr.c | 38 | ||||
-rw-r--r-- | sys/fs/nullfs/null_vfsops.c | 53 | ||||
-rw-r--r-- | sys/fs/nullfs/null_vnops.c | 293 | ||||
-rw-r--r-- | sys/miscfs/nullfs/null_subr.c | 38 | ||||
-rw-r--r-- | sys/miscfs/nullfs/null_vfsops.c | 53 | ||||
-rw-r--r-- | sys/miscfs/nullfs/null_vnops.c | 293 |
6 files changed, 572 insertions, 196 deletions
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c index 3671f0a..efb1357 100644 --- a/sys/fs/nullfs/null_subr.c +++ b/sys/fs/nullfs/null_subr.c @@ -99,6 +99,7 @@ nullfs_uninit(vfsp) /* * Return a VREF'ed alias for lower vnode if already exists, else 0. + * Lower vnode should be locked on entry and will be left locked on exit. */ static struct vnode * null_node_find(mp, lowervp) @@ -128,10 +129,15 @@ loop: * stuff, but we don't want to lock * the lower node. */ - if (vget(vp, 0, p)) { + if (vget(vp, LK_EXCLUSIVE | LK_CANRECURSE, p)) { printf ("null_node_find: vget failed.\n"); goto loop; }; + /* + * Now we got both vnodes locked, so release the + * lower one. + */ + VOP_UNLOCK(lowervp, 0, p); return (vp); } } @@ -184,14 +190,30 @@ null_node_alloc(mp, lowervp, vpp) */ othervp = null_node_find(mp, lowervp); if (othervp) { + vp->v_data = NULL; FREE(xp, M_NULLFSNODE); vp->v_type = VBAD; /* node is discarded */ - vp->v_usecount = 0; /* XXX */ + vrele(vp); *vpp = othervp; return 0; }; + + /* + * From NetBSD: + * Now lock the new node. We rely on the fact that we were passed + * a locked vnode. If the lower node is exporting a struct lock + * (v_vnlock != NULL) then we just set the upper v_vnlock to the + * lower one, and both are now locked. If the lower node is exporting + * NULL, then we copy that up and manually lock the new vnode. + */ + lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, p); - VREF(lowervp); /* Extra VREF will be vrele'd in null_node_create */ + vp->v_vnlock = lowervp->v_vnlock; + error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_THISLAYER, p); + if (error) + panic("null_node_alloc: can't lock new vnode\n"); + + VREF(lowervp); hd = NULL_NHASH(lowervp); LIST_INSERT_HEAD(hd, xp, null_hash); lockmgr(&null_hashlock, LK_RELEASE, NULL, p); @@ -200,9 +222,9 @@ null_node_alloc(mp, lowervp, vpp) /* - * Try to find an existing null_node vnode refering - * to it, otherwise make a new null_node vnode which - * contains a reference to the lower vnode. + * Try to find an existing null_node vnode refering to the given underlying + * vnode (which should be locked). If no vnode found, create a new null_node + * vnode which contains a reference to the lower vnode. */ int null_node_create(mp, lowervp, newvpp) @@ -218,10 +240,10 @@ null_node_create(mp, lowervp, newvpp) * null_node_find has taken another reference * to the alias vnode. */ + vrele(lowervp); #ifdef NULLFS_DEBUG vprint("null_node_create: exists", aliasvp); #endif - /* VREF(aliasvp); --- done in null_node_find */ } else { int error; @@ -242,8 +264,6 @@ null_node_create(mp, lowervp, newvpp) */ } - vrele(lowervp); - #ifdef DIAGNOSTIC if (lowervp->v_usecount < 1) { /* Should never happen... */ diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c index 5b49fbb..45f950e 100644 --- a/sys/fs/nullfs/null_vfsops.c +++ b/sys/fs/nullfs/null_vfsops.c @@ -73,6 +73,8 @@ static int nullfs_sync(struct mount *mp, int waitfor, static int nullfs_unmount(struct mount *mp, int mntflags, struct proc *p); static int nullfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp); static int nullfs_vptofh(struct vnode *vp, struct fid *fhp); +static int nullfs_extattrctl(struct mount *mp, int cmd, + const char *attrname, caddr_t arg, struct proc *p); /* * Mount null layer @@ -224,7 +226,7 @@ nullfs_unmount(mp, mntflags, p) int mntflags; struct proc *p; { - struct vnode *nullm_rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp; + struct vnode *vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp; void *mntdata; int error; int flags = 0; @@ -234,39 +236,37 @@ nullfs_unmount(mp, mntflags, p) if (mntflags & MNT_FORCE) flags |= FORCECLOSE; - /* - * Clear out buffer cache. I don't think we - * ever get anything cached at this level at the - * moment, but who knows... - */ -#if 0 - mntflushbuf(mp, 0); - if (mntinvalbuf(mp, 1)) - return (EBUSY); -#endif - if (nullm_rootvp->v_usecount > 1) + error = VFS_ROOT(mp, &vp); + if (error) + return (error); + if (vp->v_usecount > 2) { + NULLFSDEBUG("nullfs_unmount: rootvp is busy(%d)\n", + vp->v_usecount); + vput(vp); return (EBUSY); - error = vflush(mp, nullm_rootvp, flags); + } + error = vflush(mp, vp, flags); if (error) return (error); #ifdef NULLFS_DEBUG - vprint("alias root of lower", nullm_rootvp); + vprint("alias root of lower", vp); #endif + vput(vp); /* * Release reference on underlying root vnode */ - vrele(nullm_rootvp); + vrele(vp); /* * And blow it away for future re-use */ - vgone(nullm_rootvp); + vgone(vp); /* * Finally, throw away the null_mount structure */ mntdata = mp->mnt_data; mp->mnt_data = 0; - free(mntdata, M_NULLFSMNT); /* XXX */ + free(mntdata, M_NULLFSMNT); return 0; } @@ -287,13 +287,10 @@ nullfs_root(mp, vpp) */ vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp; VREF(vp); + #ifdef NULLFS_DEBUG if (VOP_ISLOCKED(vp, NULL)) { - /* - * XXX - * Should we check type of node? - */ - printf("nullfs_root: multi null mount?\n"); + Debugger("root vnode is locked.\n"); vrele(vp); return (EDEADLK); } @@ -370,8 +367,12 @@ nullfs_vget(mp, ino, vpp) ino_t ino; struct vnode **vpp; { + int error; + error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp); + if (error) + return (error); - return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp); + return (null_node_create(mp, *vpp, vpp)); } static int @@ -380,8 +381,12 @@ nullfs_fhtovp(mp, fidp, vpp) struct fid *fidp; struct vnode **vpp; { + int error; + error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, vpp); + if (error) + return (error); - return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, vpp); + return (null_node_create(mp, *vpp, vpp)); } static int diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c index 5692df6..0b7cb96 100644 --- a/sys/fs/nullfs/null_vnops.c +++ b/sys/fs/nullfs/null_vnops.c @@ -176,6 +176,8 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> +#include <sys/conf.h> +#include <sys/proc.h> #include <sys/sysctl.h> #include <sys/vnode.h> #include <sys/mount.h> @@ -183,13 +185,22 @@ #include <sys/malloc.h> #include <miscfs/nullfs/null.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vnode_pager.h> + static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW, &null_bug_bypass, 0, ""); static int null_access(struct vop_access_args *ap); +static int null_createvobject(struct vop_createvobject_args *ap); +static int null_destroyvobject(struct vop_destroyvobject_args *ap); static int null_getattr(struct vop_getattr_args *ap); +static int null_getvobject(struct vop_getvobject_args *ap); static int null_inactive(struct vop_inactive_args *ap); +static int null_islocked(struct vop_islocked_args *ap); static int null_lock(struct vop_lock_args *ap); static int null_lookup(struct vop_lookup_args *ap); static int null_open(struct vop_open_args *ap); @@ -277,7 +288,7 @@ null_bypass(ap) * of vrele'ing their vp's. We must account for * that. (This should go away in the future.) */ - if (reles & 1) + if (reles & VDESC_VP0_WILLRELE) VREF(*this_vp_p); } @@ -287,7 +298,12 @@ null_bypass(ap) * Call the operation on the lower layer * with the modified argument structure. */ - error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap); + if (vps_p[0] && *vps_p[0]) + error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap); + else { + printf("null_bypass: no map for %s\n", descp->vdesc_name); + error = EINVAL; + } /* * Maintain the illusion of call-by-value @@ -300,7 +316,11 @@ null_bypass(ap) break; /* bail out at end of list */ if (old_vps[i]) { *(vps_p[i]) = old_vps[i]; - if (reles & 1) +#if 0 + if (reles & VDESC_VP0_WILLUNLOCK) + VOP_UNLOCK(*(vps_p[i]), LK_THISLAYER, curproc); +#endif + if (reles & VDESC_VP0_WILLRELE) vrele(*(vps_p[i])); } } @@ -345,44 +365,43 @@ null_lookup(ap) } */ *ap; { struct componentname *cnp = ap->a_cnp; + struct vnode *dvp = ap->a_dvp; struct proc *p = cnp->cn_proc; int flags = cnp->cn_flags; - struct vop_lock_args lockargs; - struct vop_unlock_args unlockargs; - struct vnode *dvp, *vp; + struct vnode *vp, *ldvp, *lvp; int error; - if ((flags & ISLASTCN) && (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) && + if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); - error = null_bypass((struct vop_generic_args *)ap); + /* + * Although it is possible to call null_bypass(), we'll do + * a direct call to reduce overhead + */ + ldvp = NULLVPTOLOWERVP(dvp); + vp = lvp = NULL; + error = VOP_LOOKUP(ldvp, &lvp, cnp); if (error == EJUSTRETURN && (flags & ISLASTCN) && - (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) && + (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) error = EROFS; + /* - * We must do the same locking and unlocking at this layer as - * is done in the layers below us. We could figure this out - * based on the error return and the LASTCN, LOCKPARENT, and - * LOCKLEAF flags. However, it is more expidient to just find - * out the state of the lower level vnodes and set ours to the - * same state. + * Rely only on the PDIRUNLOCK flag which should be carefully + * tracked by underlying filesystem. */ - dvp = ap->a_dvp; - vp = *ap->a_vpp; - if (dvp == vp) - return (error); - if (!VOP_ISLOCKED(dvp, NULL)) { - unlockargs.a_vp = dvp; - unlockargs.a_flags = 0; - unlockargs.a_p = p; - vop_nounlock(&unlockargs); - } - if (vp != NULLVP && VOP_ISLOCKED(vp, NULL)) { - lockargs.a_vp = vp; - lockargs.a_flags = LK_SHARED; - lockargs.a_p = p; - vop_nolock(&lockargs); + if (cnp->cn_flags & PDIRUNLOCK) + VOP_UNLOCK(dvp, LK_THISLAYER, p); + if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) { + if (ldvp == lvp) { + *ap->a_vpp = dvp; + VREF(dvp); + vrele(lvp); + } else { + error = null_node_create(dvp->v_mount, lvp, &vp); + if (error == 0) + *ap->a_vpp = vp; + } } return (error); } @@ -430,6 +449,7 @@ null_setattr(ap) return (EROFS); } } + return (null_bypass((struct vop_generic_args *)ap)); } @@ -454,6 +474,9 @@ null_getattr(ap) return (0); } +/* + * Handle to disallow write access if mounted read-only. + */ static int null_access(ap) struct vop_access_args /* { @@ -559,12 +582,62 @@ null_lock(ap) struct proc *a_p; } */ *ap; { + struct vnode *vp = ap->a_vp; + int flags = ap->a_flags; + struct proc *p = ap->a_p; + struct vnode *lvp; + int error; - vop_nolock(ap); - if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN) - return (0); - ap->a_flags &= ~LK_INTERLOCK; - return (null_bypass((struct vop_generic_args *)ap)); + if (flags & LK_THISLAYER) { + if (vp->v_vnlock != NULL) + return 0; /* lock is shared across layers */ + error = lockmgr(&vp->v_lock, flags & ~LK_THISLAYER, + &vp->v_interlock, p); + return (error); + } + + if (vp->v_vnlock != NULL) { + /* + * The lower level has exported a struct lock to us. Use + * it so that all vnodes in the stack lock and unlock + * simultaneously. Note: we don't DRAIN the lock as DRAIN + * decommissions the lock - just because our vnode is + * going away doesn't mean the struct lock below us is. + * LK_EXCLUSIVE is fine. + */ + if ((flags & LK_TYPE_MASK) == LK_DRAIN) { + NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n"); + return(lockmgr(vp->v_vnlock, + (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, + &vp->v_interlock, p)); + } + return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock, p)); + } else { + /* + * To prevent race conditions involving doing a lookup + * on "..", we have to lock the lower node, then lock our + * node. Most of the time it won't matter that we lock our + * node (as any locking would need the lower one locked + * first). But we can LK_DRAIN the upper lock as a step + * towards decomissioning it. + */ + lvp = NULLVPTOLOWERVP(vp); + if (flags & LK_INTERLOCK) { + simple_unlock(&vp->v_interlock); + flags &= ~LK_INTERLOCK; + } + if ((flags & LK_TYPE_MASK) == LK_DRAIN) { + error = VOP_LOCK(lvp, + (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, p); + } else + error = VOP_LOCK(lvp, flags, p); + if (error) + return (error); + error = lockmgr(&vp->v_lock, flags, &vp->v_interlock, p); + if (error) + VOP_UNLOCK(lvp, 0, p); + return (error); + } } /* @@ -580,11 +653,46 @@ null_unlock(ap) struct proc *a_p; } */ *ap; { - vop_nounlock(ap); - ap->a_flags &= ~LK_INTERLOCK; - return (null_bypass((struct vop_generic_args *)ap)); + struct vnode *vp = ap->a_vp; + int flags = ap->a_flags; + struct proc *p = ap->a_p; + + if (vp->v_vnlock != NULL) { + if (flags & LK_THISLAYER) + return 0; /* the lock is shared across layers */ + flags &= ~LK_THISLAYER; + return (lockmgr(vp->v_vnlock, flags | LK_RELEASE, + &vp->v_interlock, p)); + } + if ((flags & LK_THISLAYER) == 0) { + if (flags & LK_INTERLOCK) + simple_unlock(&vp->v_interlock); + VOP_UNLOCK(NULLVPTOLOWERVP(vp), flags & ~LK_INTERLOCK, p); + } else + flags &= ~LK_THISLAYER; + return (lockmgr(&vp->v_lock, flags | LK_RELEASE, &vp->v_interlock, p)); +} + +static int +null_islocked(ap) + struct vop_islocked_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct proc *p = ap->a_p; + + if (vp->v_vnlock != NULL) + return (lockstatus(vp->v_vnlock, p)); + return (lockstatus(&vp->v_lock, p)); } +/* + * There is no way to tell that someone issued remove/rmdir operation + * on the underlying filesystem. For now we just have to release lowevrp + * as soon as possible. + */ static int null_inactive(ap) struct vop_inactive_args /* { @@ -593,27 +701,34 @@ null_inactive(ap) } */ *ap; { struct vnode *vp = ap->a_vp; + struct proc *p = ap->a_p; struct null_node *xp = VTONULL(vp); struct vnode *lowervp = xp->null_lowervp; + + lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, p); + LIST_REMOVE(xp, null_hash); + lockmgr(&null_hashlock, LK_RELEASE, NULL, p); + + xp->null_lowervp = NULLVP; + if (vp->v_vnlock != NULL) { + vp->v_vnlock = &vp->v_lock; /* we no longer share the lock */ + } else + VOP_UNLOCK(vp, LK_THISLAYER, p); + + vput(lowervp); /* - * Do nothing (and _don't_ bypass). - * Wait to vrele lowervp until reclaim, - * so that until then our null_node is in the - * cache and reusable. - * We still have to tell the lower layer the vnode - * is now inactive though. - * - * NEEDSWORK: Someday, consider inactive'ing - * the lowervp and then trying to reactivate it - * with capabilities (v_id) - * like they do in the name lookup cache code. - * That's too much work for now. + * Now it is safe to drop references to the lower vnode. + * VOP_INACTIVE() will be called by vrele() if necessary. */ - VOP_INACTIVE(lowervp, ap->a_p); - VOP_UNLOCK(ap->a_vp, 0, ap->a_p); + vrele (lowervp); + return (0); } +/* + * We can free memory in null_inactive, but we do this + * here. (Possible to guard vp->v_data to point somewhere) + */ static int null_reclaim(ap) struct vop_reclaim_args /* { @@ -622,21 +737,11 @@ null_reclaim(ap) } */ *ap; { struct vnode *vp = ap->a_vp; - struct null_node *xp = VTONULL(vp); - struct vnode *lowervp = xp->null_lowervp; + void *vdata = vp->v_data; - /* - * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p, - * so we can't call VOPs on ourself. - */ - /* After this assignment, this node will not be re-used. */ - xp->null_lowervp = NULLVP; - lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, ap->a_p); - LIST_REMOVE(xp, null_hash); - lockmgr(&null_hashlock, LK_RELEASE, NULL, ap->a_p); - FREE(vp->v_data, M_TEMP); vp->v_data = NULL; - vrele (lowervp); + FREE(vdata, M_NULLFSNODE); + return (0); } @@ -652,16 +757,74 @@ null_print(ap) } /* + * Let an underlying filesystem do the work + */ +static int +null_createvobject(ap) + struct vop_createvobject_args /* { + struct vnode *vp; + struct ucred *cred; + struct proc *p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct vnode *lowervp = VTONULL(vp) ? NULLVPTOLOWERVP(vp) : NULL; + int error; + + if (vp->v_type == VNON || lowervp == NULL) + return 0; + error = VOP_CREATEVOBJECT(lowervp, ap->a_cred, ap->a_p); + if (error) + return (error); + vp->v_flag |= VOBJBUF; + return (0); +} + +/* + * We have nothing to destroy and this operation shouldn't be bypassed. + */ +static int +null_destroyvobject(ap) + struct vop_destroyvobject_args /* { + struct vnode *vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + vp->v_flag &= ~VOBJBUF; + return (0); +} + +static int +null_getvobject(ap) + struct vop_getvobject_args /* { + struct vnode *vp; + struct vm_object **objpp; + } */ *ap; +{ + struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp); + + if (lvp == NULL) + return EINVAL; + return (VOP_GETVOBJECT(lvp, ap->a_objpp)); +} + +/* * Global vfs data structures */ vop_t **null_vnodeop_p; static struct vnodeopv_entry_desc null_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) null_bypass }, + { &vop_access_desc, (vop_t *) null_access }, { &vop_bmap_desc, (vop_t *) vop_eopnotsupp }, + { &vop_createvobject_desc, (vop_t *) null_createvobject }, + { &vop_destroyvobject_desc, (vop_t *) null_destroyvobject }, { &vop_getattr_desc, (vop_t *) null_getattr }, + { &vop_getvobject_desc, (vop_t *) null_getvobject }, { &vop_getwritemount_desc, (vop_t *) vop_stdgetwritemount}, { &vop_inactive_desc, (vop_t *) null_inactive }, + { &vop_islocked_desc, (vop_t *) null_islocked }, { &vop_lock_desc, (vop_t *) null_lock }, { &vop_lookup_desc, (vop_t *) null_lookup }, { &vop_open_desc, (vop_t *) null_open }, diff --git a/sys/miscfs/nullfs/null_subr.c b/sys/miscfs/nullfs/null_subr.c index 3671f0a..efb1357 100644 --- a/sys/miscfs/nullfs/null_subr.c +++ b/sys/miscfs/nullfs/null_subr.c @@ -99,6 +99,7 @@ nullfs_uninit(vfsp) /* * Return a VREF'ed alias for lower vnode if already exists, else 0. + * Lower vnode should be locked on entry and will be left locked on exit. */ static struct vnode * null_node_find(mp, lowervp) @@ -128,10 +129,15 @@ loop: * stuff, but we don't want to lock * the lower node. */ - if (vget(vp, 0, p)) { + if (vget(vp, LK_EXCLUSIVE | LK_CANRECURSE, p)) { printf ("null_node_find: vget failed.\n"); goto loop; }; + /* + * Now we got both vnodes locked, so release the + * lower one. + */ + VOP_UNLOCK(lowervp, 0, p); return (vp); } } @@ -184,14 +190,30 @@ null_node_alloc(mp, lowervp, vpp) */ othervp = null_node_find(mp, lowervp); if (othervp) { + vp->v_data = NULL; FREE(xp, M_NULLFSNODE); vp->v_type = VBAD; /* node is discarded */ - vp->v_usecount = 0; /* XXX */ + vrele(vp); *vpp = othervp; return 0; }; + + /* + * From NetBSD: + * Now lock the new node. We rely on the fact that we were passed + * a locked vnode. If the lower node is exporting a struct lock + * (v_vnlock != NULL) then we just set the upper v_vnlock to the + * lower one, and both are now locked. If the lower node is exporting + * NULL, then we copy that up and manually lock the new vnode. + */ + lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, p); - VREF(lowervp); /* Extra VREF will be vrele'd in null_node_create */ + vp->v_vnlock = lowervp->v_vnlock; + error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_THISLAYER, p); + if (error) + panic("null_node_alloc: can't lock new vnode\n"); + + VREF(lowervp); hd = NULL_NHASH(lowervp); LIST_INSERT_HEAD(hd, xp, null_hash); lockmgr(&null_hashlock, LK_RELEASE, NULL, p); @@ -200,9 +222,9 @@ null_node_alloc(mp, lowervp, vpp) /* - * Try to find an existing null_node vnode refering - * to it, otherwise make a new null_node vnode which - * contains a reference to the lower vnode. + * Try to find an existing null_node vnode refering to the given underlying + * vnode (which should be locked). If no vnode found, create a new null_node + * vnode which contains a reference to the lower vnode. */ int null_node_create(mp, lowervp, newvpp) @@ -218,10 +240,10 @@ null_node_create(mp, lowervp, newvpp) * null_node_find has taken another reference * to the alias vnode. */ + vrele(lowervp); #ifdef NULLFS_DEBUG vprint("null_node_create: exists", aliasvp); #endif - /* VREF(aliasvp); --- done in null_node_find */ } else { int error; @@ -242,8 +264,6 @@ null_node_create(mp, lowervp, newvpp) */ } - vrele(lowervp); - #ifdef DIAGNOSTIC if (lowervp->v_usecount < 1) { /* Should never happen... */ diff --git a/sys/miscfs/nullfs/null_vfsops.c b/sys/miscfs/nullfs/null_vfsops.c index 5b49fbb..45f950e 100644 --- a/sys/miscfs/nullfs/null_vfsops.c +++ b/sys/miscfs/nullfs/null_vfsops.c @@ -73,6 +73,8 @@ static int nullfs_sync(struct mount *mp, int waitfor, static int nullfs_unmount(struct mount *mp, int mntflags, struct proc *p); static int nullfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp); static int nullfs_vptofh(struct vnode *vp, struct fid *fhp); +static int nullfs_extattrctl(struct mount *mp, int cmd, + const char *attrname, caddr_t arg, struct proc *p); /* * Mount null layer @@ -224,7 +226,7 @@ nullfs_unmount(mp, mntflags, p) int mntflags; struct proc *p; { - struct vnode *nullm_rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp; + struct vnode *vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp; void *mntdata; int error; int flags = 0; @@ -234,39 +236,37 @@ nullfs_unmount(mp, mntflags, p) if (mntflags & MNT_FORCE) flags |= FORCECLOSE; - /* - * Clear out buffer cache. I don't think we - * ever get anything cached at this level at the - * moment, but who knows... - */ -#if 0 - mntflushbuf(mp, 0); - if (mntinvalbuf(mp, 1)) - return (EBUSY); -#endif - if (nullm_rootvp->v_usecount > 1) + error = VFS_ROOT(mp, &vp); + if (error) + return (error); + if (vp->v_usecount > 2) { + NULLFSDEBUG("nullfs_unmount: rootvp is busy(%d)\n", + vp->v_usecount); + vput(vp); return (EBUSY); - error = vflush(mp, nullm_rootvp, flags); + } + error = vflush(mp, vp, flags); if (error) return (error); #ifdef NULLFS_DEBUG - vprint("alias root of lower", nullm_rootvp); + vprint("alias root of lower", vp); #endif + vput(vp); /* * Release reference on underlying root vnode */ - vrele(nullm_rootvp); + vrele(vp); /* * And blow it away for future re-use */ - vgone(nullm_rootvp); + vgone(vp); /* * Finally, throw away the null_mount structure */ mntdata = mp->mnt_data; mp->mnt_data = 0; - free(mntdata, M_NULLFSMNT); /* XXX */ + free(mntdata, M_NULLFSMNT); return 0; } @@ -287,13 +287,10 @@ nullfs_root(mp, vpp) */ vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp; VREF(vp); + #ifdef NULLFS_DEBUG if (VOP_ISLOCKED(vp, NULL)) { - /* - * XXX - * Should we check type of node? - */ - printf("nullfs_root: multi null mount?\n"); + Debugger("root vnode is locked.\n"); vrele(vp); return (EDEADLK); } @@ -370,8 +367,12 @@ nullfs_vget(mp, ino, vpp) ino_t ino; struct vnode **vpp; { + int error; + error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp); + if (error) + return (error); - return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp); + return (null_node_create(mp, *vpp, vpp)); } static int @@ -380,8 +381,12 @@ nullfs_fhtovp(mp, fidp, vpp) struct fid *fidp; struct vnode **vpp; { + int error; + error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, vpp); + if (error) + return (error); - return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, vpp); + return (null_node_create(mp, *vpp, vpp)); } static int diff --git a/sys/miscfs/nullfs/null_vnops.c b/sys/miscfs/nullfs/null_vnops.c index 5692df6..0b7cb96 100644 --- a/sys/miscfs/nullfs/null_vnops.c +++ b/sys/miscfs/nullfs/null_vnops.c @@ -176,6 +176,8 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> +#include <sys/conf.h> +#include <sys/proc.h> #include <sys/sysctl.h> #include <sys/vnode.h> #include <sys/mount.h> @@ -183,13 +185,22 @@ #include <sys/malloc.h> #include <miscfs/nullfs/null.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vnode_pager.h> + static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW, &null_bug_bypass, 0, ""); static int null_access(struct vop_access_args *ap); +static int null_createvobject(struct vop_createvobject_args *ap); +static int null_destroyvobject(struct vop_destroyvobject_args *ap); static int null_getattr(struct vop_getattr_args *ap); +static int null_getvobject(struct vop_getvobject_args *ap); static int null_inactive(struct vop_inactive_args *ap); +static int null_islocked(struct vop_islocked_args *ap); static int null_lock(struct vop_lock_args *ap); static int null_lookup(struct vop_lookup_args *ap); static int null_open(struct vop_open_args *ap); @@ -277,7 +288,7 @@ null_bypass(ap) * of vrele'ing their vp's. We must account for * that. (This should go away in the future.) */ - if (reles & 1) + if (reles & VDESC_VP0_WILLRELE) VREF(*this_vp_p); } @@ -287,7 +298,12 @@ null_bypass(ap) * Call the operation on the lower layer * with the modified argument structure. */ - error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap); + if (vps_p[0] && *vps_p[0]) + error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap); + else { + printf("null_bypass: no map for %s\n", descp->vdesc_name); + error = EINVAL; + } /* * Maintain the illusion of call-by-value @@ -300,7 +316,11 @@ null_bypass(ap) break; /* bail out at end of list */ if (old_vps[i]) { *(vps_p[i]) = old_vps[i]; - if (reles & 1) +#if 0 + if (reles & VDESC_VP0_WILLUNLOCK) + VOP_UNLOCK(*(vps_p[i]), LK_THISLAYER, curproc); +#endif + if (reles & VDESC_VP0_WILLRELE) vrele(*(vps_p[i])); } } @@ -345,44 +365,43 @@ null_lookup(ap) } */ *ap; { struct componentname *cnp = ap->a_cnp; + struct vnode *dvp = ap->a_dvp; struct proc *p = cnp->cn_proc; int flags = cnp->cn_flags; - struct vop_lock_args lockargs; - struct vop_unlock_args unlockargs; - struct vnode *dvp, *vp; + struct vnode *vp, *ldvp, *lvp; int error; - if ((flags & ISLASTCN) && (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) && + if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); - error = null_bypass((struct vop_generic_args *)ap); + /* + * Although it is possible to call null_bypass(), we'll do + * a direct call to reduce overhead + */ + ldvp = NULLVPTOLOWERVP(dvp); + vp = lvp = NULL; + error = VOP_LOOKUP(ldvp, &lvp, cnp); if (error == EJUSTRETURN && (flags & ISLASTCN) && - (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) && + (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) error = EROFS; + /* - * We must do the same locking and unlocking at this layer as - * is done in the layers below us. We could figure this out - * based on the error return and the LASTCN, LOCKPARENT, and - * LOCKLEAF flags. However, it is more expidient to just find - * out the state of the lower level vnodes and set ours to the - * same state. + * Rely only on the PDIRUNLOCK flag which should be carefully + * tracked by underlying filesystem. */ - dvp = ap->a_dvp; - vp = *ap->a_vpp; - if (dvp == vp) - return (error); - if (!VOP_ISLOCKED(dvp, NULL)) { - unlockargs.a_vp = dvp; - unlockargs.a_flags = 0; - unlockargs.a_p = p; - vop_nounlock(&unlockargs); - } - if (vp != NULLVP && VOP_ISLOCKED(vp, NULL)) { - lockargs.a_vp = vp; - lockargs.a_flags = LK_SHARED; - lockargs.a_p = p; - vop_nolock(&lockargs); + if (cnp->cn_flags & PDIRUNLOCK) + VOP_UNLOCK(dvp, LK_THISLAYER, p); + if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) { + if (ldvp == lvp) { + *ap->a_vpp = dvp; + VREF(dvp); + vrele(lvp); + } else { + error = null_node_create(dvp->v_mount, lvp, &vp); + if (error == 0) + *ap->a_vpp = vp; + } } return (error); } @@ -430,6 +449,7 @@ null_setattr(ap) return (EROFS); } } + return (null_bypass((struct vop_generic_args *)ap)); } @@ -454,6 +474,9 @@ null_getattr(ap) return (0); } +/* + * Handle to disallow write access if mounted read-only. + */ static int null_access(ap) struct vop_access_args /* { @@ -559,12 +582,62 @@ null_lock(ap) struct proc *a_p; } */ *ap; { + struct vnode *vp = ap->a_vp; + int flags = ap->a_flags; + struct proc *p = ap->a_p; + struct vnode *lvp; + int error; - vop_nolock(ap); - if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN) - return (0); - ap->a_flags &= ~LK_INTERLOCK; - return (null_bypass((struct vop_generic_args *)ap)); + if (flags & LK_THISLAYER) { + if (vp->v_vnlock != NULL) + return 0; /* lock is shared across layers */ + error = lockmgr(&vp->v_lock, flags & ~LK_THISLAYER, + &vp->v_interlock, p); + return (error); + } + + if (vp->v_vnlock != NULL) { + /* + * The lower level has exported a struct lock to us. Use + * it so that all vnodes in the stack lock and unlock + * simultaneously. Note: we don't DRAIN the lock as DRAIN + * decommissions the lock - just because our vnode is + * going away doesn't mean the struct lock below us is. + * LK_EXCLUSIVE is fine. + */ + if ((flags & LK_TYPE_MASK) == LK_DRAIN) { + NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n"); + return(lockmgr(vp->v_vnlock, + (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, + &vp->v_interlock, p)); + } + return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock, p)); + } else { + /* + * To prevent race conditions involving doing a lookup + * on "..", we have to lock the lower node, then lock our + * node. Most of the time it won't matter that we lock our + * node (as any locking would need the lower one locked + * first). But we can LK_DRAIN the upper lock as a step + * towards decomissioning it. + */ + lvp = NULLVPTOLOWERVP(vp); + if (flags & LK_INTERLOCK) { + simple_unlock(&vp->v_interlock); + flags &= ~LK_INTERLOCK; + } + if ((flags & LK_TYPE_MASK) == LK_DRAIN) { + error = VOP_LOCK(lvp, + (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, p); + } else + error = VOP_LOCK(lvp, flags, p); + if (error) + return (error); + error = lockmgr(&vp->v_lock, flags, &vp->v_interlock, p); + if (error) + VOP_UNLOCK(lvp, 0, p); + return (error); + } } /* @@ -580,11 +653,46 @@ null_unlock(ap) struct proc *a_p; } */ *ap; { - vop_nounlock(ap); - ap->a_flags &= ~LK_INTERLOCK; - return (null_bypass((struct vop_generic_args *)ap)); + struct vnode *vp = ap->a_vp; + int flags = ap->a_flags; + struct proc *p = ap->a_p; + + if (vp->v_vnlock != NULL) { + if (flags & LK_THISLAYER) + return 0; /* the lock is shared across layers */ + flags &= ~LK_THISLAYER; + return (lockmgr(vp->v_vnlock, flags | LK_RELEASE, + &vp->v_interlock, p)); + } + if ((flags & LK_THISLAYER) == 0) { + if (flags & LK_INTERLOCK) + simple_unlock(&vp->v_interlock); + VOP_UNLOCK(NULLVPTOLOWERVP(vp), flags & ~LK_INTERLOCK, p); + } else + flags &= ~LK_THISLAYER; + return (lockmgr(&vp->v_lock, flags | LK_RELEASE, &vp->v_interlock, p)); +} + +static int +null_islocked(ap) + struct vop_islocked_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct proc *p = ap->a_p; + + if (vp->v_vnlock != NULL) + return (lockstatus(vp->v_vnlock, p)); + return (lockstatus(&vp->v_lock, p)); } +/* + * There is no way to tell that someone issued remove/rmdir operation + * on the underlying filesystem. For now we just have to release lowevrp + * as soon as possible. + */ static int null_inactive(ap) struct vop_inactive_args /* { @@ -593,27 +701,34 @@ null_inactive(ap) } */ *ap; { struct vnode *vp = ap->a_vp; + struct proc *p = ap->a_p; struct null_node *xp = VTONULL(vp); struct vnode *lowervp = xp->null_lowervp; + + lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, p); + LIST_REMOVE(xp, null_hash); + lockmgr(&null_hashlock, LK_RELEASE, NULL, p); + + xp->null_lowervp = NULLVP; + if (vp->v_vnlock != NULL) { + vp->v_vnlock = &vp->v_lock; /* we no longer share the lock */ + } else + VOP_UNLOCK(vp, LK_THISLAYER, p); + + vput(lowervp); /* - * Do nothing (and _don't_ bypass). - * Wait to vrele lowervp until reclaim, - * so that until then our null_node is in the - * cache and reusable. - * We still have to tell the lower layer the vnode - * is now inactive though. - * - * NEEDSWORK: Someday, consider inactive'ing - * the lowervp and then trying to reactivate it - * with capabilities (v_id) - * like they do in the name lookup cache code. - * That's too much work for now. + * Now it is safe to drop references to the lower vnode. + * VOP_INACTIVE() will be called by vrele() if necessary. */ - VOP_INACTIVE(lowervp, ap->a_p); - VOP_UNLOCK(ap->a_vp, 0, ap->a_p); + vrele (lowervp); + return (0); } +/* + * We can free memory in null_inactive, but we do this + * here. (Possible to guard vp->v_data to point somewhere) + */ static int null_reclaim(ap) struct vop_reclaim_args /* { @@ -622,21 +737,11 @@ null_reclaim(ap) } */ *ap; { struct vnode *vp = ap->a_vp; - struct null_node *xp = VTONULL(vp); - struct vnode *lowervp = xp->null_lowervp; + void *vdata = vp->v_data; - /* - * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p, - * so we can't call VOPs on ourself. - */ - /* After this assignment, this node will not be re-used. */ - xp->null_lowervp = NULLVP; - lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, ap->a_p); - LIST_REMOVE(xp, null_hash); - lockmgr(&null_hashlock, LK_RELEASE, NULL, ap->a_p); - FREE(vp->v_data, M_TEMP); vp->v_data = NULL; - vrele (lowervp); + FREE(vdata, M_NULLFSNODE); + return (0); } @@ -652,16 +757,74 @@ null_print(ap) } /* + * Let an underlying filesystem do the work + */ +static int +null_createvobject(ap) + struct vop_createvobject_args /* { + struct vnode *vp; + struct ucred *cred; + struct proc *p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct vnode *lowervp = VTONULL(vp) ? NULLVPTOLOWERVP(vp) : NULL; + int error; + + if (vp->v_type == VNON || lowervp == NULL) + return 0; + error = VOP_CREATEVOBJECT(lowervp, ap->a_cred, ap->a_p); + if (error) + return (error); + vp->v_flag |= VOBJBUF; + return (0); +} + +/* + * We have nothing to destroy and this operation shouldn't be bypassed. + */ +static int +null_destroyvobject(ap) + struct vop_destroyvobject_args /* { + struct vnode *vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + vp->v_flag &= ~VOBJBUF; + return (0); +} + +static int +null_getvobject(ap) + struct vop_getvobject_args /* { + struct vnode *vp; + struct vm_object **objpp; + } */ *ap; +{ + struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp); + + if (lvp == NULL) + return EINVAL; + return (VOP_GETVOBJECT(lvp, ap->a_objpp)); +} + +/* * Global vfs data structures */ vop_t **null_vnodeop_p; static struct vnodeopv_entry_desc null_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) null_bypass }, + { &vop_access_desc, (vop_t *) null_access }, { &vop_bmap_desc, (vop_t *) vop_eopnotsupp }, + { &vop_createvobject_desc, (vop_t *) null_createvobject }, + { &vop_destroyvobject_desc, (vop_t *) null_destroyvobject }, { &vop_getattr_desc, (vop_t *) null_getattr }, + { &vop_getvobject_desc, (vop_t *) null_getvobject }, { &vop_getwritemount_desc, (vop_t *) vop_stdgetwritemount}, { &vop_inactive_desc, (vop_t *) null_inactive }, + { &vop_islocked_desc, (vop_t *) null_islocked }, { &vop_lock_desc, (vop_t *) null_lock }, { &vop_lookup_desc, (vop_t *) null_lookup }, { &vop_open_desc, (vop_t *) null_open }, |