Diffstat (limited to 'sys/fs/nullfs/null_vnops.c')
-rw-r--r-- | sys/fs/nullfs/null_vnops.c | 293
1 file changed, 228 insertions, 65 deletions
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index 5692df6..0b7cb96 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -176,6 +176,8 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
@@ -183,13 +185,22 @@
 #include <sys/malloc.h>
 #include <miscfs/nullfs/null.h>
 
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
+
 static int null_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
 SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
 	&null_bug_bypass, 0, "");
 
 static int	null_access(struct vop_access_args *ap);
+static int	null_createvobject(struct vop_createvobject_args *ap);
+static int	null_destroyvobject(struct vop_destroyvobject_args *ap);
 static int	null_getattr(struct vop_getattr_args *ap);
+static int	null_getvobject(struct vop_getvobject_args *ap);
 static int	null_inactive(struct vop_inactive_args *ap);
+static int	null_islocked(struct vop_islocked_args *ap);
 static int	null_lock(struct vop_lock_args *ap);
 static int	null_lookup(struct vop_lookup_args *ap);
 static int	null_open(struct vop_open_args *ap);
@@ -277,7 +288,7 @@
 		 * of vrele'ing their vp's.  We must account for
 		 * that.  (This should go away in the future.)
 		 */
-		if (reles & 1)
+		if (reles & VDESC_VP0_WILLRELE)
 			VREF(*this_vp_p);
 	}
 
@@ -287,7 +298,12 @@
 	 * Call the operation on the lower layer
 	 * with the modified argument structure.
 	 */
-	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+	if (vps_p[0] && *vps_p[0])
+		error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+	else {
+		printf("null_bypass: no map for %s\n", descp->vdesc_name);
+		error = EINVAL;
+	}
 
 	/*
 	 * Maintain the illusion of call-by-value
@@ -300,7 +316,11 @@
 			break;   /* bail out at end of list */
 		if (old_vps[i]) {
 			*(vps_p[i]) = old_vps[i];
-			if (reles & 1)
+#if 0
+			if (reles & VDESC_VP0_WILLUNLOCK)
+				VOP_UNLOCK(*(vps_p[i]), LK_THISLAYER, curproc);
+#endif
+			if (reles & VDESC_VP0_WILLRELE)
 				vrele(*(vps_p[i]));
 		}
 	}
@@ -345,44 +365,43 @@
 	} */ *ap;
 {
 	struct componentname *cnp = ap->a_cnp;
+	struct vnode *dvp = ap->a_dvp;
 	struct proc *p = cnp->cn_proc;
 	int flags = cnp->cn_flags;
-	struct vop_lock_args lockargs;
-	struct vop_unlock_args unlockargs;
-	struct vnode *dvp, *vp;
+	struct vnode *vp, *ldvp, *lvp;
 	int error;
 
-	if ((flags & ISLASTCN) && (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		return (EROFS);
-	error = null_bypass((struct vop_generic_args *)ap);
+	/*
+	 * Although it is possible to call null_bypass(), we'll do
+	 * a direct call to reduce overhead
+	 */
+	ldvp = NULLVPTOLOWERVP(dvp);
+	vp = lvp = NULL;
+	error = VOP_LOOKUP(ldvp, &lvp, cnp);
 	if (error == EJUSTRETURN && (flags & ISLASTCN) &&
-	    (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
 		error = EROFS;
+
 	/*
-	 * We must do the same locking and unlocking at this layer as
-	 * is done in the layers below us. We could figure this out
-	 * based on the error return and the LASTCN, LOCKPARENT, and
-	 * LOCKLEAF flags. However, it is more expidient to just find
-	 * out the state of the lower level vnodes and set ours to the
-	 * same state.
+	 * Rely only on the PDIRUNLOCK flag which should be carefully
+	 * tracked by underlying filesystem.
 	 */
-	dvp = ap->a_dvp;
-	vp = *ap->a_vpp;
-	if (dvp == vp)
-		return (error);
-	if (!VOP_ISLOCKED(dvp, NULL)) {
-		unlockargs.a_vp = dvp;
-		unlockargs.a_flags = 0;
-		unlockargs.a_p = p;
-		vop_nounlock(&unlockargs);
-	}
-	if (vp != NULLVP && VOP_ISLOCKED(vp, NULL)) {
-		lockargs.a_vp = vp;
-		lockargs.a_flags = LK_SHARED;
-		lockargs.a_p = p;
-		vop_nolock(&lockargs);
+	if (cnp->cn_flags & PDIRUNLOCK)
+		VOP_UNLOCK(dvp, LK_THISLAYER, p);
+	if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
+		if (ldvp == lvp) {
+			*ap->a_vpp = dvp;
+			VREF(dvp);
+			vrele(lvp);
+		} else {
+			error = null_node_create(dvp->v_mount, lvp, &vp);
+			if (error == 0)
+				*ap->a_vpp = vp;
+		}
 	}
 	return (error);
 }
@@ -430,6 +449,7 @@
 			return (EROFS);
 		}
 	}
+
 	return (null_bypass((struct vop_generic_args *)ap));
 }
 
@@ -454,6 +474,9 @@
 	return (0);
 }
 
+/*
+ * Handle to disallow write access if mounted read-only.
+ */
 static int
 null_access(ap)
 	struct vop_access_args /* {
@@ -559,12 +582,62 @@
 	struct proc *a_p;
 	} */ *ap;
 {
+	struct vnode *vp = ap->a_vp;
+	int flags = ap->a_flags;
+	struct proc *p = ap->a_p;
+	struct vnode *lvp;
+	int error;
 
-	vop_nolock(ap);
-	if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN)
-		return (0);
-	ap->a_flags &= ~LK_INTERLOCK;
-	return (null_bypass((struct vop_generic_args *)ap));
+	if (flags & LK_THISLAYER) {
+		if (vp->v_vnlock != NULL)
+			return 0;	/* lock is shared across layers */
+		error = lockmgr(&vp->v_lock, flags & ~LK_THISLAYER,
+		    &vp->v_interlock, p);
+		return (error);
+	}
+
+	if (vp->v_vnlock != NULL) {
+		/*
+		 * The lower level has exported a struct lock to us. Use
+		 * it so that all vnodes in the stack lock and unlock
+		 * simultaneously. Note: we don't DRAIN the lock as DRAIN
+		 * decommissions the lock - just because our vnode is
+		 * going away doesn't mean the struct lock below us is.
+		 * LK_EXCLUSIVE is fine.
+		 */
+		if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
+			NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n");
+			return(lockmgr(vp->v_vnlock,
+				(flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
+				&vp->v_interlock, p));
+		}
+		return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock, p));
+	} else {
+		/*
+		 * To prevent race conditions involving doing a lookup
+		 * on "..", we have to lock the lower node, then lock our
+		 * node. Most of the time it won't matter that we lock our
+		 * node (as any locking would need the lower one locked
+		 * first). But we can LK_DRAIN the upper lock as a step
+		 * towards decommissioning it.
+		 */
+		lvp = NULLVPTOLOWERVP(vp);
+		if (flags & LK_INTERLOCK) {
+			simple_unlock(&vp->v_interlock);
+			flags &= ~LK_INTERLOCK;
+		}
+		if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
+			error = VOP_LOCK(lvp,
+				(flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, p);
+		} else
+			error = VOP_LOCK(lvp, flags, p);
+		if (error)
+			return (error);
+		error = lockmgr(&vp->v_lock, flags, &vp->v_interlock, p);
+		if (error)
+			VOP_UNLOCK(lvp, 0, p);
+		return (error);
+	}
 }
 
 /*
@@ -580,11 +653,46 @@
 	struct proc *a_p;
 	} */ *ap;
 {
-	vop_nounlock(ap);
-	ap->a_flags &= ~LK_INTERLOCK;
-	return (null_bypass((struct vop_generic_args *)ap));
+	struct vnode *vp = ap->a_vp;
+	int flags = ap->a_flags;
+	struct proc *p = ap->a_p;
+
+	if (vp->v_vnlock != NULL) {
+		if (flags & LK_THISLAYER)
+			return 0;	/* the lock is shared across layers */
+		flags &= ~LK_THISLAYER;
+		return (lockmgr(vp->v_vnlock, flags | LK_RELEASE,
+			&vp->v_interlock, p));
+	}
+	if ((flags & LK_THISLAYER) == 0) {
+		if (flags & LK_INTERLOCK)
+			simple_unlock(&vp->v_interlock);
+		VOP_UNLOCK(NULLVPTOLOWERVP(vp), flags & ~LK_INTERLOCK, p);
+	} else
+		flags &= ~LK_THISLAYER;
+	return (lockmgr(&vp->v_lock, flags | LK_RELEASE, &vp->v_interlock, p));
+}
+
+static int
+null_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct proc *p = ap->a_p;
+
+	if (vp->v_vnlock != NULL)
+		return (lockstatus(vp->v_vnlock, p));
+	return (lockstatus(&vp->v_lock, p));
 }
 
+/*
+ * There is no way to tell that someone issued remove/rmdir operation
+ * on the underlying filesystem. For now we just have to release lowervp
+ * as soon as possible.
+ */
 static int
 null_inactive(ap)
 	struct vop_inactive_args /* {
@@ -593,27 +701,34 @@
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
+	struct proc *p = ap->a_p;
 	struct null_node *xp = VTONULL(vp);
 	struct vnode *lowervp = xp->null_lowervp;
+
+	lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, p);
+	LIST_REMOVE(xp, null_hash);
+	lockmgr(&null_hashlock, LK_RELEASE, NULL, p);
+
+	xp->null_lowervp = NULLVP;
+	if (vp->v_vnlock != NULL) {
+		vp->v_vnlock = &vp->v_lock;	/* we no longer share the lock */
+	} else
+		VOP_UNLOCK(vp, LK_THISLAYER, p);
+
+	vput(lowervp);
 	/*
-	 * Do nothing (and _don't_ bypass).
-	 * Wait to vrele lowervp until reclaim,
-	 * so that until then our null_node is in the
-	 * cache and reusable.
-	 * We still have to tell the lower layer the vnode
-	 * is now inactive though.
-	 *
-	 * NEEDSWORK: Someday, consider inactive'ing
-	 * the lowervp and then trying to reactivate it
-	 * with capabilities (v_id)
-	 * like they do in the name lookup cache code.
-	 * That's too much work for now.
+	 * Now it is safe to drop references to the lower vnode.
+	 * VOP_INACTIVE() will be called by vrele() if necessary.
 	 */
-	VOP_INACTIVE(lowervp, ap->a_p);
-	VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+	vrele (lowervp);
+
 	return (0);
 }
 
+/*
+ * We can free memory in null_inactive, but we do this
+ * here. (Possible to guard vp->v_data to point somewhere)
+ */
 static int
 null_reclaim(ap)
 	struct vop_reclaim_args /* {
@@ -622,21 +737,11 @@
 	} */ *ap;
{
 	struct vnode *vp = ap->a_vp;
-	struct null_node *xp = VTONULL(vp);
-	struct vnode *lowervp = xp->null_lowervp;
+	void *vdata = vp->v_data;
 
-	/*
-	 * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
-	 * so we can't call VOPs on ourself.
-	 */
-	/* After this assignment, this node will not be re-used. */
-	xp->null_lowervp = NULLVP;
-	lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, ap->a_p);
-	LIST_REMOVE(xp, null_hash);
-	lockmgr(&null_hashlock, LK_RELEASE, NULL, ap->a_p);
-	FREE(vp->v_data, M_TEMP);
 	vp->v_data = NULL;
-	vrele (lowervp);
+	FREE(vdata, M_NULLFSNODE);
+
 	return (0);
 }
 
@@ -652,16 +757,74 @@
 }
 
 /*
+ * Let an underlying filesystem do the work
+ */
+static int
+null_createvobject(ap)
+	struct vop_createvobject_args /* {
+		struct vnode *vp;
+		struct ucred *cred;
+		struct proc *p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vnode *lowervp = VTONULL(vp) ? NULLVPTOLOWERVP(vp) : NULL;
+	int error;
+
+	if (vp->v_type == VNON || lowervp == NULL)
+		return 0;
+	error = VOP_CREATEVOBJECT(lowervp, ap->a_cred, ap->a_p);
+	if (error)
+		return (error);
+	vp->v_flag |= VOBJBUF;
+	return (0);
+}
+
+/*
+ * We have nothing to destroy and this operation shouldn't be bypassed.
+ */
+static int
+null_destroyvobject(ap)
+	struct vop_destroyvobject_args /* {
+		struct vnode *vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	vp->v_flag &= ~VOBJBUF;
+	return (0);
+}
+
+static int
+null_getvobject(ap)
+	struct vop_getvobject_args /* {
+		struct vnode *vp;
+		struct vm_object **objpp;
+	} */ *ap;
+{
+	struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp);
+
+	if (lvp == NULL)
+		return EINVAL;
+	return (VOP_GETVOBJECT(lvp, ap->a_objpp));
+}
+
+/*
  * Global vfs data structures
  */
 vop_t **null_vnodeop_p;
 static struct vnodeopv_entry_desc null_vnodeop_entries[] = {
 	{ &vop_default_desc,		(vop_t *) null_bypass },
+	{ &vop_access_desc,		(vop_t *) null_access },
 	{ &vop_bmap_desc,		(vop_t *) vop_eopnotsupp },
+	{ &vop_createvobject_desc,	(vop_t *) null_createvobject },
+	{ &vop_destroyvobject_desc,	(vop_t *) null_destroyvobject },
 	{ &vop_getattr_desc,		(vop_t *) null_getattr },
+	{ &vop_getvobject_desc,		(vop_t *) null_getvobject },
 	{ &vop_getwritemount_desc,	(vop_t *) vop_stdgetwritemount},
 	{ &vop_inactive_desc,		(vop_t *) null_inactive },
+	{ &vop_islocked_desc,		(vop_t *) null_islocked },
 	{ &vop_lock_desc,		(vop_t *) null_lock },
 	{ &vop_lookup_desc,		(vop_t *) null_lookup },
 	{ &vop_open_desc,		(vop_t *) null_open },
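The null_bypass() hunks above replace the magic constant 1 with VDESC_VP0_WILLRELE when walking a descriptor's vnode arguments. The sketch below isolates that scan pattern; it assumes the 4.x-era vnode_if descriptor layout (vdesc_vp_offsets[], VDESC_NO_OFFSET, VDESC_MAX_VPS and VOPARG_OFFSETTO() from <sys/vnode.h>) and the helper name is illustrative, not part of null_vnops.c.

/*
 * Sketch of the WILLRELE scan performed by a bypass routine;
 * illustrative only, not a drop-in replacement for null_bypass().
 */
static void
bypass_ref_willrele(struct vnodeop_desc *descp, struct vop_generic_args *ap)
{
	struct vnode **vp_p;
	int i, reles;

	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;	/* bail out at end of vnode list */
		vp_p = VOPARG_OFFSETTO(struct vnode **,
		    descp->vdesc_vp_offsets[i], ap);
		/*
		 * After i right-shifts, testing VDESC_VP0_WILLRELE
		 * (bit 0) asks whether argument slot i is WILLRELE:
		 * the called op will vrele() the vnode, so take an
		 * extra reference to keep it alive across the call.
		 */
		if (reles & VDESC_VP0_WILLRELE)
			VREF(*vp_p);
	}
}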
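The rewritten null_lock()/null_unlock() hinge on vp->v_vnlock: when the lower filesystem exports its lock, the null vnode points v_vnlock at it, and a single lockmgr lock then serializes every layer of the stack. A minimal userland sketch of that sharing idea, using pthreads in place of lockmgr (all names here are illustrative, not kernel API):

#include <pthread.h>

/*
 * Two stacked "vnodes"; the upper one shares the lower one's lock,
 * the way nullfs shares the lower vnode's v_vnlock after this diff.
 */
struct xlock { pthread_mutex_t mtx; };

struct xvnode {
	struct xlock	 v_lock;	/* this layer's private lock */
	struct xlock	*v_vnlock;	/* shared lock, or NULL if private */
};

static struct xlock *
xvn_lockof(struct xvnode *vp)
{
	/* Use the exported lock when one exists, else our own. */
	return (vp->v_vnlock != NULL ? vp->v_vnlock : &vp->v_lock);
}

int
main(void)
{
	struct xvnode lower, upper;

	pthread_mutex_init(&lower.v_lock.mtx, NULL);
	pthread_mutex_init(&upper.v_lock.mtx, NULL);
	lower.v_vnlock = NULL;			/* bottom of the stack */
	upper.v_vnlock = &lower.v_lock;		/* share the lower lock */

	pthread_mutex_lock(&xvn_lockof(&upper)->mtx);
	/* The lower vnode is now locked too: one lock, whole stack. */
	pthread_mutex_unlock(&xvn_lockof(&lower)->mtx);
	return (0);
}

Locking through the upper layer and unlocking through the lower one works precisely because both resolve to the same object, which is what lets the stacked vnodes "lock and unlock simultaneously", as the comment in null_lock() puts it.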
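To exercise these paths, the layer is mounted over an existing directory. A hedged userland sketch under 4.x-era assumptions: struct null_args is copied here to stay self-contained (it lives in <miscfs/nullfs/null.h>), and the "null" type string and both paths are illustrative.

#include <sys/param.h>
#include <sys/mount.h>
#include <err.h>

/* Mirrors struct null_args from <miscfs/nullfs/null.h>. */
struct null_args {
	char	*target;		/* lower (shadowed) directory */
};

int
main(void)
{
	struct null_args args;

	args.target = "/usr/src";	/* the tree to mirror */
	/* "null" is assumed to be the vfc_name nullfs registers. */
	if (mount("null", "/mnt/src", MNT_RDONLY, &args) == -1)
		err(1, "mount");
	return (0);
}

mount_null(8) does essentially this; a read-only mount is also a quick way to hit the EROFS cases in null_lookup() and null_setattr() shown above.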