Diffstat (limited to 'sys/fs/nullfs/null_vnops.c')
-rw-r--r--    sys/fs/nullfs/null_vnops.c    293
1 file changed, 228 insertions, 65 deletions
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index 5692df6..0b7cb96 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -176,6 +176,8 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/mount.h>
@@ -183,13 +185,22 @@
#include <sys/malloc.h>
#include <miscfs/nullfs/null.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
+
static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
&null_bug_bypass, 0, "");
static int null_access(struct vop_access_args *ap);
+static int null_createvobject(struct vop_createvobject_args *ap);
+static int null_destroyvobject(struct vop_destroyvobject_args *ap);
static int null_getattr(struct vop_getattr_args *ap);
+static int null_getvobject(struct vop_getvobject_args *ap);
static int null_inactive(struct vop_inactive_args *ap);
+static int null_islocked(struct vop_islocked_args *ap);
static int null_lock(struct vop_lock_args *ap);
static int null_lookup(struct vop_lookup_args *ap);
static int null_open(struct vop_open_args *ap);
@@ -277,7 +288,7 @@ null_bypass(ap)
* of vrele'ing their vp's. We must account for
* that. (This should go away in the future.)
*/
- if (reles & 1)
+ if (reles & VDESC_VP0_WILLRELE)
VREF(*this_vp_p);
}
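
For context: vdesc_flags packs one WILLRELE bit per vnode argument slot, and null_bypass shifts the mask by one bit as it advances from slot to slot, which is why testing the named constant is clearer than the literal 1. A minimal user-space model of that walk (the bit value and the four-slot limit are assumed from the 4.4BSD vnode_if convention, not taken from this diff):

    #include <stdio.h>

    #define VDESC_VP0_WILLRELE 0x0001  /* assumed: bit 0 = slot 0, bit 1 = slot 1, ... */

    int
    main(void)
    {
        int reles = 0x5;  /* hypothetical descriptor: slots 0 and 2 will be vrele'd */
        int i;

        /* Same pattern as null_bypass: test bit 0, then shift for the next slot. */
        for (i = 0; i < 4; i++, reles >>= 1)
            if (reles & VDESC_VP0_WILLRELE)
                printf("slot %d: take an extra reference before the call\n", i);
        return (0);
    }
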
@@ -287,7 +298,12 @@ null_bypass(ap)
* Call the operation on the lower layer
* with the modified argument structure.
*/
- error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+ if (vps_p[0] && *vps_p[0])
+ error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+ else {
+ printf("null_bypass: no map for %s\n", descp->vdesc_name);
+ error = EINVAL;
+ }
/*
* Maintain the illusion of call-by-value
@@ -300,7 +316,11 @@ null_bypass(ap)
break; /* bail out at end of list */
if (old_vps[i]) {
*(vps_p[i]) = old_vps[i];
- if (reles & 1)
+#if 0
+ if (reles & VDESC_VP0_WILLUNLOCK)
+ VOP_UNLOCK(*(vps_p[i]), LK_THISLAYER, curproc);
+#endif
+ if (reles & VDESC_VP0_WILLRELE)
vrele(*(vps_p[i]));
}
}
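
This loop is what maintains the call-by-value illusion: before the lower call each vnode argument was swapped for its lower counterpart, and afterwards the saved upper pointers are put back. A toy two-layer model of the swap-call-restore pattern (the struct layout and names are illustrative only):

    #include <stdio.h>

    struct vnode { const char *name; struct vnode *lower; };

    static void
    lower_op(struct vnode *vp)
    {
        printf("lower layer operates on %s\n", vp->name);
    }

    int
    main(void)
    {
        struct vnode lvp = { "lower", NULL };
        struct vnode uvp = { "upper", &lvp };
        struct vnode *arg = &uvp;
        struct vnode *old_vp = arg;     /* save the upper vnode */

        arg = arg->lower;               /* map the argument to the lower layer */
        lower_op(arg);                  /* invoke the lower operation */
        arg = old_vp;                   /* restore: the caller sees its own vnode */
        printf("caller still holds %s\n", arg->name);
        return (0);
    }
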
@@ -345,44 +365,43 @@ null_lookup(ap)
} */ *ap;
{
struct componentname *cnp = ap->a_cnp;
+ struct vnode *dvp = ap->a_dvp;
struct proc *p = cnp->cn_proc;
int flags = cnp->cn_flags;
- struct vop_lock_args lockargs;
- struct vop_unlock_args unlockargs;
- struct vnode *dvp, *vp;
+ struct vnode *vp, *ldvp, *lvp;
int error;
- if ((flags & ISLASTCN) && (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+ if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
return (EROFS);
- error = null_bypass((struct vop_generic_args *)ap);
+ /*
+ * Although it is possible to call null_bypass(), we'll do
+ * a direct call to reduce overhead
+ */
+ ldvp = NULLVPTOLOWERVP(dvp);
+ vp = lvp = NULL;
+ error = VOP_LOOKUP(ldvp, &lvp, cnp);
if (error == EJUSTRETURN && (flags & ISLASTCN) &&
- (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+ (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
(cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
error = EROFS;
+
/*
- * We must do the same locking and unlocking at this layer as
- * is done in the layers below us. We could figure this out
- * based on the error return and the LASTCN, LOCKPARENT, and
- * LOCKLEAF flags. However, it is more expidient to just find
- * out the state of the lower level vnodes and set ours to the
- * same state.
+ * Rely only on the PDIRUNLOCK flag, which should be carefully
+ * tracked by the underlying filesystem.
*/
- dvp = ap->a_dvp;
- vp = *ap->a_vpp;
- if (dvp == vp)
- return (error);
- if (!VOP_ISLOCKED(dvp, NULL)) {
- unlockargs.a_vp = dvp;
- unlockargs.a_flags = 0;
- unlockargs.a_p = p;
- vop_nounlock(&unlockargs);
- }
- if (vp != NULLVP && VOP_ISLOCKED(vp, NULL)) {
- lockargs.a_vp = vp;
- lockargs.a_flags = LK_SHARED;
- lockargs.a_p = p;
- vop_nolock(&lockargs);
+ if (cnp->cn_flags & PDIRUNLOCK)
+ VOP_UNLOCK(dvp, LK_THISLAYER, p);
+ if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
+ if (ldvp == lvp) {
+ *ap->a_vpp = dvp;
+ VREF(dvp);
+ vrele(lvp);
+ } else {
+ error = null_node_create(dvp->v_mount, lvp, &vp);
+ if (error == 0)
+ *ap->a_vpp = vp;
+ }
}
return (error);
}
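
The ldvp == lvp branch handles a lookup of ".": the lower filesystem returns the lower directory itself, so the upper layer must hand back its own directory vnode rather than wrap the lower one in a second alias. A toy model of that result mapping (the wrapper here is a stand-in for null_node_create(), not its real logic):

    #include <stdio.h>

    struct vnode { const char *name; struct vnode *lower; };

    static struct vnode alias;  /* stand-in for a node from null_node_create() */

    static struct vnode *
    map_lookup_result(struct vnode *dvp, struct vnode *ldvp, struct vnode *lvp)
    {
        if (ldvp == lvp)
            return (dvp);       /* "." came back: reuse the upper directory */
        alias.name = "upper-alias";
        alias.lower = lvp;      /* otherwise wrap the lower vnode */
        return (&alias);
    }

    int
    main(void)
    {
        struct vnode ldir = { "lower-dir", NULL };
        struct vnode udir = { "upper-dir", &ldir };
        struct vnode lfile = { "lower-file", NULL };

        printf("lookup \".\"  -> %s\n", map_lookup_result(&udir, &ldir, &ldir)->name);
        printf("lookup file -> %s\n", map_lookup_result(&udir, &ldir, &lfile)->name);
        return (0);
    }
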
@@ -430,6 +449,7 @@ null_setattr(ap)
return (EROFS);
}
}
+
return (null_bypass((struct vop_generic_args *)ap));
}
@@ -454,6 +474,9 @@ null_getattr(ap)
return (0);
}
+/*
+ * Handler to disallow write access if mounted read-only.
+ */
static int
null_access(ap)
struct vop_access_args /* {
@@ -559,12 +582,62 @@ null_lock(ap)
struct proc *a_p;
} */ *ap;
{
+ struct vnode *vp = ap->a_vp;
+ int flags = ap->a_flags;
+ struct proc *p = ap->a_p;
+ struct vnode *lvp;
+ int error;
- vop_nolock(ap);
- if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN)
- return (0);
- ap->a_flags &= ~LK_INTERLOCK;
- return (null_bypass((struct vop_generic_args *)ap));
+ if (flags & LK_THISLAYER) {
+ if (vp->v_vnlock != NULL)
+ return 0; /* lock is shared across layers */
+ error = lockmgr(&vp->v_lock, flags & ~LK_THISLAYER,
+ &vp->v_interlock, p);
+ return (error);
+ }
+
+ if (vp->v_vnlock != NULL) {
+ /*
+ * The lower level has exported a struct lock to us. Use
+ * it so that all vnodes in the stack lock and unlock
+ * simultaneously. Note: we don't DRAIN the lock as DRAIN
+ * decommissions the lock - just because our vnode is
+ * going away doesn't mean the struct lock below us is.
+ * LK_EXCLUSIVE is fine.
+ */
+ if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
+ NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n");
+ return(lockmgr(vp->v_vnlock,
+ (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
+ &vp->v_interlock, p));
+ }
+ return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock, p));
+ } else {
+ /*
+ * To prevent race conditions involving doing a lookup
+ * on "..", we have to lock the lower node, then lock our
+ * node. Most of the time it won't matter that we lock our
+ * node (as any locking would need the lower one locked
+ * first). But we can LK_DRAIN the upper lock as a step
+ * towards decommissioning it.
+ */
+ lvp = NULLVPTOLOWERVP(vp);
+ if (flags & LK_INTERLOCK) {
+ simple_unlock(&vp->v_interlock);
+ flags &= ~LK_INTERLOCK;
+ }
+ if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
+ error = VOP_LOCK(lvp,
+ (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, p);
+ } else
+ error = VOP_LOCK(lvp, flags, p);
+ if (error)
+ return (error);
+ error = lockmgr(&vp->v_lock, flags, &vp->v_interlock, p);
+ if (error)
+ VOP_UNLOCK(lvp, 0, p);
+ return (error);
+ }
}
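
The core idea in the rewritten null_lock() is lock sharing: when the lower filesystem exports its lock, the null node's v_vnlock points at the lower vnode's lock, so a single lockmgr lock serializes every layer of the stack at once. A toy model using a pthread mutex as the stand-in lock (the v_vnlock/v_lock names come from the diff; everything else is illustrative):

    #include <pthread.h>
    #include <stdio.h>

    struct vnode {
        pthread_mutex_t v_lock;      /* this layer's own lock */
        pthread_mutex_t *v_vnlock;   /* the lock actually used for the stack */
    };

    int
    main(void)
    {
        struct vnode lower, upper;

        pthread_mutex_init(&lower.v_lock, NULL);
        lower.v_vnlock = &lower.v_lock;   /* lowest layer uses its own lock */
        upper.v_vnlock = lower.v_vnlock;  /* upper layer shares it */

        pthread_mutex_lock(upper.v_vnlock);   /* locks the whole stack */
        printf("upper and lower are now locked together\n");
        pthread_mutex_unlock(upper.v_vnlock);
        return (0);
    }
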
/*
@@ -580,11 +653,46 @@ null_unlock(ap)
struct proc *a_p;
} */ *ap;
{
- vop_nounlock(ap);
- ap->a_flags &= ~LK_INTERLOCK;
- return (null_bypass((struct vop_generic_args *)ap));
+ struct vnode *vp = ap->a_vp;
+ int flags = ap->a_flags;
+ struct proc *p = ap->a_p;
+
+ if (vp->v_vnlock != NULL) {
+ if (flags & LK_THISLAYER)
+ return 0; /* the lock is shared across layers */
+ flags &= ~LK_THISLAYER;
+ return (lockmgr(vp->v_vnlock, flags | LK_RELEASE,
+ &vp->v_interlock, p));
+ }
+ if ((flags & LK_THISLAYER) == 0) {
+ if (flags & LK_INTERLOCK)
+ simple_unlock(&vp->v_interlock);
+ VOP_UNLOCK(NULLVPTOLOWERVP(vp), flags & ~LK_INTERLOCK, p);
+ } else
+ flags &= ~LK_THISLAYER;
+ return (lockmgr(&vp->v_lock, flags | LK_RELEASE, &vp->v_interlock, p));
+}
+
+static int
+null_islocked(ap)
+ struct vop_islocked_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct proc *p = ap->a_p;
+
+ if (vp->v_vnlock != NULL)
+ return (lockstatus(vp->v_vnlock, p));
+ return (lockstatus(&vp->v_lock, p));
}
+/*
+ * There is no way to tell that someone issued a remove/rmdir operation
+ * on the underlying filesystem. For now we just have to release lowervp
+ * as soon as possible.
+ */
static int
null_inactive(ap)
struct vop_inactive_args /* {
@@ -593,27 +701,34 @@ null_inactive(ap)
} */ *ap;
{
struct vnode *vp = ap->a_vp;
+ struct proc *p = ap->a_p;
struct null_node *xp = VTONULL(vp);
struct vnode *lowervp = xp->null_lowervp;
+
+ lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, p);
+ LIST_REMOVE(xp, null_hash);
+ lockmgr(&null_hashlock, LK_RELEASE, NULL, p);
+
+ xp->null_lowervp = NULLVP;
+ if (vp->v_vnlock != NULL) {
+ vp->v_vnlock = &vp->v_lock; /* we no longer share the lock */
+ } else
+ VOP_UNLOCK(vp, LK_THISLAYER, p);
+
+ vput(lowervp);
/*
- * Do nothing (and _don't_ bypass).
- * Wait to vrele lowervp until reclaim,
- * so that until then our null_node is in the
- * cache and reusable.
- * We still have to tell the lower layer the vnode
- * is now inactive though.
- *
- * NEEDSWORK: Someday, consider inactive'ing
- * the lowervp and then trying to reactivate it
- * with capabilities (v_id)
- * like they do in the name lookup cache code.
- * That's too much work for now.
+ * Now it is safe to drop references to the lower vnode.
+ * VOP_INACTIVE() will be called by vrele() if necessary.
*/
- VOP_INACTIVE(lowervp, ap->a_p);
- VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+ vrele (lowervp);
+
return (0);
}
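
The vput()/vrele() pair reads naturally once the difference is spelled out: vput() unlocks the vnode and drops one reference, while vrele() drops a reference only. A toy refcount model of the two drops null_inactive performs on lowervp (the starting counts are hypothetical; in the real code the references come from the lookup path and null_node_create()):

    #include <stdio.h>

    struct vnode { int usecount; int locked; };

    static void
    vrele(struct vnode *vp)         /* drop one reference */
    {
        vp->usecount--;
    }

    static void
    vput(struct vnode *vp)          /* unlock, then drop one reference */
    {
        vp->locked = 0;
        vrele(vp);
    }

    int
    main(void)
    {
        struct vnode lowervp = { 2, 1 };  /* hypothetical: 2 refs, locked */

        vput(&lowervp);   /* as in null_inactive: unlock + first reference */
        vrele(&lowervp);  /* second reference; hitting 0 triggers VOP_INACTIVE */
        printf("usecount=%d locked=%d\n", lowervp.usecount, lowervp.locked);
        return (0);
    }
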
+/*
+ * We could free this memory in null_inactive, but we do it here
+ * instead. (Alternatively, vp->v_data could be guarded to point
+ * somewhere safe.)
+ */
static int
null_reclaim(ap)
struct vop_reclaim_args /* {
@@ -622,21 +737,11 @@ null_reclaim(ap)
} */ *ap;
{
struct vnode *vp = ap->a_vp;
- struct null_node *xp = VTONULL(vp);
- struct vnode *lowervp = xp->null_lowervp;
+ void *vdata = vp->v_data;
- /*
- * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
- * so we can't call VOPs on ourself.
- */
- /* After this assignment, this node will not be re-used. */
- xp->null_lowervp = NULLVP;
- lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, ap->a_p);
- LIST_REMOVE(xp, null_hash);
- lockmgr(&null_hashlock, LK_RELEASE, NULL, ap->a_p);
- FREE(vp->v_data, M_TEMP);
vp->v_data = NULL;
- vrele (lowervp);
+ FREE(vdata, M_NULLFSNODE);
+
return (0);
}
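
The reclaim side is now a detach-then-free: the per-layer data is unhooked from the vnode before it is released, so the vnode never points at freed memory. The same pattern in a self-contained sketch (malloc/free stand in for the kernel's FREE and M_NULLFSNODE):

    #include <stdio.h>
    #include <stdlib.h>

    struct vnode { void *v_data; };

    static void
    reclaim(struct vnode *vp)
    {
        void *vdata = vp->v_data;

        vp->v_data = NULL;   /* detach first: no stale pointer survives */
        free(vdata);         /* then release the per-layer node */
    }

    int
    main(void)
    {
        struct vnode vn = { malloc(64) };

        reclaim(&vn);
        printf("v_data is %s\n", vn.v_data == NULL ? "NULL" : "stale");
        return (0);
    }
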
@@ -652,16 +757,74 @@ null_print(ap)
}
/*
+ * Let the underlying filesystem do the work.
+ */
+static int
+null_createvobject(ap)
+ struct vop_createvobject_args /* {
+ struct vnode *vp;
+ struct ucred *cred;
+ struct proc *p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct vnode *lowervp = VTONULL(vp) ? NULLVPTOLOWERVP(vp) : NULL;
+ int error;
+
+ if (vp->v_type == VNON || lowervp == NULL)
+ return 0;
+ error = VOP_CREATEVOBJECT(lowervp, ap->a_cred, ap->a_p);
+ if (error)
+ return (error);
+ vp->v_flag |= VOBJBUF;
+ return (0);
+}
+
+/*
+ * We have nothing to destroy and this operation shouldn't be bypassed.
+ */
+static int
+null_destroyvobject(ap)
+ struct vop_destroyvobject_args /* {
+ struct vnode *vp;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+
+ vp->v_flag &= ~VOBJBUF;
+ return (0);
+}
+
+static int
+null_getvobject(ap)
+ struct vop_getvobject_args /* {
+ struct vnode *vp;
+ struct vm_object **objpp;
+ } */ *ap;
+{
+ struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp);
+
+ if (lvp == NULL)
+ return EINVAL;
+ return (VOP_GETVOBJECT(lvp, ap->a_objpp));
+}
+
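All three VM-object operations follow one pass-through rule: the null layer never owns a VM object itself; it delegates to the lower vnode and only mirrors the "object exists" state (VOBJBUF) on its own vnode. A toy model of that delegation (apart from the VOBJBUF idea, the field and helper names are invented for the sketch):

    #include <stdio.h>

    struct vnode { struct vnode *lower; int has_object; /* models VOBJBUF */ };

    static int
    lower_createvobject(struct vnode *lvp)
    {
        lvp->has_object = 1;   /* the lower layer owns the real object */
        return (0);
    }

    static int
    createvobject(struct vnode *vp)
    {
        int error;

        if (vp->lower == NULL)
            return (0);
        error = lower_createvobject(vp->lower);  /* let the lower fs work */
        if (error)
            return (error);
        vp->has_object = 1;    /* mirror the state, as VOBJBUF does */
        return (0);
    }

    int
    main(void)
    {
        struct vnode lower = { NULL, 0 };
        struct vnode upper = { &lower, 0 };

        createvobject(&upper);
        printf("upper=%d lower=%d\n", upper.has_object, lower.has_object);
        return (0);
    }
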
+/*
* Global vfs data structures
*/
vop_t **null_vnodeop_p;
static struct vnodeopv_entry_desc null_vnodeop_entries[] = {
{ &vop_default_desc, (vop_t *) null_bypass },
+
{ &vop_access_desc, (vop_t *) null_access },
{ &vop_bmap_desc, (vop_t *) vop_eopnotsupp },
+ { &vop_createvobject_desc, (vop_t *) null_createvobject },
+ { &vop_destroyvobject_desc, (vop_t *) null_destroyvobject },
{ &vop_getattr_desc, (vop_t *) null_getattr },
+ { &vop_getvobject_desc, (vop_t *) null_getvobject },
{ &vop_getwritemount_desc, (vop_t *) vop_stdgetwritemount},
{ &vop_inactive_desc, (vop_t *) null_inactive },
+ { &vop_islocked_desc, (vop_t *) null_islocked },
{ &vop_lock_desc, (vop_t *) null_lock },
{ &vop_lookup_desc, (vop_t *) null_lookup },
{ &vop_open_desc, (vop_t *) null_open },