-rw-r--r--  sys/fs/nullfs/null_subr.c       38
-rw-r--r--  sys/fs/nullfs/null_vfsops.c     53
-rw-r--r--  sys/fs/nullfs/null_vnops.c     293
-rw-r--r--  sys/miscfs/nullfs/null_subr.c   38
-rw-r--r--  sys/miscfs/nullfs/null_vfsops.c 53
-rw-r--r--  sys/miscfs/nullfs/null_vnops.c 293
6 files changed, 572 insertions, 196 deletions
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index 3671f0a..efb1357 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -99,6 +99,7 @@ nullfs_uninit(vfsp)
/*
* Return a VREF'ed alias for lower vnode if already exists, else 0.
+ * Lower vnode should be locked on entry and will be left locked on exit.
*/
static struct vnode *
null_node_find(mp, lowervp)
@@ -128,10 +129,15 @@ loop:
* stuff, but we don't want to lock
* the lower node.
*/
- if (vget(vp, 0, p)) {
+ if (vget(vp, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
printf ("null_node_find: vget failed.\n");
goto loop;
};
+ /*
+ * Now we got both vnodes locked, so release the
+ * lower one.
+ */
+ VOP_UNLOCK(lowervp, 0, p);
return (vp);
}
}
@@ -184,14 +190,30 @@ null_node_alloc(mp, lowervp, vpp)
*/
othervp = null_node_find(mp, lowervp);
if (othervp) {
+ vp->v_data = NULL;
FREE(xp, M_NULLFSNODE);
vp->v_type = VBAD; /* node is discarded */
- vp->v_usecount = 0; /* XXX */
+ vrele(vp);
*vpp = othervp;
return 0;
};
+
+ /*
+ * From NetBSD:
+ * Now lock the new node. We rely on the fact that we were passed
+ * a locked vnode. If the lower node is exporting a struct lock
+ * (v_vnlock != NULL) then we just set the upper v_vnlock to the
+ * lower one, and both are now locked. If the lower node is exporting
+ * NULL, then we copy that up and manually lock the new vnode.
+ */
+
lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, p);
- VREF(lowervp); /* Extra VREF will be vrele'd in null_node_create */
+ vp->v_vnlock = lowervp->v_vnlock;
+ error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_THISLAYER, p);
+ if (error)
+ panic("null_node_alloc: can't lock new vnode\n");
+
+ VREF(lowervp);
hd = NULL_NHASH(lowervp);
LIST_INSERT_HEAD(hd, xp, null_hash);
lockmgr(&null_hashlock, LK_RELEASE, NULL, p);
@@ -200,9 +222,9 @@ null_node_alloc(mp, lowervp, vpp)
/*
- * Try to find an existing null_node vnode refering
- * to it, otherwise make a new null_node vnode which
- * contains a reference to the lower vnode.
+ * Try to find an existing null_node vnode referring to the given underlying
+ * vnode (which should be locked). If no vnode is found, create a new null_node
+ * vnode which contains a reference to the lower vnode.
*/
int
null_node_create(mp, lowervp, newvpp)
@@ -218,10 +240,10 @@ null_node_create(mp, lowervp, newvpp)
* null_node_find has taken another reference
* to the alias vnode.
*/
+ vrele(lowervp);
#ifdef NULLFS_DEBUG
vprint("null_node_create: exists", aliasvp);
#endif
- /* VREF(aliasvp); --- done in null_node_find */
} else {
int error;
@@ -242,8 +264,6 @@ null_node_create(mp, lowervp, newvpp)
*/
}
- vrele(lowervp);
-
#ifdef DIAGNOSTIC
if (lowervp->v_usecount < 1) {
/* Should never happen... */
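The NetBSD-derived comment in null_node_alloc() above describes the lock-sharing scheme: when the lower vnode exports a struct lock through v_vnlock, the null vnode points its own v_vnlock at that same lock, so locking either layer locks both. Below is a minimal user-space sketch of that idea only, not kernel code: the field names v_lock/v_vnlock follow the diff, while pthread mutexes stand in for lockmgr locks and everything else is illustrative.

#include <pthread.h>
#include <stdio.h>

struct vnode {
	pthread_mutex_t  v_lock;	/* this layer's private lock */
	pthread_mutex_t *v_vnlock;	/* exported (possibly shared) lock */
};

/*
 * Stack an upper (null) vnode on a lower vnode: alias the exported lock
 * when the lower layer provides one, otherwise use the upper node's own.
 */
static void
null_share_lock(struct vnode *vp, struct vnode *lowervp)
{
	pthread_mutex_init(&vp->v_lock, NULL);
	vp->v_vnlock = (lowervp->v_vnlock != NULL) ?
	    lowervp->v_vnlock : &vp->v_lock;
}

static void
vn_lock(struct vnode *vp)
{
	pthread_mutex_lock(vp->v_vnlock);
}

static void
vn_unlock(struct vnode *vp)
{
	pthread_mutex_unlock(vp->v_vnlock);
}

int
main(void)
{
	struct vnode lower, upper;

	pthread_mutex_init(&lower.v_lock, NULL);
	lower.v_vnlock = &lower.v_lock;	/* lower layer exports its lock */
	null_share_lock(&upper, &lower);

	vn_lock(&upper);		/* locks the lower vnode as well */
	printf("lock shared across layers: %s\n",
	    upper.v_vnlock == lower.v_vnlock ? "yes" : "no");
	vn_unlock(&upper);
	return (0);
}

The same aliasing is what lets null_lock() and null_unlock() in null_vnops.c handle the shared-lock case with a single lockmgr() call on vp->v_vnlock.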
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
index 5b49fbb..45f950e 100644
--- a/sys/fs/nullfs/null_vfsops.c
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -73,6 +73,8 @@ static int nullfs_sync(struct mount *mp, int waitfor,
static int nullfs_unmount(struct mount *mp, int mntflags, struct proc *p);
static int nullfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp);
static int nullfs_vptofh(struct vnode *vp, struct fid *fhp);
+static int nullfs_extattrctl(struct mount *mp, int cmd,
+ const char *attrname, caddr_t arg, struct proc *p);
/*
* Mount null layer
@@ -224,7 +226,7 @@ nullfs_unmount(mp, mntflags, p)
int mntflags;
struct proc *p;
{
- struct vnode *nullm_rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
+ struct vnode *vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
void *mntdata;
int error;
int flags = 0;
@@ -234,39 +236,37 @@ nullfs_unmount(mp, mntflags, p)
if (mntflags & MNT_FORCE)
flags |= FORCECLOSE;
- /*
- * Clear out buffer cache. I don't think we
- * ever get anything cached at this level at the
- * moment, but who knows...
- */
-#if 0
- mntflushbuf(mp, 0);
- if (mntinvalbuf(mp, 1))
- return (EBUSY);
-#endif
- if (nullm_rootvp->v_usecount > 1)
+ error = VFS_ROOT(mp, &vp);
+ if (error)
+ return (error);
+ if (vp->v_usecount > 2) {
+ NULLFSDEBUG("nullfs_unmount: rootvp is busy(%d)\n",
+ vp->v_usecount);
+ vput(vp);
return (EBUSY);
- error = vflush(mp, nullm_rootvp, flags);
+ }
+ error = vflush(mp, vp, flags);
if (error)
return (error);
#ifdef NULLFS_DEBUG
- vprint("alias root of lower", nullm_rootvp);
+ vprint("alias root of lower", vp);
#endif
+ vput(vp);
/*
* Release reference on underlying root vnode
*/
- vrele(nullm_rootvp);
+ vrele(vp);
/*
* And blow it away for future re-use
*/
- vgone(nullm_rootvp);
+ vgone(vp);
/*
* Finally, throw away the null_mount structure
*/
mntdata = mp->mnt_data;
mp->mnt_data = 0;
- free(mntdata, M_NULLFSMNT); /* XXX */
+ free(mntdata, M_NULLFSMNT);
return 0;
}
@@ -287,13 +287,10 @@ nullfs_root(mp, vpp)
*/
vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
VREF(vp);
+
#ifdef NULLFS_DEBUG
if (VOP_ISLOCKED(vp, NULL)) {
- /*
- * XXX
- * Should we check type of node?
- */
- printf("nullfs_root: multi null mount?\n");
+ Debugger("root vnode is locked.\n");
vrele(vp);
return (EDEADLK);
}
@@ -370,8 +367,12 @@ nullfs_vget(mp, ino, vpp)
ino_t ino;
struct vnode **vpp;
{
+ int error;
+ error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp);
+ if (error)
+ return (error);
- return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp);
+ return (null_node_create(mp, *vpp, vpp));
}
static int
@@ -380,8 +381,12 @@ nullfs_fhtovp(mp, fidp, vpp)
struct fid *fidp;
struct vnode **vpp;
{
+ int error;
+ error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, vpp);
+ if (error)
+ return (error);
- return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, vpp);
+ return (null_node_create(mp, *vpp, vpp));
}
static int
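The nullfs_vget() and nullfs_fhtovp() changes above share one shape: call the lower file system's routine first, return its error unchanged, and only wrap the resulting lower vnode in a null node on success. The user-space sketch below models just that control flow; lower_vget() and wrap_null_node() are hypothetical stand-ins for VFS_VGET() and null_node_create(), and the inode/error handling is invented for the example.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct vnode {
	int		 ino;
	struct vnode	*lower;		/* non-NULL for a null-layer vnode */
};

/* Pretend lower-layer VFS_VGET(): fails for odd inode numbers here. */
static int
lower_vget(int ino, struct vnode **vpp)
{
	if (ino & 1)
		return (ENOENT);
	if ((*vpp = calloc(1, sizeof(**vpp))) == NULL)
		return (ENOMEM);
	(*vpp)->ino = ino;
	return (0);
}

/* Stand-in for null_node_create(): wrap the lower vnode, replacing *vpp. */
static int
wrap_null_node(struct vnode **vpp)
{
	struct vnode *nvp;

	if ((nvp = calloc(1, sizeof(*nvp))) == NULL)
		return (ENOMEM);
	nvp->ino = (*vpp)->ino;
	nvp->lower = *vpp;
	*vpp = nvp;
	return (0);
}

static int
null_vget(int ino, struct vnode **vpp)
{
	int error;

	error = lower_vget(ino, vpp);
	if (error)
		return (error);		/* never wrap a failed lookup */
	return (wrap_null_node(vpp));
}

int
main(void)
{
	struct vnode *vp;

	printf("ino 2: error %d\n", null_vget(2, &vp));	/* 0, wrapped */
	printf("ino 3: error %d\n", null_vget(3, &vp));	/* ENOENT */
	return (0);
}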
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index 5692df6..0b7cb96 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -176,6 +176,8 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/mount.h>
@@ -183,13 +185,22 @@
#include <sys/malloc.h>
#include <miscfs/nullfs/null.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
+
static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
&null_bug_bypass, 0, "");
static int null_access(struct vop_access_args *ap);
+static int null_createvobject(struct vop_createvobject_args *ap);
+static int null_destroyvobject(struct vop_destroyvobject_args *ap);
static int null_getattr(struct vop_getattr_args *ap);
+static int null_getvobject(struct vop_getvobject_args *ap);
static int null_inactive(struct vop_inactive_args *ap);
+static int null_islocked(struct vop_islocked_args *ap);
static int null_lock(struct vop_lock_args *ap);
static int null_lookup(struct vop_lookup_args *ap);
static int null_open(struct vop_open_args *ap);
@@ -277,7 +288,7 @@ null_bypass(ap)
* of vrele'ing their vp's. We must account for
* that. (This should go away in the future.)
*/
- if (reles & 1)
+ if (reles & VDESC_VP0_WILLRELE)
VREF(*this_vp_p);
}
@@ -287,7 +298,12 @@ null_bypass(ap)
* Call the operation on the lower layer
* with the modified argument structure.
*/
- error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+ if (vps_p[0] && *vps_p[0])
+ error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+ else {
+ printf("null_bypass: no map for %s\n", descp->vdesc_name);
+ error = EINVAL;
+ }
/*
* Maintain the illusion of call-by-value
@@ -300,7 +316,11 @@ null_bypass(ap)
break; /* bail out at end of list */
if (old_vps[i]) {
*(vps_p[i]) = old_vps[i];
- if (reles & 1)
+#if 0
+ if (reles & VDESC_VP0_WILLUNLOCK)
+ VOP_UNLOCK(*(vps_p[i]), LK_THISLAYER, curproc);
+#endif
+ if (reles & VDESC_VP0_WILLRELE)
vrele(*(vps_p[i]));
}
}
@@ -345,44 +365,43 @@ null_lookup(ap)
} */ *ap;
{
struct componentname *cnp = ap->a_cnp;
+ struct vnode *dvp = ap->a_dvp;
struct proc *p = cnp->cn_proc;
int flags = cnp->cn_flags;
- struct vop_lock_args lockargs;
- struct vop_unlock_args unlockargs;
- struct vnode *dvp, *vp;
+ struct vnode *vp, *ldvp, *lvp;
int error;
- if ((flags & ISLASTCN) && (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+ if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
return (EROFS);
- error = null_bypass((struct vop_generic_args *)ap);
+ /*
+ * Although it is possible to call null_bypass(), we'll do
+ * a direct call to reduce overhead
+ */
+ ldvp = NULLVPTOLOWERVP(dvp);
+ vp = lvp = NULL;
+ error = VOP_LOOKUP(ldvp, &lvp, cnp);
if (error == EJUSTRETURN && (flags & ISLASTCN) &&
- (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+ (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
(cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
error = EROFS;
+
/*
- * We must do the same locking and unlocking at this layer as
- * is done in the layers below us. We could figure this out
- * based on the error return and the LASTCN, LOCKPARENT, and
- * LOCKLEAF flags. However, it is more expidient to just find
- * out the state of the lower level vnodes and set ours to the
- * same state.
+ * Rely only on the PDIRUNLOCK flag which should be carefully
+ * tracked by underlying filesystem.
*/
- dvp = ap->a_dvp;
- vp = *ap->a_vpp;
- if (dvp == vp)
- return (error);
- if (!VOP_ISLOCKED(dvp, NULL)) {
- unlockargs.a_vp = dvp;
- unlockargs.a_flags = 0;
- unlockargs.a_p = p;
- vop_nounlock(&unlockargs);
- }
- if (vp != NULLVP && VOP_ISLOCKED(vp, NULL)) {
- lockargs.a_vp = vp;
- lockargs.a_flags = LK_SHARED;
- lockargs.a_p = p;
- vop_nolock(&lockargs);
+ if (cnp->cn_flags & PDIRUNLOCK)
+ VOP_UNLOCK(dvp, LK_THISLAYER, p);
+ if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
+ if (ldvp == lvp) {
+ *ap->a_vpp = dvp;
+ VREF(dvp);
+ vrele(lvp);
+ } else {
+ error = null_node_create(dvp->v_mount, lvp, &vp);
+ if (error == 0)
+ *ap->a_vpp = vp;
+ }
}
return (error);
}
@@ -430,6 +449,7 @@ null_setattr(ap)
return (EROFS);
}
}
+
return (null_bypass((struct vop_generic_args *)ap));
}
@@ -454,6 +474,9 @@ null_getattr(ap)
return (0);
}
+/*
+ * Handle to disallow write access if mounted read-only.
+ */
static int
null_access(ap)
struct vop_access_args /* {
@@ -559,12 +582,62 @@ null_lock(ap)
struct proc *a_p;
} */ *ap;
{
+ struct vnode *vp = ap->a_vp;
+ int flags = ap->a_flags;
+ struct proc *p = ap->a_p;
+ struct vnode *lvp;
+ int error;
- vop_nolock(ap);
- if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN)
- return (0);
- ap->a_flags &= ~LK_INTERLOCK;
- return (null_bypass((struct vop_generic_args *)ap));
+ if (flags & LK_THISLAYER) {
+ if (vp->v_vnlock != NULL)
+ return 0; /* lock is shared across layers */
+ error = lockmgr(&vp->v_lock, flags & ~LK_THISLAYER,
+ &vp->v_interlock, p);
+ return (error);
+ }
+
+ if (vp->v_vnlock != NULL) {
+ /*
+ * The lower level has exported a struct lock to us. Use
+ * it so that all vnodes in the stack lock and unlock
+ * simultaneously. Note: we don't DRAIN the lock as DRAIN
+ * decommissions the lock - just because our vnode is
+ * going away doesn't mean the struct lock below us is.
+ * LK_EXCLUSIVE is fine.
+ */
+ if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
+ NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n");
+ return(lockmgr(vp->v_vnlock,
+ (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
+ &vp->v_interlock, p));
+ }
+ return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock, p));
+ } else {
+ /*
+ * To prevent race conditions involving doing a lookup
+ * on "..", we have to lock the lower node, then lock our
+ * node. Most of the time it won't matter that we lock our
+ * node (as any locking would need the lower one locked
+ * first). But we can LK_DRAIN the upper lock as a step
+	 * towards decommissioning it.
+ */
+ lvp = NULLVPTOLOWERVP(vp);
+ if (flags & LK_INTERLOCK) {
+ simple_unlock(&vp->v_interlock);
+ flags &= ~LK_INTERLOCK;
+ }
+ if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
+ error = VOP_LOCK(lvp,
+ (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, p);
+ } else
+ error = VOP_LOCK(lvp, flags, p);
+ if (error)
+ return (error);
+ error = lockmgr(&vp->v_lock, flags, &vp->v_interlock, p);
+ if (error)
+ VOP_UNLOCK(lvp, 0, p);
+ return (error);
+ }
}
/*
@@ -580,11 +653,46 @@ null_unlock(ap)
struct proc *a_p;
} */ *ap;
{
- vop_nounlock(ap);
- ap->a_flags &= ~LK_INTERLOCK;
- return (null_bypass((struct vop_generic_args *)ap));
+ struct vnode *vp = ap->a_vp;
+ int flags = ap->a_flags;
+ struct proc *p = ap->a_p;
+
+ if (vp->v_vnlock != NULL) {
+ if (flags & LK_THISLAYER)
+ return 0; /* the lock is shared across layers */
+ flags &= ~LK_THISLAYER;
+ return (lockmgr(vp->v_vnlock, flags | LK_RELEASE,
+ &vp->v_interlock, p));
+ }
+ if ((flags & LK_THISLAYER) == 0) {
+ if (flags & LK_INTERLOCK)
+ simple_unlock(&vp->v_interlock);
+ VOP_UNLOCK(NULLVPTOLOWERVP(vp), flags & ~LK_INTERLOCK, p);
+ } else
+ flags &= ~LK_THISLAYER;
+ return (lockmgr(&vp->v_lock, flags | LK_RELEASE, &vp->v_interlock, p));
+}
+
+static int
+null_islocked(ap)
+ struct vop_islocked_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct proc *p = ap->a_p;
+
+ if (vp->v_vnlock != NULL)
+ return (lockstatus(vp->v_vnlock, p));
+ return (lockstatus(&vp->v_lock, p));
}
+/*
+ * There is no way to tell that someone issued a remove/rmdir operation
+ * on the underlying filesystem. For now we just have to release lowervp
+ * as soon as possible.
+ */
static int
null_inactive(ap)
struct vop_inactive_args /* {
@@ -593,27 +701,34 @@ null_inactive(ap)
} */ *ap;
{
struct vnode *vp = ap->a_vp;
+ struct proc *p = ap->a_p;
struct null_node *xp = VTONULL(vp);
struct vnode *lowervp = xp->null_lowervp;
+
+ lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, p);
+ LIST_REMOVE(xp, null_hash);
+ lockmgr(&null_hashlock, LK_RELEASE, NULL, p);
+
+ xp->null_lowervp = NULLVP;
+ if (vp->v_vnlock != NULL) {
+ vp->v_vnlock = &vp->v_lock; /* we no longer share the lock */
+ } else
+ VOP_UNLOCK(vp, LK_THISLAYER, p);
+
+ vput(lowervp);
/*
- * Do nothing (and _don't_ bypass).
- * Wait to vrele lowervp until reclaim,
- * so that until then our null_node is in the
- * cache and reusable.
- * We still have to tell the lower layer the vnode
- * is now inactive though.
- *
- * NEEDSWORK: Someday, consider inactive'ing
- * the lowervp and then trying to reactivate it
- * with capabilities (v_id)
- * like they do in the name lookup cache code.
- * That's too much work for now.
+ * Now it is safe to drop references to the lower vnode.
+ * VOP_INACTIVE() will be called by vrele() if necessary.
*/
- VOP_INACTIVE(lowervp, ap->a_p);
- VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+ vrele (lowervp);
+
return (0);
}
+/*
+ * We could free the memory in null_inactive(), but we do it here
+ * instead. (It would be possible to guard vp->v_data to point somewhere.)
+ */
static int
null_reclaim(ap)
struct vop_reclaim_args /* {
@@ -622,21 +737,11 @@ null_reclaim(ap)
} */ *ap;
{
struct vnode *vp = ap->a_vp;
- struct null_node *xp = VTONULL(vp);
- struct vnode *lowervp = xp->null_lowervp;
+ void *vdata = vp->v_data;
- /*
- * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
- * so we can't call VOPs on ourself.
- */
- /* After this assignment, this node will not be re-used. */
- xp->null_lowervp = NULLVP;
- lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, ap->a_p);
- LIST_REMOVE(xp, null_hash);
- lockmgr(&null_hashlock, LK_RELEASE, NULL, ap->a_p);
- FREE(vp->v_data, M_TEMP);
vp->v_data = NULL;
- vrele (lowervp);
+ FREE(vdata, M_NULLFSNODE);
+
return (0);
}
@@ -652,16 +757,74 @@ null_print(ap)
}
/*
+ * Let an underlying filesystem do the work
+ */
+static int
+null_createvobject(ap)
+ struct vop_createvobject_args /* {
+ struct vnode *vp;
+ struct ucred *cred;
+ struct proc *p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct vnode *lowervp = VTONULL(vp) ? NULLVPTOLOWERVP(vp) : NULL;
+ int error;
+
+ if (vp->v_type == VNON || lowervp == NULL)
+ return 0;
+ error = VOP_CREATEVOBJECT(lowervp, ap->a_cred, ap->a_p);
+ if (error)
+ return (error);
+ vp->v_flag |= VOBJBUF;
+ return (0);
+}
+
+/*
+ * We have nothing to destroy and this operation shouldn't be bypassed.
+ */
+static int
+null_destroyvobject(ap)
+ struct vop_destroyvobject_args /* {
+ struct vnode *vp;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+
+ vp->v_flag &= ~VOBJBUF;
+ return (0);
+}
+
+static int
+null_getvobject(ap)
+ struct vop_getvobject_args /* {
+ struct vnode *vp;
+ struct vm_object **objpp;
+ } */ *ap;
+{
+ struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp);
+
+ if (lvp == NULL)
+ return EINVAL;
+ return (VOP_GETVOBJECT(lvp, ap->a_objpp));
+}
+
+/*
* Global vfs data structures
*/
vop_t **null_vnodeop_p;
static struct vnodeopv_entry_desc null_vnodeop_entries[] = {
{ &vop_default_desc, (vop_t *) null_bypass },
+
{ &vop_access_desc, (vop_t *) null_access },
{ &vop_bmap_desc, (vop_t *) vop_eopnotsupp },
+ { &vop_createvobject_desc, (vop_t *) null_createvobject },
+ { &vop_destroyvobject_desc, (vop_t *) null_destroyvobject },
{ &vop_getattr_desc, (vop_t *) null_getattr },
+ { &vop_getvobject_desc, (vop_t *) null_getvobject },
{ &vop_getwritemount_desc, (vop_t *) vop_stdgetwritemount},
{ &vop_inactive_desc, (vop_t *) null_inactive },
+ { &vop_islocked_desc, (vop_t *) null_islocked },
{ &vop_lock_desc, (vop_t *) null_lock },
{ &vop_lookup_desc, (vop_t *) null_lookup },
{ &vop_open_desc, (vop_t *) null_open },
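The vnodeopv_entry_desc additions at the end of null_vnops.c work by exception: vop_default_desc routes every operation through null_bypass(), and only the explicitly listed operations get a dedicated null-layer handler. Here is a small user-space sketch of that dispatch pattern; the table layout mimics the diff, but all names and handlers are purely illustrative.

#include <stdio.h>
#include <string.h>

typedef int (*vop_t)(const char *op);

/* Default handler: corresponds to null_bypass() in the real table. */
static int
op_bypass(const char *op)
{
	printf("%-10s -> passed through to the lower layer\n", op);
	return (0);
}

/* Layer-specific handler: corresponds to entries such as null_islocked(). */
static int
op_local(const char *op)
{
	printf("%-10s -> handled by the null layer itself\n", op);
	return (0);
}

struct opv_entry {
	const char	*name;		/* NULL marks the default entry */
	vop_t		 handler;
};

static struct opv_entry null_ops[] = {
	{ "islocked",	op_local },
	{ "lock",	op_local },
	{ "unlock",	op_local },
	{ NULL,		op_bypass },	/* like vop_default_desc */
};

/* Dispatch: use an explicit entry when one exists, else the default. */
static int
null_vcall(const char *op)
{
	struct opv_entry *e;

	for (e = null_ops; e->name != NULL; e++)
		if (strcmp(e->name, op) == 0)
			return (e->handler(op));
	return (e->handler(op));
}

int
main(void)
{
	null_vcall("islocked");		/* explicit entry */
	null_vcall("read");		/* no entry, falls back to bypass */
	return (0);
}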
diff --git a/sys/miscfs/nullfs/null_subr.c b/sys/miscfs/nullfs/null_subr.c
index 3671f0a..efb1357 100644
--- a/sys/miscfs/nullfs/null_subr.c
+++ b/sys/miscfs/nullfs/null_subr.c
@@ -99,6 +99,7 @@ nullfs_uninit(vfsp)
/*
* Return a VREF'ed alias for lower vnode if already exists, else 0.
+ * Lower vnode should be locked on entry and will be left locked on exit.
*/
static struct vnode *
null_node_find(mp, lowervp)
@@ -128,10 +129,15 @@ loop:
* stuff, but we don't want to lock
* the lower node.
*/
- if (vget(vp, 0, p)) {
+ if (vget(vp, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
printf ("null_node_find: vget failed.\n");
goto loop;
};
+ /*
+ * Now we got both vnodes locked, so release the
+ * lower one.
+ */
+ VOP_UNLOCK(lowervp, 0, p);
return (vp);
}
}
@@ -184,14 +190,30 @@ null_node_alloc(mp, lowervp, vpp)
*/
othervp = null_node_find(mp, lowervp);
if (othervp) {
+ vp->v_data = NULL;
FREE(xp, M_NULLFSNODE);
vp->v_type = VBAD; /* node is discarded */
- vp->v_usecount = 0; /* XXX */
+ vrele(vp);
*vpp = othervp;
return 0;
};
+
+ /*
+ * From NetBSD:
+ * Now lock the new node. We rely on the fact that we were passed
+ * a locked vnode. If the lower node is exporting a struct lock
+ * (v_vnlock != NULL) then we just set the upper v_vnlock to the
+ * lower one, and both are now locked. If the lower node is exporting
+ * NULL, then we copy that up and manually lock the new vnode.
+ */
+
lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, p);
- VREF(lowervp); /* Extra VREF will be vrele'd in null_node_create */
+ vp->v_vnlock = lowervp->v_vnlock;
+ error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_THISLAYER, p);
+ if (error)
+ panic("null_node_alloc: can't lock new vnode\n");
+
+ VREF(lowervp);
hd = NULL_NHASH(lowervp);
LIST_INSERT_HEAD(hd, xp, null_hash);
lockmgr(&null_hashlock, LK_RELEASE, NULL, p);
@@ -200,9 +222,9 @@ null_node_alloc(mp, lowervp, vpp)
/*
- * Try to find an existing null_node vnode refering
- * to it, otherwise make a new null_node vnode which
- * contains a reference to the lower vnode.
+ * Try to find an existing null_node vnode referring to the given underlying
+ * vnode (which should be locked). If no vnode is found, create a new null_node
+ * vnode which contains a reference to the lower vnode.
*/
int
null_node_create(mp, lowervp, newvpp)
@@ -218,10 +240,10 @@ null_node_create(mp, lowervp, newvpp)
* null_node_find has taken another reference
* to the alias vnode.
*/
+ vrele(lowervp);
#ifdef NULLFS_DEBUG
vprint("null_node_create: exists", aliasvp);
#endif
- /* VREF(aliasvp); --- done in null_node_find */
} else {
int error;
@@ -242,8 +264,6 @@ null_node_create(mp, lowervp, newvpp)
*/
}
- vrele(lowervp);
-
#ifdef DIAGNOSTIC
if (lowervp->v_usecount < 1) {
/* Should never happen... */
diff --git a/sys/miscfs/nullfs/null_vfsops.c b/sys/miscfs/nullfs/null_vfsops.c
index 5b49fbb..45f950e 100644
--- a/sys/miscfs/nullfs/null_vfsops.c
+++ b/sys/miscfs/nullfs/null_vfsops.c
@@ -73,6 +73,8 @@ static int nullfs_sync(struct mount *mp, int waitfor,
static int nullfs_unmount(struct mount *mp, int mntflags, struct proc *p);
static int nullfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp);
static int nullfs_vptofh(struct vnode *vp, struct fid *fhp);
+static int nullfs_extattrctl(struct mount *mp, int cmd,
+ const char *attrname, caddr_t arg, struct proc *p);
/*
* Mount null layer
@@ -224,7 +226,7 @@ nullfs_unmount(mp, mntflags, p)
int mntflags;
struct proc *p;
{
- struct vnode *nullm_rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
+ struct vnode *vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
void *mntdata;
int error;
int flags = 0;
@@ -234,39 +236,37 @@ nullfs_unmount(mp, mntflags, p)
if (mntflags & MNT_FORCE)
flags |= FORCECLOSE;
- /*
- * Clear out buffer cache. I don't think we
- * ever get anything cached at this level at the
- * moment, but who knows...
- */
-#if 0
- mntflushbuf(mp, 0);
- if (mntinvalbuf(mp, 1))
- return (EBUSY);
-#endif
- if (nullm_rootvp->v_usecount > 1)
+ error = VFS_ROOT(mp, &vp);
+ if (error)
+ return (error);
+ if (vp->v_usecount > 2) {
+ NULLFSDEBUG("nullfs_unmount: rootvp is busy(%d)\n",
+ vp->v_usecount);
+ vput(vp);
return (EBUSY);
- error = vflush(mp, nullm_rootvp, flags);
+ }
+ error = vflush(mp, vp, flags);
if (error)
return (error);
#ifdef NULLFS_DEBUG
- vprint("alias root of lower", nullm_rootvp);
+ vprint("alias root of lower", vp);
#endif
+ vput(vp);
/*
* Release reference on underlying root vnode
*/
- vrele(nullm_rootvp);
+ vrele(vp);
/*
* And blow it away for future re-use
*/
- vgone(nullm_rootvp);
+ vgone(vp);
/*
* Finally, throw away the null_mount structure
*/
mntdata = mp->mnt_data;
mp->mnt_data = 0;
- free(mntdata, M_NULLFSMNT); /* XXX */
+ free(mntdata, M_NULLFSMNT);
return 0;
}
@@ -287,13 +287,10 @@ nullfs_root(mp, vpp)
*/
vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
VREF(vp);
+
#ifdef NULLFS_DEBUG
if (VOP_ISLOCKED(vp, NULL)) {
- /*
- * XXX
- * Should we check type of node?
- */
- printf("nullfs_root: multi null mount?\n");
+ Debugger("root vnode is locked.\n");
vrele(vp);
return (EDEADLK);
}
@@ -370,8 +367,12 @@ nullfs_vget(mp, ino, vpp)
ino_t ino;
struct vnode **vpp;
{
+ int error;
+ error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp);
+ if (error)
+ return (error);
- return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp);
+ return (null_node_create(mp, *vpp, vpp));
}
static int
@@ -380,8 +381,12 @@ nullfs_fhtovp(mp, fidp, vpp)
struct fid *fidp;
struct vnode **vpp;
{
+ int error;
+ error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, vpp);
+ if (error)
+ return (error);
- return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, vpp);
+ return (null_node_create(mp, *vpp, vpp));
}
static int
diff --git a/sys/miscfs/nullfs/null_vnops.c b/sys/miscfs/nullfs/null_vnops.c
index 5692df6..0b7cb96 100644
--- a/sys/miscfs/nullfs/null_vnops.c
+++ b/sys/miscfs/nullfs/null_vnops.c
@@ -176,6 +176,8 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/mount.h>
@@ -183,13 +185,22 @@
#include <sys/malloc.h>
#include <miscfs/nullfs/null.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
+
static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
&null_bug_bypass, 0, "");
static int null_access(struct vop_access_args *ap);
+static int null_createvobject(struct vop_createvobject_args *ap);
+static int null_destroyvobject(struct vop_destroyvobject_args *ap);
static int null_getattr(struct vop_getattr_args *ap);
+static int null_getvobject(struct vop_getvobject_args *ap);
static int null_inactive(struct vop_inactive_args *ap);
+static int null_islocked(struct vop_islocked_args *ap);
static int null_lock(struct vop_lock_args *ap);
static int null_lookup(struct vop_lookup_args *ap);
static int null_open(struct vop_open_args *ap);
@@ -277,7 +288,7 @@ null_bypass(ap)
* of vrele'ing their vp's. We must account for
* that. (This should go away in the future.)
*/
- if (reles & 1)
+ if (reles & VDESC_VP0_WILLRELE)
VREF(*this_vp_p);
}
@@ -287,7 +298,12 @@ null_bypass(ap)
* Call the operation on the lower layer
* with the modified argument structure.
*/
- error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+ if (vps_p[0] && *vps_p[0])
+ error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+ else {
+ printf("null_bypass: no map for %s\n", descp->vdesc_name);
+ error = EINVAL;
+ }
/*
* Maintain the illusion of call-by-value
@@ -300,7 +316,11 @@ null_bypass(ap)
break; /* bail out at end of list */
if (old_vps[i]) {
*(vps_p[i]) = old_vps[i];
- if (reles & 1)
+#if 0
+ if (reles & VDESC_VP0_WILLUNLOCK)
+ VOP_UNLOCK(*(vps_p[i]), LK_THISLAYER, curproc);
+#endif
+ if (reles & VDESC_VP0_WILLRELE)
vrele(*(vps_p[i]));
}
}
@@ -345,44 +365,43 @@ null_lookup(ap)
} */ *ap;
{
struct componentname *cnp = ap->a_cnp;
+ struct vnode *dvp = ap->a_dvp;
struct proc *p = cnp->cn_proc;
int flags = cnp->cn_flags;
- struct vop_lock_args lockargs;
- struct vop_unlock_args unlockargs;
- struct vnode *dvp, *vp;
+ struct vnode *vp, *ldvp, *lvp;
int error;
- if ((flags & ISLASTCN) && (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+ if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
return (EROFS);
- error = null_bypass((struct vop_generic_args *)ap);
+ /*
+ * Although it is possible to call null_bypass(), we'll do
+ * a direct call to reduce overhead
+ */
+ ldvp = NULLVPTOLOWERVP(dvp);
+ vp = lvp = NULL;
+ error = VOP_LOOKUP(ldvp, &lvp, cnp);
if (error == EJUSTRETURN && (flags & ISLASTCN) &&
- (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+ (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
(cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
error = EROFS;
+
/*
- * We must do the same locking and unlocking at this layer as
- * is done in the layers below us. We could figure this out
- * based on the error return and the LASTCN, LOCKPARENT, and
- * LOCKLEAF flags. However, it is more expidient to just find
- * out the state of the lower level vnodes and set ours to the
- * same state.
+ * Rely only on the PDIRUNLOCK flag which should be carefully
+ * tracked by underlying filesystem.
*/
- dvp = ap->a_dvp;
- vp = *ap->a_vpp;
- if (dvp == vp)
- return (error);
- if (!VOP_ISLOCKED(dvp, NULL)) {
- unlockargs.a_vp = dvp;
- unlockargs.a_flags = 0;
- unlockargs.a_p = p;
- vop_nounlock(&unlockargs);
- }
- if (vp != NULLVP && VOP_ISLOCKED(vp, NULL)) {
- lockargs.a_vp = vp;
- lockargs.a_flags = LK_SHARED;
- lockargs.a_p = p;
- vop_nolock(&lockargs);
+ if (cnp->cn_flags & PDIRUNLOCK)
+ VOP_UNLOCK(dvp, LK_THISLAYER, p);
+ if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
+ if (ldvp == lvp) {
+ *ap->a_vpp = dvp;
+ VREF(dvp);
+ vrele(lvp);
+ } else {
+ error = null_node_create(dvp->v_mount, lvp, &vp);
+ if (error == 0)
+ *ap->a_vpp = vp;
+ }
}
return (error);
}
@@ -430,6 +449,7 @@ null_setattr(ap)
return (EROFS);
}
}
+
return (null_bypass((struct vop_generic_args *)ap));
}
@@ -454,6 +474,9 @@ null_getattr(ap)
return (0);
}
+/*
+ * Handle to disallow write access if mounted read-only.
+ */
static int
null_access(ap)
struct vop_access_args /* {
@@ -559,12 +582,62 @@ null_lock(ap)
struct proc *a_p;
} */ *ap;
{
+ struct vnode *vp = ap->a_vp;
+ int flags = ap->a_flags;
+ struct proc *p = ap->a_p;
+ struct vnode *lvp;
+ int error;
- vop_nolock(ap);
- if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN)
- return (0);
- ap->a_flags &= ~LK_INTERLOCK;
- return (null_bypass((struct vop_generic_args *)ap));
+ if (flags & LK_THISLAYER) {
+ if (vp->v_vnlock != NULL)
+ return 0; /* lock is shared across layers */
+ error = lockmgr(&vp->v_lock, flags & ~LK_THISLAYER,
+ &vp->v_interlock, p);
+ return (error);
+ }
+
+ if (vp->v_vnlock != NULL) {
+ /*
+ * The lower level has exported a struct lock to us. Use
+ * it so that all vnodes in the stack lock and unlock
+ * simultaneously. Note: we don't DRAIN the lock as DRAIN
+ * decommissions the lock - just because our vnode is
+ * going away doesn't mean the struct lock below us is.
+ * LK_EXCLUSIVE is fine.
+ */
+ if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
+ NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n");
+ return(lockmgr(vp->v_vnlock,
+ (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
+ &vp->v_interlock, p));
+ }
+ return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock, p));
+ } else {
+ /*
+ * To prevent race conditions involving doing a lookup
+ * on "..", we have to lock the lower node, then lock our
+ * node. Most of the time it won't matter that we lock our
+ * node (as any locking would need the lower one locked
+ * first). But we can LK_DRAIN the upper lock as a step
+	 * towards decommissioning it.
+ */
+ lvp = NULLVPTOLOWERVP(vp);
+ if (flags & LK_INTERLOCK) {
+ simple_unlock(&vp->v_interlock);
+ flags &= ~LK_INTERLOCK;
+ }
+ if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
+ error = VOP_LOCK(lvp,
+ (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, p);
+ } else
+ error = VOP_LOCK(lvp, flags, p);
+ if (error)
+ return (error);
+ error = lockmgr(&vp->v_lock, flags, &vp->v_interlock, p);
+ if (error)
+ VOP_UNLOCK(lvp, 0, p);
+ return (error);
+ }
}
/*
@@ -580,11 +653,46 @@ null_unlock(ap)
struct proc *a_p;
} */ *ap;
{
- vop_nounlock(ap);
- ap->a_flags &= ~LK_INTERLOCK;
- return (null_bypass((struct vop_generic_args *)ap));
+ struct vnode *vp = ap->a_vp;
+ int flags = ap->a_flags;
+ struct proc *p = ap->a_p;
+
+ if (vp->v_vnlock != NULL) {
+ if (flags & LK_THISLAYER)
+ return 0; /* the lock is shared across layers */
+ flags &= ~LK_THISLAYER;
+ return (lockmgr(vp->v_vnlock, flags | LK_RELEASE,
+ &vp->v_interlock, p));
+ }
+ if ((flags & LK_THISLAYER) == 0) {
+ if (flags & LK_INTERLOCK)
+ simple_unlock(&vp->v_interlock);
+ VOP_UNLOCK(NULLVPTOLOWERVP(vp), flags & ~LK_INTERLOCK, p);
+ } else
+ flags &= ~LK_THISLAYER;
+ return (lockmgr(&vp->v_lock, flags | LK_RELEASE, &vp->v_interlock, p));
+}
+
+static int
+null_islocked(ap)
+ struct vop_islocked_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct proc *p = ap->a_p;
+
+ if (vp->v_vnlock != NULL)
+ return (lockstatus(vp->v_vnlock, p));
+ return (lockstatus(&vp->v_lock, p));
}
+/*
+ * There is no way to tell that someone issued a remove/rmdir operation
+ * on the underlying filesystem. For now we just have to release lowervp
+ * as soon as possible.
+ */
static int
null_inactive(ap)
struct vop_inactive_args /* {
@@ -593,27 +701,34 @@ null_inactive(ap)
} */ *ap;
{
struct vnode *vp = ap->a_vp;
+ struct proc *p = ap->a_p;
struct null_node *xp = VTONULL(vp);
struct vnode *lowervp = xp->null_lowervp;
+
+ lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, p);
+ LIST_REMOVE(xp, null_hash);
+ lockmgr(&null_hashlock, LK_RELEASE, NULL, p);
+
+ xp->null_lowervp = NULLVP;
+ if (vp->v_vnlock != NULL) {
+ vp->v_vnlock = &vp->v_lock; /* we no longer share the lock */
+ } else
+ VOP_UNLOCK(vp, LK_THISLAYER, p);
+
+ vput(lowervp);
/*
- * Do nothing (and _don't_ bypass).
- * Wait to vrele lowervp until reclaim,
- * so that until then our null_node is in the
- * cache and reusable.
- * We still have to tell the lower layer the vnode
- * is now inactive though.
- *
- * NEEDSWORK: Someday, consider inactive'ing
- * the lowervp and then trying to reactivate it
- * with capabilities (v_id)
- * like they do in the name lookup cache code.
- * That's too much work for now.
+ * Now it is safe to drop references to the lower vnode.
+ * VOP_INACTIVE() will be called by vrele() if necessary.
*/
- VOP_INACTIVE(lowervp, ap->a_p);
- VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+ vrele (lowervp);
+
return (0);
}
+/*
+ * We could free the memory in null_inactive(), but we do it here
+ * instead. (It would be possible to guard vp->v_data to point somewhere.)
+ */
static int
null_reclaim(ap)
struct vop_reclaim_args /* {
@@ -622,21 +737,11 @@ null_reclaim(ap)
} */ *ap;
{
struct vnode *vp = ap->a_vp;
- struct null_node *xp = VTONULL(vp);
- struct vnode *lowervp = xp->null_lowervp;
+ void *vdata = vp->v_data;
- /*
- * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
- * so we can't call VOPs on ourself.
- */
- /* After this assignment, this node will not be re-used. */
- xp->null_lowervp = NULLVP;
- lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, ap->a_p);
- LIST_REMOVE(xp, null_hash);
- lockmgr(&null_hashlock, LK_RELEASE, NULL, ap->a_p);
- FREE(vp->v_data, M_TEMP);
vp->v_data = NULL;
- vrele (lowervp);
+ FREE(vdata, M_NULLFSNODE);
+
return (0);
}
@@ -652,16 +757,74 @@ null_print(ap)
}
/*
+ * Let an underlying filesystem do the work
+ */
+static int
+null_createvobject(ap)
+ struct vop_createvobject_args /* {
+ struct vnode *vp;
+ struct ucred *cred;
+ struct proc *p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct vnode *lowervp = VTONULL(vp) ? NULLVPTOLOWERVP(vp) : NULL;
+ int error;
+
+ if (vp->v_type == VNON || lowervp == NULL)
+ return 0;
+ error = VOP_CREATEVOBJECT(lowervp, ap->a_cred, ap->a_p);
+ if (error)
+ return (error);
+ vp->v_flag |= VOBJBUF;
+ return (0);
+}
+
+/*
+ * We have nothing to destroy and this operation shouldn't be bypassed.
+ */
+static int
+null_destroyvobject(ap)
+ struct vop_destroyvobject_args /* {
+ struct vnode *vp;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+
+ vp->v_flag &= ~VOBJBUF;
+ return (0);
+}
+
+static int
+null_getvobject(ap)
+ struct vop_getvobject_args /* {
+ struct vnode *vp;
+ struct vm_object **objpp;
+ } */ *ap;
+{
+ struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp);
+
+ if (lvp == NULL)
+ return EINVAL;
+ return (VOP_GETVOBJECT(lvp, ap->a_objpp));
+}
+
+/*
* Global vfs data structures
*/
vop_t **null_vnodeop_p;
static struct vnodeopv_entry_desc null_vnodeop_entries[] = {
{ &vop_default_desc, (vop_t *) null_bypass },
+
{ &vop_access_desc, (vop_t *) null_access },
{ &vop_bmap_desc, (vop_t *) vop_eopnotsupp },
+ { &vop_createvobject_desc, (vop_t *) null_createvobject },
+ { &vop_destroyvobject_desc, (vop_t *) null_destroyvobject },
{ &vop_getattr_desc, (vop_t *) null_getattr },
+ { &vop_getvobject_desc, (vop_t *) null_getvobject },
{ &vop_getwritemount_desc, (vop_t *) vop_stdgetwritemount},
{ &vop_inactive_desc, (vop_t *) null_inactive },
+ { &vop_islocked_desc, (vop_t *) null_islocked },
{ &vop_lock_desc, (vop_t *) null_lock },
{ &vop_lookup_desc, (vop_t *) null_lookup },
{ &vop_open_desc, (vop_t *) null_open },