From 35c71928a03800d369738a09947e6bcb26d29b1a Mon Sep 17 00:00:00 2001
From: tjr
Date: Tue, 17 Jun 2003 08:52:45 +0000
Subject: MFp4: Fix two bugs causing possible deadlocks or panics, and one nit:

- Emulate lock draining (LK_DRAIN) in null_lock() to avoid deadlocks
  when the vnode is being recycled.
- Don't allow null_nodeget() to return a nullfs vnode from the wrong
  mount when multiple nullfs's are mounted. It's unclear why these checks
  were removed in null_subr.c 1.35, but they are definitely necessary.
  Without the checks, trying to unmount a nullfs mount will erroneously
  return EBUSY, and forcibly unmounting with -f will cause a panic.
- Bump LOG2_SIZEVNODE up to 8, since vnodes are >256 bytes now. The old
  value (7) didn't cause any problems, but made the hash algorithm
  suboptimal.

These changes fix nullfs enough that a parallel buildworld succeeds.

Submitted by:	tegge (partially; LK_DRAIN)
Tested by:	kris
---
 sys/fs/nullfs/null.h       |  2 ++
 sys/fs/nullfs/null_subr.c  | 45 +++++++++++++++++++++++++++-------
 sys/fs/nullfs/null_vnops.c | 61 +++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 95 insertions(+), 13 deletions(-)

diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
index 3cb3716..62deb1e 100644
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -51,6 +51,8 @@ struct null_node {
 	LIST_ENTRY(null_node)	null_hash;	/* Hash list */
 	struct vnode		*null_lowervp;	/* VREFed once */
 	struct vnode		*null_vnode;	/* Back pointer */
+	int			null_pending_locks;
+	int			null_drain_wakeup;
 };
 #define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index f2a43e0..2120415 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -50,7 +50,7 @@
 #include <fs/nullfs/null.h>
-#define LOG2_SIZEVNODE 7	/* log2(sizeof struct vnode) */
+#define LOG2_SIZEVNODE 8	/* log2(sizeof struct vnode) */
 #define NNULLNODECACHE 16
 /*
@@ -71,8 +71,8 @@ struct mtx null_hashmtx;
 static MALLOC_DEFINE(M_NULLFSHASH, "NULLFS hash", "NULLFS hash table");
 MALLOC_DEFINE(M_NULLFSNODE, "NULLFS node", "NULLFS vnode private part");
-static struct vnode * null_hashget(struct vnode *);
-static struct vnode * null_hashins(struct null_node *);
+static struct vnode * null_hashget(struct mount *, struct vnode *);
+static struct vnode * null_hashins(struct mount *, struct null_node *);
 /*
  * Initialise cache headers
  */
@@ -103,7 +103,8 @@ nullfs_uninit(vfsp)
  * Lower vnode should be locked on entry and will be left locked on exit.
  */
 static struct vnode *
-null_hashget(lowervp)
+null_hashget(mp, lowervp)
+	struct mount *mp;
 	struct vnode *lowervp;
 {
 	struct thread *td = curthread;	/* XXX */
@@ -121,9 +122,20 @@ null_hashget(lowervp)
 loop:
 	mtx_lock(&null_hashmtx);
 	LIST_FOREACH(a, hd, null_hash) {
-		if (a->null_lowervp == lowervp) {
+		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
 			vp = NULLTOV(a);
 			mtx_lock(&vp->v_interlock);
+			/*
+			 * Don't block if nullfs vnode is being recycled.
+			 * We already hold a lock on the lower vnode, thus
+			 * waiting might deadlock against the thread
+			 * recycling the nullfs vnode or another thread
+			 * in vrele() waiting for the vnode lock.
+			 */
+			if ((vp->v_iflag & VI_XLOCK) != 0) {
+				VI_UNLOCK(vp);
+				continue;
+			}
 			mtx_unlock(&null_hashmtx);
 			/*
 			 * We need vget for the VXLOCK
@@ -145,7 +157,8 @@ loop:
  * node found.
  */
 static struct vnode *
-null_hashins(xp)
+null_hashins(mp, xp)
+	struct mount *mp;
 	struct null_node *xp;
 {
 	struct thread *td = curthread;	/* XXX */
@@ -157,9 +170,21 @@ loop:
 	mtx_lock(&null_hashmtx);
 	LIST_FOREACH(oxp, hd, null_hash) {
-		if (oxp->null_lowervp == xp->null_lowervp) {
+		if (oxp->null_lowervp == xp->null_lowervp &&
+		    NULLTOV(oxp)->v_mount == mp) {
 			ovp = NULLTOV(oxp);
 			mtx_lock(&ovp->v_interlock);
+			/*
+			 * Don't block if nullfs vnode is being recycled.
+			 * We already hold a lock on the lower vnode, thus
+			 * waiting might deadlock against the thread
+			 * recycling the nullfs vnode or another thread
+			 * in vrele() waiting for the vnode lock.
+			 */
+			if ((ovp->v_iflag & VI_XLOCK) != 0) {
+				VI_UNLOCK(ovp);
+				continue;
+			}
 			mtx_unlock(&null_hashmtx);
 			if (vget(ovp, LK_EXCLUSIVE | LK_THISLAYER | LK_INTERLOCK, td))
 				goto loop;
@@ -192,7 +217,7 @@ null_nodeget(mp, lowervp, vpp)
 	int error;
 	/* Lookup the hash firstly */
-	*vpp = null_hashget(lowervp);
+	*vpp = null_hashget(mp, lowervp);
 	if (*vpp != NULL) {
 		vrele(lowervp);
 		return (0);
 	}
@@ -222,6 +247,8 @@
 	xp->null_vnode = vp;
 	xp->null_lowervp = lowervp;
+	xp->null_pending_locks = 0;
+	xp->null_drain_wakeup = 0;
 	vp->v_type = lowervp->v_type;
 	vp->v_data = xp;
@@ -244,7 +271,7 @@ null_nodeget(mp, lowervp, vpp)
 	 * Atomically insert our new node into the hash or vget existing
 	 * if someone else has beaten us to it.
 	 */
-	*vpp = null_hashins(xp);
+	*vpp = null_hashins(mp, xp);
 	if (*vpp != NULL) {
 		vrele(lowervp);
 		VOP_UNLOCK(vp, LK_THISLAYER, td);
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index 588e18c..96d6f83 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -592,6 +592,7 @@ null_lock(ap)
 	struct thread *td = ap->a_td;
 	struct vnode *lvp;
 	int error;
+	struct null_node *nn;
 	if (flags & LK_THISLAYER) {
 		if (vp->v_vnlock != NULL) {
@@ -614,13 +615,65 @@
 		 * going away doesn't mean the struct lock below us is.
 		 * LK_EXCLUSIVE is fine.
 		 */
+		if ((flags & LK_INTERLOCK) == 0) {
+			VI_LOCK(vp);
+			flags |= LK_INTERLOCK;
+		}
+		nn = VTONULL(vp);
 		if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
 			NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n");
-			return(lockmgr(vp->v_vnlock,
-				(flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
-				&vp->v_interlock, td));
+			/*
+			 * Emulate lock draining by waiting for all other
+			 * pending locks to complete. Afterwards the
+			 * lockmgr call might block, but no other threads
+			 * will attempt to use this nullfs vnode due to the
+			 * VI_XLOCK flag.
+			 */
+			while (nn->null_pending_locks > 0) {
+				nn->null_drain_wakeup = 1;
+				msleep(&nn->null_pending_locks,
+				       VI_MTX(vp),
+				       PVFS,
+				       "nuldr", 0);
+			}
+			error = lockmgr(vp->v_vnlock,
+			    (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
+			    VI_MTX(vp), td);
+			return error;
+		}
+		nn->null_pending_locks++;
+		error = lockmgr(vp->v_vnlock, flags, &vp->v_interlock, td);
+		VI_LOCK(vp);
+		/*
+		 * If we're called from vrele then v_usecount can have been 0
+		 * and another process might have initiated a recycle
+		 * operation. When that happens, just back out.
+		 */
+		if (error == 0 && (vp->v_iflag & VI_XLOCK) != 0 &&
+		    td != vp->v_vxproc) {
+			lockmgr(vp->v_vnlock,
+			    (flags & ~LK_TYPE_MASK) | LK_RELEASE,
+			    VI_MTX(vp), td);
+			VI_LOCK(vp);
+			error = ENOENT;
+		}
+		nn->null_pending_locks--;
+		/*
+		 * Wake up the process draining the vnode after all
+		 * pending lock attempts have failed.
+		 */
+		if (nn->null_pending_locks == 0 &&
+		    nn->null_drain_wakeup != 0) {
+			nn->null_drain_wakeup = 0;
+			wakeup(&nn->null_pending_locks);
+		}
+		if (error == ENOENT && (vp->v_iflag & VI_XLOCK) != 0 &&
+		    vp->v_vxproc != curthread) {
+			vp->v_iflag |= VI_XWANT;
+			msleep(vp, VI_MTX(vp), PINOD, "nulbo", 0);
 		}
-		return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock, td));
+		VI_UNLOCK(vp);
+		return error;
 	} else {
 		/*
 		 * To prevent race conditions involving doing a lookup
-- 
cgit v1.1
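
Note on the LK_DRAIN emulation: the null_lock() change boils down to a small
counting protocol. Every ordinary lock attempt increments null_pending_locks
around its lockmgr() call, a would-be drainer sets null_drain_wakeup and
msleep()s until the count reaches zero, and the last attempt to finish issues
the wakeup(). The code below is a minimal userspace sketch of that same
pattern, assuming a pthread mutex and condition variable as stand-ins for the
vnode interlock and msleep()/wakeup(); the type and function names here are
invented for illustration and appear nowhere in the patch.

/*
 * Userspace analogue of the LK_DRAIN emulation: a drain waits until every
 * in-flight lock attempt has finished, mirroring null_pending_locks and
 * null_drain_wakeup. Illustrative only; not kernel code.
 */
#include <pthread.h>
#include <stdio.h>

struct layer_node {
	pthread_mutex_t	interlock;	/* stands in for the vnode interlock */
	pthread_cond_t	drain_cv;	/* stands in for the msleep/wakeup channel */
	int		pending_locks;	/* like null_pending_locks */
	int		drain_wakeup;	/* like null_drain_wakeup */
};

/* An ordinary lock attempt: count it while it is in flight. */
static void
lock_attempt(struct layer_node *nn)
{
	pthread_mutex_lock(&nn->interlock);
	nn->pending_locks++;
	pthread_mutex_unlock(&nn->interlock);

	/* ... acquire and later release the real lower lock here ... */

	pthread_mutex_lock(&nn->interlock);
	nn->pending_locks--;
	/* Wake a drainer once the last pending attempt has finished. */
	if (nn->pending_locks == 0 && nn->drain_wakeup) {
		nn->drain_wakeup = 0;
		pthread_cond_broadcast(&nn->drain_cv);
	}
	pthread_mutex_unlock(&nn->interlock);
}

/* Drain: wait for all other pending lock attempts to complete. */
static void
drain(struct layer_node *nn)
{
	pthread_mutex_lock(&nn->interlock);
	while (nn->pending_locks > 0) {
		nn->drain_wakeup = 1;
		pthread_cond_wait(&nn->drain_cv, &nn->interlock);
	}
	pthread_mutex_unlock(&nn->interlock);
	/* ... now take the lock exclusively, as the patch does ... */
}

int
main(void)
{
	static struct layer_node nn = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0
	};

	lock_attempt(&nn);	/* no contention in this toy run */
	drain(&nn);		/* returns immediately: nothing pending */
	printf("drained with %d pending locks\n", nn.pending_locks);
	return (0);
}

Broadcasting rather than signalling mirrors the kernel wakeup(), which wakes
every thread sleeping on the channel.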
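
Note on the LOG2_SIZEVNODE bump: the constant is documented as
log2(sizeof struct vnode) and presumably feeds a pointer-shift hash of the
lower vnode address; a shift smaller than the real log2 of the allocation
size can leave low index bits that never vary, so some hash chains are never
used. The toy program below demonstrates that effect. Both the bucket()
computation and the 256-byte spacing of the fake vnode addresses are
assumptions made purely for illustration; the actual NULL_NHASH macro is not
part of this diff.

#include <stdint.h>
#include <stdio.h>

#define NBUCKETS	16	/* like NNULLNODECACHE */

/* Assumed NULL_NHASH-style bucket computation: shift, then mask. */
static unsigned
bucket(uintptr_t addr, int log2size)
{
	return ((unsigned)((addr >> log2size) & (NBUCKETS - 1)));
}

int
main(void)
{
	uintptr_t base = 0x80000000UL;
	int used7[NBUCKETS] = { 0 }, used8[NBUCKETS] = { 0 };
	int i, n7 = 0, n8 = 0;

	/* Assume vnodes land on 256-byte-spaced addresses (illustrative). */
	for (i = 0; i < 64; i++) {
		used7[bucket(base + (uintptr_t)i * 256, 7)] = 1;
		used8[bucket(base + (uintptr_t)i * 256, 8)] = 1;
	}
	for (i = 0; i < NBUCKETS; i++) {
		n7 += used7[i];
		n8 += used8[i];
	}
	printf("shift 7: %d of %d buckets used\n", n7, NBUCKETS);
	printf("shift 8: %d of %d buckets used\n", n8, NBUCKETS);
	return (0);
}

Under that spacing assumption the run reports 8 of 16 buckets used for a
shift of 7 and 16 of 16 for a shift of 8, which is the kind of suboptimal
distribution the commit message refers to.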