summaryrefslogtreecommitdiffstats
path: root/sys/fs
diff options
context:
space:
mode:
authorlulf <lulf@FreeBSD.org>2008-05-24 14:51:30 +0000
committerlulf <lulf@FreeBSD.org>2008-05-24 14:51:30 +0000
commit0bf5a314d88ca0c5df2f71ef6beb6bc4067223f5 (patch)
tree236415ff1f2b33b277f193bd046392771ce6a318 /sys/fs
parent197cf7b9ba868974b8a725176a65151c78822bea (diff)
downloadFreeBSD-src-0bf5a314d88ca0c5df2f71ef6beb6bc4067223f5.zip
FreeBSD-src-0bf5a314d88ca0c5df2f71ef6beb6bc4067223f5.tar.gz
- Add locking to all filesystem operations in fdescfs and flag it as MPSAFE.
- Use proper synhronization primitives to protect the internal fdesc node cache used in fdescfs. - Properly initialize and uninitalize hash. - Remove unused functions. Since fdescfs might recurse on itself, adding proper locking to it needed some tricky workarounds in some parts to make it work. For instance, a descriptor in fdescfs could refer to an open descriptor to itself, thus forcing the thread to recurse on vnode locks. Because of this, other race conditions also had to be fixed. Tested by: pho Reviewed by: kib (mentor) Approved by: kib (mentor)
Diffstat (limited to 'sys/fs')
-rw-r--r--sys/fs/fdescfs/fdesc.h9
-rw-r--r--sys/fs/fdescfs/fdesc_vfsops.c44
-rw-r--r--sys/fs/fdescfs/fdesc_vnops.c230
3 files changed, 199 insertions, 84 deletions
diff --git a/sys/fs/fdescfs/fdesc.h b/sys/fs/fdescfs/fdesc.h
index fb7d952..06f53fb 100644
--- a/sys/fs/fdescfs/fdesc.h
+++ b/sys/fs/fdescfs/fdesc.h
@@ -35,8 +35,11 @@
*/
#ifdef _KERNEL
+/* Private mount flags for fdescfs. */
+#define FMNT_UNMOUNTF 0x01
struct fdescmount {
struct vnode *f_root; /* Root node */
+ int flags;
};
#define FD_ROOT 1
@@ -55,10 +58,12 @@ struct fdescnode {
int fd_ix; /* filesystem index */
};
+extern struct mtx fdesc_hashmtx;
#define VFSTOFDESC(mp) ((struct fdescmount *)((mp)->mnt_data))
#define VTOFDESC(vp) ((struct fdescnode *)(vp)->v_data)
extern vfs_init_t fdesc_init;
-extern int fdesc_allocvp(fdntype, int, struct mount *, struct vnode **,
- struct thread *);
+extern vfs_uninit_t fdesc_uninit;
+extern int fdesc_allocvp(fdntype, unsigned, int, struct mount *,
+ struct vnode **, struct thread *);
#endif /* _KERNEL */
diff --git a/sys/fs/fdescfs/fdesc_vfsops.c b/sys/fs/fdescfs/fdesc_vfsops.c
index 12ab09b..1994c13 100644
--- a/sys/fs/fdescfs/fdesc_vfsops.c
+++ b/sys/fs/fdescfs/fdesc_vfsops.c
@@ -85,18 +85,30 @@ fdesc_mount(struct mount *mp, struct thread *td)
if (mp->mnt_flag & (MNT_UPDATE | MNT_ROOTFS))
return (EOPNOTSUPP);
- error = fdesc_allocvp(Froot, FD_ROOT, mp, &rvp, td);
- if (error)
- return (error);
-
MALLOC(fmp, struct fdescmount *, sizeof(struct fdescmount),
M_FDESCMNT, M_WAITOK); /* XXX */
+
+ /*
+ * We need to initialize a few bits of our local mount point struct to
+ * avoid confusion in allocvp.
+ */
+ mp->mnt_data = (qaddr_t) fmp;
+ fmp->flags = 0;
+ error = fdesc_allocvp(Froot, -1, FD_ROOT, mp, &rvp, td);
+ if (error) {
+ free(fmp, M_FDESCMNT);
+ mp->mnt_data = 0;
+ return (error);
+ }
rvp->v_type = VDIR;
rvp->v_vflag |= VV_ROOT;
fmp->f_root = rvp;
+ VOP_UNLOCK(rvp, 0);
/* XXX -- don't mark as local to work around fts() problems */
/*mp->mnt_flag |= MNT_LOCAL;*/
- mp->mnt_data = fmp;
+ MNT_ILOCK(mp);
+ mp->mnt_kern_flag |= MNTK_MPSAFE;
+ MNT_IUNLOCK(mp);
vfs_getnewfsid(mp);
vfs_mountedfrom(mp, "fdescfs");
@@ -109,11 +121,19 @@ fdesc_unmount(mp, mntflags, td)
int mntflags;
struct thread *td;
{
+ struct fdescmount *fmp;
+ caddr_t data;
int error;
int flags = 0;
- if (mntflags & MNT_FORCE)
+ fmp = (struct fdescmount *)mp->mnt_data;
+ if (mntflags & MNT_FORCE) {
+ /* The hash mutex protects the private mount flags. */
+ mtx_lock(&fdesc_hashmtx);
+ fmp->flags |= FMNT_UNMOUNTF;
+ mtx_unlock(&fdesc_hashmtx);
flags |= FORCECLOSE;
+ }
/*
* Clear out buffer cache. I don't think we
@@ -127,10 +147,14 @@ fdesc_unmount(mp, mntflags, td)
return (error);
/*
- * Finally, throw away the fdescmount structure
+ * Finally, throw away the fdescmount structure. Hold the hashmtx to
+ * protect the fdescmount structure.
*/
- free(mp->mnt_data, M_FDESCMNT); /* XXX */
+ mtx_lock(&fdesc_hashmtx);
+ data = mp->mnt_data;
mp->mnt_data = 0;
+ mtx_unlock(&fdesc_hashmtx);
+ free(data, M_FDESCMNT); /* XXX */
return (0);
}
@@ -148,8 +172,7 @@ fdesc_root(mp, flags, vpp, td)
* Return locked reference to root.
*/
vp = VFSTOFDESC(mp)->f_root;
- VREF(vp);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ vget(vp, LK_EXCLUSIVE | LK_RETRY, td);
*vpp = vp;
return (0);
}
@@ -208,6 +231,7 @@ static struct vfsops fdesc_vfsops = {
.vfs_mount = fdesc_mount,
.vfs_root = fdesc_root,
.vfs_statfs = fdesc_statfs,
+ .vfs_uninit = fdesc_uninit,
.vfs_unmount = fdesc_unmount,
};
diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
index 06b1718..1fbd41e 100644
--- a/sys/fs/fdescfs/fdesc_vnops.c
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -56,18 +56,15 @@
#include <fs/fdescfs/fdesc.h>
-#define FDL_WANT 0x01
-#define FDL_LOCKED 0x02
-static int fdcache_lock;
-
#define NFDCACHE 4
#define FD_NHASH(ix) \
(&fdhashtbl[(ix) & fdhash])
static LIST_HEAD(fdhashhead, fdescnode) *fdhashtbl;
static u_long fdhash;
+struct mtx fdesc_hashmtx;
+
static vop_getattr_t fdesc_getattr;
-static vop_inactive_t fdesc_inactive;
static vop_lookup_t fdesc_lookup;
static vop_open_t fdesc_open;
static vop_readdir_t fdesc_readdir;
@@ -79,7 +76,6 @@ static struct vop_vector fdesc_vnodeops = {
.vop_access = VOP_NULL,
.vop_getattr = fdesc_getattr,
- .vop_inactive = fdesc_inactive,
.vop_lookup = fdesc_lookup,
.vop_open = fdesc_open,
.vop_pathconf = vop_stdpathconf,
@@ -88,6 +84,9 @@ static struct vop_vector fdesc_vnodeops = {
.vop_setattr = fdesc_setattr,
};
+static void fdesc_insmntque_dtr(struct vnode *, void *);
+static void fdesc_remove_entry(struct fdescnode *);
+
/*
* Initialise cache headers
*/
@@ -96,81 +95,154 @@ fdesc_init(vfsp)
struct vfsconf *vfsp;
{
+ mtx_init(&fdesc_hashmtx, "fdescfs_hash", NULL, MTX_DEF);
fdhashtbl = hashinit(NFDCACHE, M_CACHE, &fdhash);
return (0);
}
+/*
+ * Uninit ready for unload.
+ */
+int
+fdesc_uninit(vfsp)
+ struct vfsconf *vfsp;
+{
+
+ hashdestroy(fdhashtbl, M_CACHE, fdhash);
+ mtx_destroy(&fdesc_hashmtx);
+ return (0);
+}
+
+/*
+ * If allocating vnode fails, call this.
+ */
+static void
+fdesc_insmntque_dtr(struct vnode *vp, void *arg)
+{
+
+ vgone(vp);
+ vput(vp);
+}
+
+/*
+ * Remove an entry from the hash if it exists.
+ */
+static void
+fdesc_remove_entry(struct fdescnode *fd)
+{
+ struct fdhashhead *fc;
+ struct fdescnode *fd2;
+
+ fc = FD_NHASH(fd->fd_ix);
+ mtx_lock(&fdesc_hashmtx);
+ LIST_FOREACH(fd2, fc, fd_hash) {
+ if (fd == fd2) {
+ LIST_REMOVE(fd, fd_hash);
+ break;
+ }
+ }
+ mtx_unlock(&fdesc_hashmtx);
+}
+
int
-fdesc_allocvp(ftype, ix, mp, vpp, td)
+fdesc_allocvp(ftype, fd_fd, ix, mp, vpp, td)
fdntype ftype;
+ unsigned fd_fd;
int ix;
struct mount *mp;
struct vnode **vpp;
struct thread *td;
{
+ struct fdescmount *fmp;
struct fdhashhead *fc;
- struct fdescnode *fd;
+ struct fdescnode *fd, *fd2;
+ struct vnode *vp, *vp2;
int error = 0;
fc = FD_NHASH(ix);
loop:
+ mtx_lock(&fdesc_hashmtx);
+ /*
+ * If a forced unmount is progressing, we need to drop it. The flags are
+ * protected by the hashmtx.
+ */
+ fmp = (struct fdescmount *)mp->mnt_data;
+ if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) {
+ mtx_unlock(&fdesc_hashmtx);
+ return (-1);
+ }
+
LIST_FOREACH(fd, fc, fd_hash) {
if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) {
- if (vget(fd->fd_vnode, LK_EXCLUSIVE | LK_CANRECURSE,
- td))
+ /* Get reference to vnode in case it's being free'd */
+ vp = fd->fd_vnode;
+ VI_LOCK(vp);
+ mtx_unlock(&fdesc_hashmtx);
+ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td))
goto loop;
- *vpp = fd->fd_vnode;
- VOP_UNLOCK(*vpp, 0);
- return (error);
+ *vpp = vp;
+ return (0);
}
}
+ mtx_unlock(&fdesc_hashmtx);
- /*
- * otherwise lock the array while we call getnewvnode
- * since that can block.
- */
- if (fdcache_lock & FDL_LOCKED) {
- fdcache_lock |= FDL_WANT;
- (void) tsleep( &fdcache_lock, PINOD, "fdalvp", 0);
- goto loop;
- }
- fdcache_lock |= FDL_LOCKED;
-
- /*
- * Do the MALLOC before the getnewvnode since doing so afterward
- * might cause a bogus v_data pointer to get dereferenced
- * elsewhere if MALLOC should block.
- */
MALLOC(fd, struct fdescnode *, sizeof(struct fdescnode), M_TEMP, M_WAITOK);
- error = getnewvnode("fdescfs", mp, &fdesc_vnodeops, vpp);
+ error = getnewvnode("fdescfs", mp, &fdesc_vnodeops, &vp);
if (error) {
FREE(fd, M_TEMP);
- goto out;
+ return (error);
}
- (*vpp)->v_data = fd;
- fd->fd_vnode = *vpp;
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ vp->v_data = fd;
+ fd->fd_vnode = vp;
fd->fd_type = ftype;
- fd->fd_fd = -1;
+ fd->fd_fd = fd_fd;
fd->fd_ix = ix;
- /* XXX: vnode should be locked here */
- error = insmntque(*vpp, mp); /* XXX: Too early for mpsafe fs */
+ error = insmntque1(vp, mp, fdesc_insmntque_dtr, NULL);
if (error != 0) {
- free(fd, M_TEMP);
*vpp = NULLVP;
- goto out;
+ return (error);
}
- LIST_INSERT_HEAD(fc, fd, fd_hash);
-out:
- fdcache_lock &= ~FDL_LOCKED;
+ /* Make sure that someone didn't beat us when inserting the vnode. */
+ mtx_lock(&fdesc_hashmtx);
+ /*
+ * If a forced unmount is progressing, we need to drop it. The flags are
+ * protected by the hashmtx.
+ */
+ fmp = (struct fdescmount *)mp->mnt_data;
+ if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) {
+ mtx_unlock(&fdesc_hashmtx);
+ vgone(vp);
+ vput(vp);
+ *vpp = NULLVP;
+ return (-1);
+ }
- if (fdcache_lock & FDL_WANT) {
- fdcache_lock &= ~FDL_WANT;
- wakeup( &fdcache_lock);
+ LIST_FOREACH(fd2, fc, fd_hash) {
+ if (fd2->fd_ix == ix && fd2->fd_vnode->v_mount == mp) {
+ /* Get reference to vnode in case it's being free'd */
+ vp2 = fd2->fd_vnode;
+ VI_LOCK(vp2);
+ mtx_unlock(&fdesc_hashmtx);
+ error = vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK, td);
+ /* Someone beat us, dec use count and wait for reclaim */
+ vgone(vp);
+ vput(vp);
+ /* If we didn't get it, return no vnode. */
+ if (error)
+ vp2 = NULLVP;
+ *vpp = vp2;
+ return (error);
+ }
}
- return (error);
+ /* If we came here, we can insert it safely. */
+ LIST_INSERT_HEAD(fc, fd, fd_hash);
+ mtx_unlock(&fdesc_hashmtx);
+ *vpp = vp;
+ return (0);
}
/*
@@ -230,13 +302,43 @@ fdesc_lookup(ap)
if ((error = fget(td, fd, &fp)) != 0)
goto bad;
- error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp, td);
- fdrop(fp, td);
+ /* Check if we're looking up ourselves. */
+ if (VTOFDESC(dvp)->fd_ix == FD_DESC + fd) {
+ /*
+ * In case we're holding the last reference to the file, the dvp
+ * will be re-acquired.
+ */
+ vhold(dvp);
+ VOP_UNLOCK(dvp, 0);
+ fdrop(fp, td);
+
+ /* Re-aquire the lock afterwards. */
+ vn_lock(dvp, LK_RETRY | LK_EXCLUSIVE);
+ vdrop(dvp);
+ fvp = dvp;
+ } else {
+ /*
+ * Unlock our root node (dvp) when doing this, since we might
+ * deadlock since the vnode might be locked by another thread
+ * and the root vnode lock will be obtained afterwards (in case
+ * we're looking up the fd of the root vnode), which will be the
+ * opposite lock order. Vhold the root vnode first so we don't
+ * loose it.
+ */
+ vhold(dvp);
+ VOP_UNLOCK(dvp, 0);
+ error = fdesc_allocvp(Fdesc, fd, FD_DESC + fd, dvp->v_mount,
+ &fvp, td);
+ fdrop(fp, td);
+ /*
+ * The root vnode must be locked last to prevent deadlock condition.
+ */
+ vn_lock(dvp, LK_RETRY | LK_EXCLUSIVE);
+ vdrop(dvp);
+ }
+
if (error)
goto bad;
- VTOFDESC(fvp)->fd_fd = fd;
- if (fvp != dvp)
- vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
*vpp = fvp;
return (0);
@@ -505,34 +607,18 @@ done:
}
static int
-fdesc_inactive(ap)
- struct vop_inactive_args /* {
- struct vnode *a_vp;
- struct thread *a_td;
- } */ *ap;
-{
- struct vnode *vp = ap->a_vp;
-
- /*
- * Clear out the v_type field to avoid
- * nasty things happening in vgone().
- */
- vp->v_type = VNON;
- return (0);
-}
-
-static int
fdesc_reclaim(ap)
struct vop_reclaim_args /* {
struct vnode *a_vp;
} */ *ap;
{
- struct vnode *vp = ap->a_vp;
- struct fdescnode *fd = VTOFDESC(vp);
+ struct vnode *vp;
+ struct fdescnode *fd;
- LIST_REMOVE(fd, fd_hash);
+ vp = ap->a_vp;
+ fd = VTOFDESC(vp);
+ fdesc_remove_entry(fd);
FREE(vp->v_data, M_TEMP);
- vp->v_data = 0;
-
+ vp->v_data = NULL;
return (0);
}
OpenPOWER on IntegriCloud