author		mohans <mohans@FreeBSD.org>	2006-05-19 00:04:24 +0000
committer	mohans <mohans@FreeBSD.org>	2006-05-19 00:04:24 +0000
commit		60ef6157336e266567b08916dee68af4c4f4d21e (patch)
tree		02c7543fb7adb7dda8df6f0ad320769be2b28c31 /sys/nfsclient
parent		876847ec5e682b076f7a4f80cf4918cfed7d4a06 (diff)
Changes to make the NFS client MP safe.
Thanks to Kris Kennaway for testing and sending lots of bugs my way.
Diffstat (limited to 'sys/nfsclient')
-rw-r--r--	sys/nfsclient/nfs.h		4
-rw-r--r--	sys/nfsclient/nfs_bio.c		330
-rw-r--r--	sys/nfsclient/nfs_nfsiod.c	29
-rw-r--r--	sys/nfsclient/nfs_node.c	3
-rw-r--r--	sys/nfsclient/nfs_socket.c	443
-rw-r--r--	sys/nfsclient/nfs_subs.c	132
-rw-r--r--	sys/nfsclient/nfs_vfsops.c	92
-rw-r--r--	sys/nfsclient/nfs_vnops.c	319
-rw-r--r--	sys/nfsclient/nfsmount.h	2
-rw-r--r--	sys/nfsclient/nfsnode.h		15
10 files changed, 919 insertions, 450 deletions
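
The hunks below repeat one idiom: every read or update of mutable nfsnode, nfsmount, and nfsreq state moves under the new fine-grained mutexes (per-node n_mtx, per-mount nm_mtx, per-request r_mtx, and the global nfs_iod_mtx) instead of relying on Giant. A minimal sketch of the idiom, using the real field names from this patch but a hypothetical helper (not part of the commit):

	/*
	 * Illustrative sketch only: snapshot mutable nfsnode state
	 * under n_mtx rather than reading it unlocked.  The helper
	 * name is hypothetical; n_mtx, n_flag, and n_size are the
	 * fields this patch protects.
	 */
	static int
	nfs_node_modified(struct nfsnode *np, u_quad_t *sizep)
	{
		int modified;

		mtx_lock(&np->n_mtx);
		modified = (np->n_flag & NMODIFIED) != 0;
		if (sizep != NULL)
			*sizep = np->n_size;	/* size and flag from one consistent snapshot */
		mtx_unlock(&np->n_mtx);
		return (modified);
	}

The same shape appears throughout: where a protected test fails, the else path drops the lock before any call that can sleep.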
diff --git a/sys/nfsclient/nfs.h b/sys/nfsclient/nfs.h
index 09f77da..c3c54b2 100644
--- a/sys/nfsclient/nfs.h
+++ b/sys/nfsclient/nfs.h
@@ -131,6 +131,7 @@ extern struct uma_zone *nfsmount_zone;
extern struct callout nfs_callout;
extern struct nfsstats nfsstats;
+extern struct mtx nfs_iod_mtx;
extern int nfs_numasync;
extern unsigned int nfs_iodmax;
@@ -178,6 +179,7 @@ struct nfsreq {
int r_rtt; /* RTT for rpc */
int r_lastmsg; /* last tprintf */
struct thread *r_td; /* Proc that did I/O system call */
+ struct mtx r_mtx; /* Protects nfsreq fields */
};
/*
@@ -310,8 +312,6 @@ int nfs_meta_setsize (struct vnode *, struct ucred *,
void nfs_set_sigmask __P((struct thread *td, sigset_t *oldset));
void nfs_restore_sigmask __P((struct thread *td, sigset_t *set));
-int nfs_tsleep __P((struct thread *td, void *ident, int priority, char *wmesg,
- int timo));
int nfs_msleep __P((struct thread *td, void *ident, struct mtx *mtx, int priority,
char *wmesg, int timo));
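
nfs_tsleep() is removed because callers that used to tsleep() now hold one of the new mutexes and must msleep() instead; nfs_msleep(), declared above, is its msleep()-based counterpart. Judging from the nfs_tsleep() body removed in the nfs_socket.c hunk further down, its shape is presumably:

	/*
	 * Sketch inferred from the removed nfs_tsleep(): swap in the
	 * NFS signal mask around an interruptible sleep so that PCATCH
	 * only catches the signals NFS mounts care about.
	 */
	int
	nfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority,
	    char *wmesg, int timo)
	{
		sigset_t oldset;
		int error;

		if ((priority & PCATCH) == 0)
			return (msleep(ident, mtx, priority, wmesg, timo));
		if (td == NULL)
			td = curthread;	/* XXX */
		nfs_set_sigmask(td, &oldset);
		error = msleep(ident, mtx, priority, wmesg, timo);
		nfs_restore_sigmask(td, &oldset);
		return (error);
	}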
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
index fca722f..19228ac 100644
--- a/sys/nfsclient/nfs_bio.c
+++ b/sys/nfsclient/nfs_bio.c
@@ -71,6 +71,7 @@ static int nfs_directio_write(struct vnode *vp, struct uio *uiop,
extern int nfs_directio_enable;
extern int nfs_directio_allow_mmap;
+
/*
* Vnode op for VM getpages.
*/
@@ -90,8 +91,6 @@ nfs_getpages(struct vop_getpages_args *ap)
vm_page_t *pages;
struct nfsnode *np;
- GIANT_REQUIRED;
-
vp = ap->a_vp;
np = VTONFS(vp);
td = curthread; /* XXX */
@@ -101,22 +100,28 @@ nfs_getpages(struct vop_getpages_args *ap)
count = ap->a_count;
if ((object = vp->v_object) == NULL) {
- printf("nfs_getpages: called with non-merged cache vnode??\n");
+ nfs_printf("nfs_getpages: called with non-merged cache vnode??\n");
return VM_PAGER_ERROR;
}
- if (nfs_directio_enable && !nfs_directio_allow_mmap &&
- (np->n_flag & NNONCACHE) &&
- (vp->v_type == VREG)) {
- printf("nfs_getpages: called on non-cacheable vnode??\n");
- return VM_PAGER_ERROR;
+ if (nfs_directio_enable && !nfs_directio_allow_mmap) {
+ mtx_lock(&np->n_mtx);
+ if ((np->n_flag & NNONCACHE) && (vp->v_type == VREG)) {
+ mtx_unlock(&np->n_mtx);
+ nfs_printf("nfs_getpages: called on non-cacheable vnode??\n");
+ return VM_PAGER_ERROR;
+ } else
+ mtx_unlock(&np->n_mtx);
}
+ mtx_lock(&nmp->nm_mtx);
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
+ (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
+ mtx_unlock(&nmp->nm_mtx);
/* We'll never get here for v4, because we always have fsinfo */
(void)nfs_fsinfo(nmp, vp, cred, td);
- }
+ } else
+ mtx_unlock(&nmp->nm_mtx);
npages = btoc(count);
@@ -173,7 +178,7 @@ nfs_getpages(struct vop_getpages_args *ap)
relpbuf(bp, &nfs_pbuf_freecnt);
if (error && (uio.uio_resid == count)) {
- printf("nfs_getpages: error %d\n", error);
+ nfs_printf("nfs_getpages: error %d\n", error);
VM_OBJECT_LOCK(object);
vm_page_lock_queues();
for (i = 0; i < npages; ++i) {
@@ -270,8 +275,6 @@ nfs_putpages(struct vop_putpages_args *ap)
struct nfsnode *np;
vm_page_t *pages;
- GIANT_REQUIRED;
-
vp = ap->a_vp;
np = VTONFS(vp);
td = curthread; /* XXX */
@@ -282,15 +285,22 @@ nfs_putpages(struct vop_putpages_args *ap)
rtvals = ap->a_rtvals;
npages = btoc(count);
offset = IDX_TO_OFF(pages[0]->pindex);
-
+
+ mtx_lock(&nmp->nm_mtx);
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
(nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
+ mtx_unlock(&nmp->nm_mtx);
(void)nfs_fsinfo(nmp, vp, cred, td);
- }
+ } else
+ mtx_unlock(&nmp->nm_mtx);
+ mtx_lock(&np->n_mtx);
if (nfs_directio_enable && !nfs_directio_allow_mmap &&
- (np->n_flag & NNONCACHE) && (vp->v_type == VREG))
- printf("nfs_putpages: called on noncache-able vnode??\n");
+ (np->n_flag & NNONCACHE) && (vp->v_type == VREG)) {
+ mtx_unlock(&np->n_mtx);
+ nfs_printf("nfs_putpages: called on noncache-able vnode??\n");
+ mtx_lock(&np->n_mtx);
+ }
for (i = 0; i < npages; i++)
rtvals[i] = VM_PAGER_AGAIN;
@@ -298,12 +308,12 @@ nfs_putpages(struct vop_putpages_args *ap)
/*
* When putting pages, do not extend file past EOF.
*/
-
if (offset + count > np->n_size) {
count = np->n_size - offset;
if (count < 0)
count = 0;
}
+ mtx_unlock(&np->n_mtx);
/*
* We use only the kva address for the buffer, but this is extremely
@@ -350,6 +360,81 @@ nfs_putpages(struct vop_putpages_args *ap)
}
/*
+ * For nfs, cache consistency can only be maintained approximately.
+ * Although RFC1094 does not specify the criteria, the following is
+ * believed to be compatible with the reference port.
+ * For nfs:
+ * If the file's modify time on the server has changed since the
+ * last read rpc or you have written to the file,
+ * you may have lost data cache consistency with the
+ * server, so flush all of the file's data out of the cache.
+ * Then force a getattr rpc to ensure that you have up to date
+ * attributes.
+ * NB: This implies that cache data can be read when up to
+ * NFS_ATTRTIMEO seconds out of date. If you find that you need current
+ * attributes this could be forced by setting n_attrstamp to 0 before
+ * the VOP_GETATTR() call.
+ */
+static inline int
+nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred)
+{
+ int error = 0;
+ struct vattr vattr;
+ struct nfsnode *np = VTONFS(vp);
+ int old_lock;
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+
+ /*
+ * Grab the exclusive lock before checking whether the cache is
+ * consistent.
+ * XXX - We can make this cheaper later (by acquiring cheaper locks).
+ * But for now, this suffices.
+ */
+ old_lock = nfs_upgrade_vnlock(vp, td);
+ mtx_lock(&np->n_mtx);
+ if (np->n_flag & NMODIFIED) {
+ mtx_unlock(&np->n_mtx);
+ if (vp->v_type != VREG) {
+ if (vp->v_type != VDIR)
+ panic("nfs: bioread, not dir");
+ (nmp->nm_rpcops->nr_invaldir)(vp);
+ error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
+ if (error)
+ goto out;
+ }
+ np->n_attrstamp = 0;
+ error = VOP_GETATTR(vp, &vattr, cred, td);
+ if (error)
+ goto out;
+ mtx_lock(&np->n_mtx);
+ np->n_mtime = vattr.va_mtime;
+ mtx_unlock(&np->n_mtx);
+ } else {
+ mtx_unlock(&np->n_mtx);
+ error = VOP_GETATTR(vp, &vattr, cred, td);
+ if (error)
+ return (error);
+ mtx_lock(&np->n_mtx);
+ if ((np->n_flag & NSIZECHANGED)
+ || (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime))) {
+ mtx_unlock(&np->n_mtx);
+ if (vp->v_type == VDIR)
+ (nmp->nm_rpcops->nr_invaldir)(vp);
+ error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
+ if (error)
+ goto out;
+ mtx_lock(&np->n_mtx);
+ np->n_mtime = vattr.va_mtime;
+ np->n_flag &= ~NSIZECHANGED;
+ }
+ mtx_unlock(&np->n_mtx);
+ }
+out:
+ nfs_downgrade_vnlock(vp, td, old_lock);
+ return error;
+}
+
+/*
* Vnode op for read using bio
*/
int
@@ -358,7 +443,6 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
struct nfsnode *np = VTONFS(vp);
int biosize, i;
struct buf *bp, *rabp;
- struct vattr vattr;
struct thread *td;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
daddr_t lbn, rabn;
@@ -376,9 +460,14 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
return (EINVAL);
td = uio->uio_td;
+ mtx_lock(&nmp->nm_mtx);
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
+ mtx_unlock(&nmp->nm_mtx);
(void)nfs_fsinfo(nmp, vp, cred, td);
+ } else
+ mtx_unlock(&nmp->nm_mtx);
+
if (vp->v_type != VDIR &&
(uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
return (EFBIG);
@@ -389,52 +478,18 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
biosize = vp->v_mount->mnt_stat.f_iosize;
seqcount = (int)((off_t)(ioflag >> IO_SEQSHIFT) * biosize / BKVASIZE);
- /*
- * For nfs, cache consistency can only be maintained approximately.
- * Although RFC1094 does not specify the criteria, the following is
- * believed to be compatible with the reference port.
- * For nfs:
- * If the file's modify time on the server has changed since the
- * last read rpc or you have written to the file,
- * you may have lost data cache consistency with the
- * server, so flush all of the file's data out of the cache.
- * Then force a getattr rpc to ensure that you have up to date
- * attributes.
- * NB: This implies that cache data can be read when up to
- * NFS_ATTRTIMEO seconds out of date. If you find that you need current
- * attributes this could be forced by setting n_attrstamp to 0 before
- * the VOP_GETATTR() call.
- */
- if (np->n_flag & NMODIFIED) {
- if (vp->v_type != VREG) {
- if (vp->v_type != VDIR)
- panic("nfs: bioread, not dir");
- (nmp->nm_rpcops->nr_invaldir)(vp);
- error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
- if (error)
- return (error);
- }
- np->n_attrstamp = 0;
- error = VOP_GETATTR(vp, &vattr, cred, td);
- if (error)
- return (error);
- np->n_mtime = vattr.va_mtime;
- } else {
- error = VOP_GETATTR(vp, &vattr, cred, td);
- if (error)
- return (error);
- if ((np->n_flag & NSIZECHANGED)
- || (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime))) {
- if (vp->v_type == VDIR)
- (nmp->nm_rpcops->nr_invaldir)(vp);
- error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
- if (error)
- return (error);
- np->n_mtime = vattr.va_mtime;
- np->n_flag &= ~NSIZECHANGED;
- }
- }
+
+ error = nfs_bioread_check_cons(vp, td, cred);
+ if (error)
+ return error;
+
do {
+ u_quad_t nsize;
+
+ mtx_lock(&np->n_mtx);
+ nsize = np->n_size;
+ mtx_unlock(&np->n_mtx);
+
switch (vp->v_type) {
case VREG:
nfsstats.biocache_reads++;
@@ -443,12 +498,10 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
/*
* Start the read ahead(s), as required.
- * The readahead is kicked off only if sequential access
- * is detected, based on the readahead hint (ra_expect_lbn).
*/
- if (nmp->nm_readahead > 0 && np->ra_expect_lbn == lbn) {
+ if (nmp->nm_readahead > 0) {
for (nra = 0; nra < nmp->nm_readahead && nra < seqcount &&
- (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
+ (off_t)(lbn + 1 + nra) * biosize < nsize; nra++) {
rabn = lbn + 1 + nra;
if (incore(&vp->v_bufobj, rabn) == NULL) {
rabp = nfs_getcacheblk(vp, rabn, biosize, td);
@@ -472,15 +525,14 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
}
}
}
- np->ra_expect_lbn = lbn + 1;
}
/* Note that bcount is *not* DEV_BSIZE aligned. */
bcount = biosize;
- if ((off_t)lbn * biosize >= np->n_size) {
+ if ((off_t)lbn * biosize >= nsize) {
bcount = 0;
- } else if ((off_t)(lbn + 1) * biosize > np->n_size) {
- bcount = np->n_size - (off_t)lbn * biosize;
+ } else if ((off_t)(lbn + 1) * biosize > nsize) {
+ bcount = nsize - (off_t)lbn * biosize;
}
bp = nfs_getcacheblk(vp, lbn, bcount, td);
@@ -652,7 +704,7 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
n = np->n_direofoffset - uio->uio_offset;
break;
default:
- printf(" nfs_bioread: type %x unexpected\n", vp->v_type);
+ nfs_printf(" nfs_bioread: type %x unexpected\n", vp->v_type);
bp = NULL;
break;
};
@@ -690,14 +742,18 @@ nfs_directio_write(vp, uiop, cred, ioflag)
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
struct thread *td = uiop->uio_td;
int size;
-
+ int wsize;
+
+ mtx_lock(&nmp->nm_mtx);
+ wsize = nmp->nm_wsize;
+ mtx_unlock(&nmp->nm_mtx);
if (ioflag & IO_SYNC) {
int iomode, must_commit;
struct uio uio;
struct iovec iov;
do_sync:
while (uiop->uio_resid > 0) {
- size = min(uiop->uio_resid, nmp->nm_wsize);
+ size = min(uiop->uio_resid, wsize);
size = min(uiop->uio_iov->iov_len, size);
iov.iov_base = uiop->uio_iov->iov_base;
iov.iov_len = size;
@@ -746,7 +802,7 @@ do_sync:
* in NFS directio access.
*/
while (uiop->uio_resid > 0) {
- size = min(uiop->uio_resid, nmp->nm_wsize);
+ size = min(uiop->uio_resid, wsize);
size = min(uiop->uio_iov->iov_len, size);
bp = getpbuf(&nfs_pbuf_freecnt);
t_uio = malloc(sizeof(struct uio), M_NFSDIRECTIO, M_WAITOK);
@@ -819,8 +875,6 @@ nfs_write(struct vop_write_args *ap)
int n, on, error = 0;
struct proc *p = td?td->td_proc:NULL;
- GIANT_REQUIRED;
-
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_WRITE)
panic("nfs_write mode");
@@ -829,20 +883,29 @@ nfs_write(struct vop_write_args *ap)
#endif
if (vp->v_type != VREG)
return (EIO);
+ mtx_lock(&np->n_mtx);
if (np->n_flag & NWRITEERR) {
np->n_flag &= ~NWRITEERR;
+ mtx_unlock(&np->n_mtx);
return (np->n_error);
- }
+ } else
+ mtx_unlock(&np->n_mtx);
+ mtx_lock(&nmp->nm_mtx);
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
+ mtx_unlock(&nmp->nm_mtx);
(void)nfs_fsinfo(nmp, vp, cred, td);
+ } else
+ mtx_unlock(&nmp->nm_mtx);
/*
* Synchronously flush pending buffers if we are in synchronous
* mode or if we are appending.
*/
if (ioflag & (IO_APPEND | IO_SYNC)) {
+ mtx_lock(&np->n_mtx);
if (np->n_flag & NMODIFIED) {
+ mtx_unlock(&np->n_mtx);
#ifdef notyet /* Needs matching nonblock semantics elsewhere, too. */
/*
* Require non-blocking, synchronous writes to
@@ -857,7 +920,8 @@ flush_and_restart:
error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
if (error)
return (error);
- }
+ } else
+ mtx_unlock(&np->n_mtx);
}
/*
@@ -869,7 +933,9 @@ flush_and_restart:
error = VOP_GETATTR(vp, &vattr, cred, td);
if (error)
return (error);
+ mtx_lock(&np->n_mtx);
uio->uio_offset = np->n_size;
+ mtx_unlock(&np->n_mtx);
}
if (uio->uio_offset < 0)
@@ -907,6 +973,11 @@ flush_and_restart:
* no point optimizing for something that really won't ever happen.
*/
if (!(ioflag & IO_SYNC)) {
+ int nflag;
+
+ mtx_lock(&np->n_mtx);
+ nflag = np->n_flag;
+ mtx_unlock(&np->n_mtx);
int needrestart = 0;
if (nmp->nm_wcommitsize < uio->uio_resid) {
/*
@@ -918,9 +989,9 @@ flush_and_restart:
if (ioflag & IO_NDELAY)
return (EAGAIN);
ioflag |= IO_SYNC;
- if (np->n_flag & NMODIFIED)
+ if (nflag & NMODIFIED)
needrestart = 1;
- } else if (np->n_flag & NMODIFIED) {
+ } else if (nflag & NMODIFIED) {
int wouldcommit = 0;
BO_LOCK(&vp->v_bufobj);
if (vp->v_bufobj.bo_dirty.bv_cnt != 0) {
@@ -961,8 +1032,9 @@ again:
* Handle direct append and file extension cases, calculate
* unaligned buffer size.
*/
-
+ mtx_lock(&np->n_mtx);
if (uio->uio_offset == np->n_size && n) {
+ mtx_unlock(&np->n_mtx);
/*
* Get the buffer (in its pre-append state to maintain
* B_CACHE if it was previously set). Resize the
@@ -975,9 +1047,11 @@ again:
if (bp != NULL) {
long save;
+ mtx_lock(&np->n_mtx);
np->n_size = uio->uio_offset + n;
np->n_flag |= NMODIFIED;
vnode_pager_setsize(vp, np->n_size);
+ mtx_unlock(&np->n_mtx);
save = bp->b_flags & B_CACHE;
bcount += n;
@@ -996,12 +1070,15 @@ again:
else
bcount = np->n_size - (off_t)lbn * biosize;
}
+ mtx_unlock(&np->n_mtx);
bp = nfs_getcacheblk(vp, lbn, bcount, td);
+ mtx_lock(&np->n_mtx);
if (uio->uio_offset + n > np->n_size) {
np->n_size = uio->uio_offset + n;
np->n_flag |= NMODIFIED;
vnode_pager_setsize(vp, np->n_size);
}
+ mtx_unlock(&np->n_mtx);
}
if (!bp) {
@@ -1047,7 +1124,9 @@ again:
}
if (bp->b_wcred == NOCRED)
bp->b_wcred = crhold(cred);
+ mtx_lock(&np->n_mtx);
np->n_flag |= NMODIFIED;
+ mtx_unlock(&np->n_mtx);
/*
* If dirtyend exceeds file size, chop it down. This should
@@ -1059,7 +1138,7 @@ again:
*/
if (bp->b_dirtyend > bcount) {
- printf("NFS append race @%lx:%d\n",
+ nfs_printf("NFS append race @%lx:%d\n",
(long)bp->b_blkno * DEV_BSIZE,
bp->b_dirtyend - bcount);
bp->b_dirtyend = bcount;
@@ -1139,7 +1218,7 @@ again:
break;
} else if ((n + on) == biosize) {
bp->b_flags |= B_ASYNC;
- (void) (nmp->nm_rpcops->nr_writebp)(bp, 0, 0);
+ (void) (nmp->nm_rpcops->nr_writebp)(bp, 0, NULL);
} else {
bdwrite(bp);
}
@@ -1229,15 +1308,7 @@ nfs_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg)
slptimeo = 0;
}
- if ((old_lock = VOP_ISLOCKED(vp, td)) != LK_EXCLUSIVE) {
- if (old_lock == LK_SHARED) {
- /* Upgrade to exclusive lock, this might block */
- vn_lock(vp, LK_UPGRADE | LK_RETRY, td);
- } else {
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
- }
- }
-
+ old_lock = nfs_upgrade_vnlock(vp, td);
/*
* Now, flush as required.
*/
@@ -1247,17 +1318,12 @@ nfs_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg)
goto out;
error = vinvalbuf(vp, flags, td, 0, slptimeo);
}
+ mtx_lock(&np->n_mtx);
if (np->n_directio_asyncwr == 0)
np->n_flag &= ~NMODIFIED;
+ mtx_unlock(&np->n_mtx);
out:
- if (old_lock != LK_EXCLUSIVE) {
- if (old_lock == LK_SHARED) {
- /* Downgrade from exclusive lock, this might block */
- vn_lock(vp, LK_DOWNGRADE, td);
- } else {
- VOP_UNLOCK(vp, 0, td);
- }
- }
+ nfs_downgrade_vnlock(vp, td, old_lock);
return error;
}
@@ -1283,11 +1349,12 @@ nfs_asyncio(struct nfsmount *nmp, struct buf *bp, struct ucred *cred, struct thr
* leave the async daemons for more important rpc's (such as reads
* and writes).
*/
+ mtx_lock(&nfs_iod_mtx);
if (bp->b_iocmd == BIO_WRITE && (bp->b_flags & B_NEEDCOMMIT) &&
(nmp->nm_bufqiods > nfs_numasync / 2)) {
+ mtx_unlock(&nfs_iod_mtx);
return(EIO);
}
-
again:
if (nmp->nm_flag & NFSMNT_INT)
slpflag = PCATCH;
@@ -1350,12 +1417,15 @@ again:
NFS_DPF(ASYNCIO,
("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
nmp->nm_bufqwant = TRUE;
- error = nfs_tsleep(td, &nmp->nm_bufq, slpflag | PRIBIO,
+ error = nfs_msleep(td, &nmp->nm_bufq, &nfs_iod_mtx,
+ slpflag | PRIBIO,
"nfsaio", slptimeo);
if (error) {
error2 = nfs_sigintr(nmp, NULL, td);
- if (error2)
+ if (error2) {
+ mtx_unlock(&nfs_iod_mtx);
return (error2);
+ }
if (slpflag == PCATCH) {
slpflag = 0;
slptimeo = 2 * hz;
@@ -1385,11 +1455,17 @@ again:
BUF_KERNPROC(bp);
TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
nmp->nm_bufqlen++;
- if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE)
+ if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) {
+ mtx_lock(&(VTONFS(bp->b_vp))->n_mtx);
VTONFS(bp->b_vp)->n_directio_asyncwr++;
+ mtx_unlock(&(VTONFS(bp->b_vp))->n_mtx);
+ }
+ mtx_unlock(&nfs_iod_mtx);
return (0);
}
+ mtx_unlock(&nfs_iod_mtx);
+
/*
* All the iods are busy on other mounts, so return EIO to
* force the caller to process the i/o synchronously.
@@ -1415,12 +1491,13 @@ nfs_doio_directwrite(struct buf *bp)
free(uiop, M_NFSDIRECTIO);
if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) {
struct nfsnode *np = VTONFS(bp->b_vp);
-
+ mtx_lock(&np->n_mtx);
np->n_directio_asyncwr--;
if ((np->n_flag & NFSYNCWAIT) && np->n_directio_asyncwr == 0) {
np->n_flag &= ~NFSYNCWAIT;
wakeup((caddr_t)&np->n_directio_asyncwr);
}
+ mtx_unlock(&np->n_mtx);
}
vdrop(bp->b_vp);
bp->b_vp = NULL;
@@ -1441,7 +1518,8 @@ nfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
struct uio uio;
struct iovec io;
struct proc *p = td ? td->td_proc : NULL;
-
+ uint8_t iocmd;
+
np = VTONFS(vp);
nmp = VFSTONFS(vp->v_mount);
uiop = &uio;
@@ -1459,8 +1537,8 @@ nfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
bp->b_ioflags &= ~BIO_ERROR;
KASSERT(!(bp->b_flags & B_DONE), ("nfs_doio: bp %p already marked done", bp));
-
- if (bp->b_iocmd == BIO_READ) {
+ iocmd = bp->b_iocmd;
+ if (iocmd == BIO_READ) {
io.iov_len = uiop->uio_resid = bp->b_bcount;
io.iov_base = bp->b_data;
uiop->uio_rw = UIO_READ;
@@ -1490,11 +1568,15 @@ nfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
}
}
/* ASSERT_VOP_LOCKED(vp, "nfs_doio"); */
- if (p && (vp->v_vflag & VV_TEXT) &&
- (NFS_TIMESPEC_COMPARE(&np->n_mtime, &np->n_vattr.va_mtime))) {
- PROC_LOCK(p);
- killproc(p, "text file modification");
- PROC_UNLOCK(p);
+ if (p && (vp->v_vflag & VV_TEXT)) {
+ mtx_lock(&np->n_mtx);
+ if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &np->n_vattr.va_mtime)) {
+ mtx_unlock(&np->n_mtx);
+ PROC_LOCK(p);
+ killproc(p, "text file modification");
+ PROC_UNLOCK(p);
+ } else
+ mtx_unlock(&np->n_mtx);
}
break;
case VLNK:
@@ -1524,7 +1606,7 @@ nfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
bp->b_flags |= B_INVAL;
break;
default:
- printf("nfs_doio: type %x unexpected\n", vp->v_type);
+ nfs_printf("nfs_doio: type %x unexpected\n", vp->v_type);
break;
};
if (error) {
@@ -1558,9 +1640,10 @@ nfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
/*
* Setup for actual write
*/
-
+ mtx_lock(&np->n_mtx);
if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;
+ mtx_unlock(&np->n_mtx);
if (bp->b_dirtyend > bp->b_dirtyoff) {
io.iov_len = uiop->uio_resid = bp->b_dirtyend
@@ -1635,7 +1718,9 @@ nfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
if (error) {
bp->b_ioflags |= BIO_ERROR;
bp->b_error = np->n_error = error;
+ mtx_lock(&np->n_mtx);
np->n_flag |= NWRITEERR;
+ mtx_unlock(&np->n_mtx);
}
bp->b_dirtyoff = bp->b_dirtyend = 0;
}
@@ -1664,13 +1749,16 @@ int
nfs_meta_setsize(struct vnode *vp, struct ucred *cred, struct thread *td, u_quad_t nsize)
{
struct nfsnode *np = VTONFS(vp);
- u_quad_t tsize = np->n_size;
+ u_quad_t tsize;
int biosize = vp->v_mount->mnt_stat.f_iosize;
int error = 0;
+ mtx_lock(&np->n_mtx);
+ tsize = np->n_size;
np->n_size = nsize;
+ mtx_unlock(&np->n_mtx);
- if (np->n_size < tsize) {
+ if (nsize < tsize) {
struct buf *bp;
daddr_t lbn;
int bufsize;
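
The open-coded vnode lock juggling that used to live in nfs_vinvalbuf() is now shared with nfs_bioread_check_cons() through nfs_upgrade_vnlock()/nfs_downgrade_vnlock() (added to nfs_subs.c below). The contract is that the caller threads the returned pre-upgrade state into the downgrade; a hedged sketch of a caller (the function name is invented):

	/*
	 * Sketch of the upgrade/downgrade bracket used by
	 * nfs_bioread_check_cons() and nfs_vinvalbuf() above.
	 */
	static int
	nfs_flush_exclusive(struct vnode *vp, struct thread *td)
	{
		int old_lock, error;

		old_lock = nfs_upgrade_vnlock(vp, td);	/* now LK_EXCLUSIVE */
		error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
		nfs_downgrade_vnlock(vp, td, old_lock);	/* restore prior state */
		return (error);
	}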
diff --git a/sys/nfsclient/nfs_nfsiod.c b/sys/nfsclient/nfs_nfsiod.c
index 4317989..f38dffe 100644
--- a/sys/nfsclient/nfs_nfsiod.c
+++ b/sys/nfsclient/nfs_nfsiod.c
@@ -102,17 +102,22 @@ sysctl_iodmin(SYSCTL_HANDLER_ARGS)
error = sysctl_handle_int(oidp, &newmin, 0, req);
if (error || (req->newptr == NULL))
return (error);
- if (newmin > nfs_iodmax)
- return (EINVAL);
+ mtx_lock(&nfs_iod_mtx);
+ if (newmin > nfs_iodmax) {
+ error = EINVAL;
+ goto out;
+ }
nfs_iodmin = newmin;
if (nfs_numasync >= nfs_iodmin)
- return (0);
+ goto out;
/*
* If the current number of nfsiod is lower
* than the new minimum, create some more.
*/
for (i = nfs_iodmin - nfs_numasync; i > 0; i--)
nfs_nfsiodnew();
+out:
+ mtx_unlock(&nfs_iod_mtx);
return (0);
}
SYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmin, CTLTYPE_UINT | CTLFLAG_RW, 0,
@@ -131,9 +136,10 @@ sysctl_iodmax(SYSCTL_HANDLER_ARGS)
return (error);
if (newmax > NFS_MAXASYNCDAEMON)
return (EINVAL);
+ mtx_lock(&nfs_iod_mtx);
nfs_iodmax = newmax;
if (nfs_numasync <= nfs_iodmax)
- return (0);
+ goto out;
/*
* If there are some asleep nfsiods that should
* exit, wakeup() them so that they check nfs_iodmax
@@ -146,6 +152,8 @@ sysctl_iodmax(SYSCTL_HANDLER_ARGS)
wakeup(&nfs_iodwant[iod]);
iod--;
}
+out:
+ mtx_unlock(&nfs_iod_mtx);
return (0);
}
SYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmax, CTLTYPE_UINT | CTLFLAG_RW, 0,
@@ -168,8 +176,10 @@ nfs_nfsiodnew(void)
}
if (newiod == -1)
return (-1);
+ mtx_unlock(&nfs_iod_mtx);
error = kthread_create(nfssvc_iod, nfs_asyncdaemon + i, NULL, RFHIGHPID,
0, "nfsiod %d", newiod);
+ mtx_lock(&nfs_iod_mtx);
if (error)
return (-1);
nfs_numasync++;
@@ -183,6 +193,7 @@ nfsiod_setup(void *dummy)
int error;
TUNABLE_INT_FETCH("vfs.nfs.iodmin", &nfs_iodmin);
+ mtx_lock(&nfs_iod_mtx);
/* Silently limit the start number of nfsiod's */
if (nfs_iodmin > NFS_MAXASYNCDAEMON)
nfs_iodmin = NFS_MAXASYNCDAEMON;
@@ -192,6 +203,7 @@ nfsiod_setup(void *dummy)
if (error == -1)
panic("nfsiod_setup: nfs_nfsiodnew failed");
}
+ mtx_unlock(&nfs_iod_mtx);
}
SYSINIT(nfsiod, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, nfsiod_setup, NULL);
@@ -211,7 +223,7 @@ nfssvc_iod(void *instance)
int myiod, timo;
int error = 0;
- mtx_lock(&Giant);
+ mtx_lock(&nfs_iod_mtx);
myiod = (int *)instance - nfs_asyncdaemon;
/*
* Main loop
@@ -230,7 +242,7 @@ nfssvc_iod(void *instance)
* Always keep at least nfs_iodmin kthreads.
*/
timo = (myiod < nfs_iodmin) ? 0 : nfs_iodmaxidle * hz;
- error = tsleep(&nfs_iodwant[myiod], PWAIT | PCATCH,
+ error = msleep(&nfs_iodwant[myiod], &nfs_iod_mtx, PWAIT | PCATCH,
"-", timo);
}
if (error)
@@ -243,6 +255,7 @@ nfssvc_iod(void *instance)
nmp->nm_bufqwant = 0;
wakeup(&nmp->nm_bufq);
}
+ mtx_unlock(&nfs_iod_mtx);
if (bp->b_flags & B_DIRECT) {
KASSERT((bp->b_iocmd == BIO_WRITE), ("nfssvc_iod: BIO_WRITE not set"));
(void)nfs_doio_directwrite(bp);
@@ -252,7 +265,7 @@ nfssvc_iod(void *instance)
else
(void) nfs_doio(bp->b_vp, bp, bp->b_wcred, NULL);
}
-
+ mtx_lock(&nfs_iod_mtx);
/*
* If there are more than one iod on this mount, then defect
* so that the iods can be shared out fairly between the mounts
@@ -276,7 +289,7 @@ finish:
/* Someone may be waiting for the last nfsiod to terminate. */
if (--nfs_numasync == 0)
wakeup(&nfs_numasync);
- mtx_unlock(&Giant);
+ mtx_unlock(&nfs_iod_mtx);
if ((error == 0) || (error == EWOULDBLOCK))
kthread_exit(0);
/* Abnormal termination */
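
One detail in nfs_nfsiodnew() above deserves a note: nfs_iod_mtx is dropped around kthread_create(), because thread creation can sleep and a mutex may not be held across a sleep. The state the lock protects can change in that window, so the counter update happens only after the lock is retaken. In outline (hypothetical helper; slot selection elided):

	/*
	 * Sketch of the drop/reacquire idiom in nfs_nfsiodnew().
	 * Caller holds nfs_iod_mtx and has already picked a free
	 * slot "newiod".
	 */
	static int
	nfs_spawn_iod(int newiod)
	{
		int error;

		mtx_assert(&nfs_iod_mtx, MA_OWNED);
		mtx_unlock(&nfs_iod_mtx);	/* kthread_create() may sleep */
		error = kthread_create(nfssvc_iod, nfs_asyncdaemon + newiod,
		    NULL, RFHIGHPID, 0, "nfsiod %d", newiod);
		mtx_lock(&nfs_iod_mtx);
		if (error)
			return (-1);
		nfs_numasync++;			/* protected again */
		return (newiod);
	}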
diff --git a/sys/nfsclient/nfs_node.c b/sys/nfsclient/nfs_node.c
index ada16f4..b0a958a 100644
--- a/sys/nfsclient/nfs_node.c
+++ b/sys/nfsclient/nfs_node.c
@@ -164,6 +164,7 @@ nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp)
np->n_fhp = &np->n_fh;
bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize);
np->n_fhsize = fhsize;
+ mtx_init(&np->n_mtx, "NFSnode lock", NULL, MTX_DEF);
*npp = np;
return (0);
@@ -234,7 +235,7 @@ nfs_reclaim(struct vop_reclaim_args *ap)
if (np->n_fhsize > NFS_SMALLFH) {
FREE((caddr_t)np->n_fhp, M_NFSBIGFH);
}
-
+ mtx_destroy(&np->n_mtx);
uma_zfree(nfsnode_zone, vp->v_data);
vp->v_data = NULL;
return (0);
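
The node mutex lives exactly as long as the nfsnode: mtx_init() in nfs_nget() right after the filehandle is copied in, mtx_destroy() in nfs_reclaim() just before the node goes back to the zone. A compact sketch of the pairing (hypothetical wrappers; the real calls sit inline in nget/reclaim):

	/* Sketch of the n_mtx lifecycle wired up above. */
	static struct nfsnode *
	nfsnode_alloc(void)
	{
		struct nfsnode *np;

		np = uma_zalloc(nfsnode_zone, M_WAITOK | M_ZERO);
		mtx_init(&np->n_mtx, "NFSnode lock", NULL, MTX_DEF);
		return (np);
	}

	static void
	nfsnode_free(struct nfsnode *np)
	{
		mtx_destroy(&np->n_mtx);	/* must come before the free */
		uma_zfree(nfsnode_zone, np);
	}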
diff --git a/sys/nfsclient/nfs_socket.c b/sys/nfsclient/nfs_socket.c
index cd3fc83..d2fd025 100644
--- a/sys/nfsclient/nfs_socket.c
+++ b/sys/nfsclient/nfs_socket.c
@@ -115,7 +115,7 @@ static int nfs_realign_test;
static int nfs_realign_count;
static int nfs_bufpackets = 4;
static int nfs_reconnects;
-static int nfs3_jukebox_delay = 10;
+static int nfs3_jukebox_delay = 10;
SYSCTL_DECL(_vfs_nfs);
@@ -125,8 +125,7 @@ SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, "");
SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0,
"number of times the nfs client has had to reconnect");
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0,
- "number of seconds to delay a retry after receiving EJUKEBOX");
-
+ "number of seconds to delay a retry after receiving EJUKEBOX");
/*
* There is a congestion window for outstanding rpcs maintained per mount
@@ -154,10 +153,8 @@ static void nfs_softterm(struct nfsreq *rep);
static int nfs_reconnect(struct nfsreq *rep);
static void nfs_clnt_tcp_soupcall(struct socket *so, void *arg, int waitflag);
static void nfs_clnt_udp_soupcall(struct socket *so, void *arg, int waitflag);
-static void wakeup_nfsreq(struct nfsreq *req);
extern struct mtx nfs_reqq_mtx;
-extern struct mtx nfs_reply_mtx;
/*
* Initialize sockets and congestion for a new NFS connection.
@@ -172,13 +169,13 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
struct sockaddr *saddr;
struct thread *td = &thread0; /* only used for socreate and sobind */
- NET_ASSERT_GIANT();
+ NET_LOCK_GIANT();
if (nmp->nm_sotype == SOCK_STREAM) {
- mtx_lock(&nmp->nm_nfstcpstate.mtx);
+ mtx_lock(&nmp->nm_mtx);
nmp->nm_nfstcpstate.flags |= NFS_TCP_EXPECT_RPCMARKER;
nmp->nm_nfstcpstate.rpcresid = 0;
- mtx_unlock(&nmp->nm_nfstcpstate.mtx);
+ mtx_unlock(&nmp->nm_mtx);
}
nmp->nm_so = NULL;
saddr = nmp->nm_nam;
@@ -243,12 +240,16 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
* Protocols that do not require connections may be optionally left
* unconnected for servers that reply from a port other than NFS_PORT.
*/
+ mtx_lock(&nmp->nm_mtx);
if (nmp->nm_flag & NFSMNT_NOCONN) {
if (nmp->nm_soflags & PR_CONNREQUIRED) {
error = ENOTCONN;
+ mtx_unlock(&nmp->nm_mtx);
goto bad;
- }
+ } else
+ mtx_unlock(&nmp->nm_mtx);
} else {
+ mtx_unlock(&nmp->nm_mtx);
error = soconnect(so, nmp->nm_nam, td);
if (error)
goto bad;
@@ -290,7 +291,7 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
pktscale = 2;
if (pktscale > 64)
pktscale = 64;
-
+ mtx_lock(&nmp->nm_mtx);
if (nmp->nm_sotype == SOCK_DGRAM) {
sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale;
rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
@@ -313,7 +314,9 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
sopt.sopt_val = &val;
sopt.sopt_valsize = sizeof val;
val = 1;
+ mtx_unlock(&nmp->nm_mtx);
sosetopt(so, &sopt);
+ mtx_lock(&nmp->nm_mtx);
}
if (so->so_proto->pr_protocol == IPPROTO_TCP) {
struct sockopt sopt;
@@ -326,13 +329,16 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
sopt.sopt_val = &val;
sopt.sopt_valsize = sizeof val;
val = 1;
+ mtx_unlock(&nmp->nm_mtx);
sosetopt(so, &sopt);
+ mtx_lock(&nmp->nm_mtx);
}
sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
sizeof (u_int32_t)) * pktscale;
rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
sizeof (u_int32_t)) * pktscale;
}
+ mtx_unlock(&nmp->nm_mtx);
error = soreserve(so, sndreserve, rcvreserve);
if (error)
goto bad;
@@ -349,6 +355,7 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
so->so_snd.sb_flags |= SB_NOINTR;
SOCKBUF_UNLOCK(&so->so_snd);
+ mtx_lock(&nmp->nm_mtx);
/* Initialize other non-zero congestion variables */
nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
nmp->nm_srtt[3] = (NFS_TIMEO << 3);
@@ -357,10 +364,13 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
nmp->nm_sent = 0;
nmp->nm_timeouts = 0;
+ mtx_unlock(&nmp->nm_mtx);
+ NET_UNLOCK_GIANT();
return (0);
bad:
nfs_disconnect(nmp);
+ NET_UNLOCK_GIANT();
return (error);
}
@@ -387,7 +397,9 @@ nfs_reconnect(struct nfsreq *rep)
error = EINTR;
if (error == EIO || error == EINTR)
return (error);
+ mtx_lock(&Giant);
(void) tsleep(&lbolt, PSOCK, "nfscon", 0);
+ mtx_unlock(&Giant);
}
/*
@@ -399,9 +411,10 @@ nfs_reconnect(struct nfsreq *rep)
* until the connection is established successfully, and
* then re-transmit the request.
*/
- mtx_lock(&nmp->nm_nfstcpstate.mtx);
+ mtx_lock(&nmp->nm_mtx);
nmp->nm_nfstcpstate.flags &= ~NFS_TCP_FORCE_RECONNECT;
- mtx_unlock(&nmp->nm_nfstcpstate.mtx);
+ nmp->nm_nfstcpstate.rpcresid = 0;
+ mtx_unlock(&nmp->nm_mtx);
/*
* Loop through outstanding request list and fix up all requests
@@ -409,8 +422,11 @@ nfs_reconnect(struct nfsreq *rep)
*/
mtx_lock(&nfs_reqq_mtx);
TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
- if (rp->r_nmp == nmp)
+ if (rp->r_nmp == nmp) {
+ mtx_lock(&rp->r_mtx);
rp->r_flags |= R_MUSTRESEND;
+ mtx_unlock(&rp->r_mtx);
+ }
}
mtx_unlock(&nfs_reqq_mtx);
return (0);
@@ -426,9 +442,11 @@ nfs_disconnect(struct nfsmount *nmp)
NET_ASSERT_GIANT();
+ mtx_lock(&nmp->nm_mtx);
if (nmp->nm_so) {
so = nmp->nm_so;
nmp->nm_so = NULL;
+ mtx_unlock(&nmp->nm_mtx);
SOCKBUF_LOCK(&so->so_rcv);
so->so_upcallarg = NULL;
so->so_upcall = NULL;
@@ -436,7 +454,8 @@ nfs_disconnect(struct nfsmount *nmp)
SOCKBUF_UNLOCK(&so->so_rcv);
soshutdown(so, SHUT_WR);
soclose(so);
- }
+ } else
+ mtx_unlock(&nmp->nm_mtx);
}
void
@@ -463,22 +482,29 @@ nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top,
struct sockaddr *sendnam;
int error, error2, soflags, flags;
- NET_ASSERT_GIANT();
+ NET_LOCK_GIANT();
KASSERT(rep, ("nfs_send: called with rep == NULL"));
error = nfs_sigintr(rep->r_nmp, rep, rep->r_td);
if (error) {
m_freem(top);
- return (error);
+ goto out;
}
+ mtx_lock(&rep->r_nmp->nm_mtx);
+ mtx_lock(&rep->r_mtx);
if ((so = rep->r_nmp->nm_so) == NULL) {
rep->r_flags |= R_MUSTRESEND;
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&rep->r_nmp->nm_mtx);
m_freem(top);
- return (0);
+ error = 0;
+ goto out;
}
rep->r_flags &= ~R_MUSTRESEND;
soflags = rep->r_nmp->nm_soflags;
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&rep->r_nmp->nm_mtx);
if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
sendnam = NULL;
@@ -493,7 +519,9 @@ nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top,
flags, curthread /*XXX*/);
if (error == ENOBUFS && so->so_type == SOCK_DGRAM) {
error = 0;
+ mtx_lock(&rep->r_mtx);
rep->r_flags |= R_MUSTRESEND;
+ mtx_unlock(&rep->r_mtx);
}
if (error) {
@@ -513,8 +541,11 @@ nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top,
error2 = NFS_SIGREP(rep);
if (error2)
error = error2;
- else
+ else {
+ mtx_lock(&rep->r_mtx);
rep->r_flags |= R_MUSTRESEND;
+ mtx_unlock(&rep->r_mtx);
+ }
/*
* Handle any recoverable (soft) socket errors here. (?)
@@ -523,6 +554,8 @@ nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top,
error != EWOULDBLOCK && error != EPIPE)
error = 0;
}
+out:
+ NET_UNLOCK_GIANT();
return (error);
}
@@ -533,7 +566,7 @@ nfs_reply(struct nfsreq *rep)
register struct mbuf *m;
int error = 0, sotype, slpflag;
- NET_ASSERT_GIANT();
+ NET_LOCK_GIANT();
sotype = rep->r_nmp->nm_sotype;
/*
@@ -543,30 +576,39 @@ nfs_reply(struct nfsreq *rep)
if (sotype != SOCK_DGRAM) {
error = nfs_sndlock(rep);
if (error)
- return (error);
+ goto out;
tryagain:
+ mtx_lock(&rep->r_nmp->nm_mtx);
+ mtx_lock(&rep->r_mtx);
if (rep->r_mrep) {
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&rep->r_nmp->nm_mtx);
nfs_sndunlock(rep);
- return (0);
+ error = 0;
+ goto out;
}
if (rep->r_flags & R_SOFTTERM) {
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&rep->r_nmp->nm_mtx);
nfs_sndunlock(rep);
- return (EINTR);
+ error = EINTR;
+ goto out;
}
so = rep->r_nmp->nm_so;
- mtx_lock(&rep->r_nmp->nm_nfstcpstate.mtx);
if (!so ||
(rep->r_nmp->nm_nfstcpstate.flags & NFS_TCP_FORCE_RECONNECT)) {
- mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx);
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&rep->r_nmp->nm_mtx);
error = nfs_reconnect(rep);
if (error) {
nfs_sndunlock(rep);
- return (error);
+ goto out;
}
goto tryagain;
- } else
- mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx);
+ }
while (rep->r_flags & R_MUSTRESEND) {
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&rep->r_nmp->nm_mtx);
m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
nfsstats.rpcretries++;
error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
@@ -574,41 +616,59 @@ tryagain:
if (error == EINTR || error == ERESTART ||
(error = nfs_reconnect(rep)) != 0) {
nfs_sndunlock(rep);
- return (error);
+ goto out;
}
goto tryagain;
}
+ mtx_lock(&rep->r_nmp->nm_mtx);
+ mtx_lock(&rep->r_mtx);
}
+ mtx_unlock(&rep->r_nmp->nm_mtx);
+ mtx_unlock(&rep->r_mtx);
nfs_sndunlock(rep);
}
slpflag = 0;
+ mtx_lock(&rep->r_nmp->nm_mtx);
if (rep->r_nmp->nm_flag & NFSMNT_INT)
slpflag = PCATCH;
- mtx_lock(&nfs_reply_mtx);
+ mtx_unlock(&rep->r_nmp->nm_mtx);
+ mtx_lock(&rep->r_mtx);
while ((rep->r_mrep == NULL) && (error == 0) &&
((rep->r_flags & R_SOFTTERM) == 0) &&
((sotype == SOCK_DGRAM) || ((rep->r_flags & R_MUSTRESEND) == 0)))
- error = msleep((caddr_t)rep, &nfs_reply_mtx,
+ error = msleep((caddr_t)rep, &rep->r_mtx,
slpflag | (PZERO - 1), "nfsreq", 0);
- mtx_unlock(&nfs_reply_mtx);
- if (error == EINTR || error == ERESTART)
+ if (error == EINTR || error == ERESTART) {
/* NFS operations aren't restartable. Map ERESTART to EINTR */
- return (EINTR);
- if (rep->r_flags & R_SOFTTERM)
+ error = EINTR;
+ mtx_unlock(&rep->r_mtx);
+ goto out;
+ }
+ if (rep->r_flags & R_SOFTTERM) {
/* Request was terminated because we exceeded the retries (soft mount) */
- return (ETIMEDOUT);
+ error = ETIMEDOUT;
+ mtx_unlock(&rep->r_mtx);
+ goto out;
+ }
+ mtx_unlock(&rep->r_mtx);
if (sotype == SOCK_STREAM) {
- mtx_lock(&rep->r_nmp->nm_nfstcpstate.mtx);
+ mtx_lock(&rep->r_nmp->nm_mtx);
+ mtx_lock(&rep->r_mtx);
if (((rep->r_nmp->nm_nfstcpstate.flags & NFS_TCP_FORCE_RECONNECT) ||
(rep->r_flags & R_MUSTRESEND))) {
- mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx);
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&rep->r_nmp->nm_mtx);
error = nfs_sndlock(rep);
if (error)
- return (error);
+ goto out;
goto tryagain;
- } else
- mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx);
+ } else {
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&rep->r_nmp->nm_mtx);
+ }
}
+out:
+ NET_UNLOCK_GIANT();
return (error);
}
@@ -660,6 +720,8 @@ nfsmout:
* Iff no match, just drop the datagram
*/
TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
+ mtx_lock(&nmp->nm_mtx);
+ mtx_lock(&rep->r_mtx);
if (rep->r_mrep == NULL && rxid == rep->r_xid) {
/* Found it.. */
rep->r_mrep = mrep;
@@ -703,8 +765,13 @@ nfsmout:
NFS_SDRTT(rep) += t1;
}
nmp->nm_timeouts = 0;
+ wakeup((caddr_t)rep);
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&nmp->nm_mtx);
break;
}
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&nmp->nm_mtx);
}
/*
* If not matched to a request, drop it.
@@ -713,31 +780,18 @@ nfsmout:
if (rep == 0) {
nfsstats.rpcunexpected++;
m_freem(mrep);
- } else
- wakeup_nfsreq(rep);
+ }
mtx_unlock(&nfs_reqq_mtx);
}
-/*
- * The wakeup of the requestor should be done under the mutex
- * to avoid potential missed wakeups.
- */
-static void
-wakeup_nfsreq(struct nfsreq *req)
-{
- mtx_lock(&nfs_reply_mtx);
- wakeup((caddr_t)req);
- mtx_unlock(&nfs_reply_mtx);
-}
-
static void
nfs_mark_for_reconnect(struct nfsmount *nmp)
{
struct nfsreq *rp;
- mtx_lock(&nmp->nm_nfstcpstate.mtx);
+ mtx_lock(&nmp->nm_mtx);
nmp->nm_nfstcpstate.flags |= NFS_TCP_FORCE_RECONNECT;
- mtx_unlock(&nmp->nm_nfstcpstate.mtx);
+ mtx_unlock(&nmp->nm_mtx);
/*
* Wakeup all processes that are waiting for replies
* on this mount point. One of them does the reconnect.
@@ -745,8 +799,10 @@ nfs_mark_for_reconnect(struct nfsmount *nmp)
mtx_lock(&nfs_reqq_mtx);
TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
if (rp->r_nmp == nmp) {
+ mtx_lock(&rp->r_mtx);
rp->r_flags |= R_MUSTRESEND;
- wakeup_nfsreq(rp);
+ wakeup((caddr_t)rp);
+ mtx_unlock(&rp->r_mtx);
}
}
mtx_unlock(&nfs_reqq_mtx);
@@ -795,19 +851,21 @@ nfs_clnt_tcp_soupcall(struct socket *so, void *arg, int waitflag)
* Don't pick any more data from the socket if we've marked the
* mountpoint for reconnect.
*/
- mtx_lock(&nmp->nm_nfstcpstate.mtx);
+ mtx_lock(&nmp->nm_mtx);
if (nmp->nm_nfstcpstate.flags & NFS_TCP_FORCE_RECONNECT) {
- mtx_unlock(&nmp->nm_nfstcpstate.mtx);
+ mtx_unlock(&nmp->nm_mtx);
return;
} else
- mtx_unlock(&nmp->nm_nfstcpstate.mtx);
+ mtx_unlock(&nmp->nm_mtx);
auio.uio_td = curthread;
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_rw = UIO_READ;
for ( ; ; ) {
+ mtx_lock(&nmp->nm_mtx);
if (nmp->nm_nfstcpstate.flags & NFS_TCP_EXPECT_RPCMARKER) {
int resid;
+ mtx_unlock(&nmp->nm_mtx);
if (!nfstcp_marker_readable(so)) {
/* Marker is not readable */
return;
@@ -864,14 +922,20 @@ nfs_clnt_tcp_soupcall(struct socket *so, void *arg, int waitflag)
nmp->nm_mountp->mnt_stat.f_mntfromname);
goto mark_reconnect;
}
+ mtx_lock(&nmp->nm_mtx);
nmp->nm_nfstcpstate.rpcresid = len;
nmp->nm_nfstcpstate.flags &= ~(NFS_TCP_EXPECT_RPCMARKER);
- }
+ mtx_unlock(&nmp->nm_mtx);
+ } else
+ mtx_unlock(&nmp->nm_mtx);
+
/*
* Processed RPC marker or no RPC marker to process.
* Pull in and process data.
*/
+ mtx_lock(&nmp->nm_mtx);
if (nmp->nm_nfstcpstate.rpcresid > 0) {
+ mtx_unlock(&nmp->nm_mtx);
if (!nfstcp_readable(so, nmp->nm_nfstcpstate.rpcresid)) {
/* All data not readable */
return;
@@ -894,11 +958,14 @@ nfs_clnt_tcp_soupcall(struct socket *so, void *arg, int waitflag)
}
if (mp == NULL)
panic("nfs_clnt_tcp_soupcall: Got empty mbuf chain from sorecv\n");
+ mtx_lock(&nmp->nm_mtx);
nmp->nm_nfstcpstate.rpcresid = 0;
nmp->nm_nfstcpstate.flags |= NFS_TCP_EXPECT_RPCMARKER;
+ mtx_unlock(&nmp->nm_mtx);
/* We got the entire RPC reply. Match XIDs and wake up requestor */
nfs_clnt_match_xid(so, nmp, mp);
- }
+ } else
+ mtx_unlock(&nmp->nm_mtx);
}
mark_reconnect:
@@ -953,7 +1020,7 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
struct mbuf *m, *md, *mheadend;
time_t waituntil;
caddr_t dpos;
- int s, error = 0, mrest_len, auth_len, auth_type;
+ int error = 0, mrest_len, auth_len, auth_type;
struct timeval now;
u_int32_t *xidp;
@@ -966,11 +1033,12 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
if ((nmp->nm_flag & NFSMNT_NFSV4) != 0)
return nfs4_request(vp, mrest, procnum, td, cred, mrp, mdp, dposp);
MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
- rep->r_mrep = rep->r_md = NULL;
+ bzero(rep, sizeof(struct nfsreq));
rep->r_nmp = nmp;
rep->r_vp = vp;
rep->r_td = td;
rep->r_procnum = procnum;
+ mtx_init(&rep->r_mtx, "NFSrep lock", NULL, MTX_DEF);
getmicrouptime(&now);
rep->r_lastmsg = now.tv_sec -
@@ -1019,7 +1087,6 @@ tryagain:
* Chain request into list of outstanding requests. Be sure
* to put it LAST so timer finds oldest requests first.
*/
- s = splsoftclock();
mtx_lock(&nfs_reqq_mtx);
if (TAILQ_EMPTY(&nfs_reqq))
callout_reset(&nfs_callout, nfs_ticks, nfs_timer, NULL);
@@ -1031,10 +1098,11 @@ tryagain:
* send this one now but let timer do it. If not timing a request,
* do it now.
*/
+ mtx_lock(&nmp->nm_mtx);
if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
(nmp->nm_flag & NFSMNT_DUMBTIMR) ||
nmp->nm_sent < nmp->nm_cwnd)) {
- splx(s);
+ mtx_unlock(&nmp->nm_mtx);
error = nfs_sndlock(rep);
if (!error) {
m2 = m_copym(m, 0, M_COPYALL, M_TRYWAIT);
@@ -1047,12 +1115,14 @@ tryagain:
* blocking on nfs_send() too long, so check for R_SENT here.
*/
if (!error && (rep->r_flags & (R_SENT | R_MUSTRESEND)) == 0) {
+ mtx_lock(&nmp->nm_mtx);
nmp->nm_sent += NFS_CWNDSCALE;
+ mtx_unlock(&nmp->nm_mtx);
rep->r_flags |= R_SENT;
}
mtx_unlock(&nfs_reqq_mtx);
} else {
- splx(s);
+ mtx_unlock(&nmp->nm_mtx);
rep->r_rtt = -1;
}
@@ -1065,7 +1135,6 @@ tryagain:
/*
* RPC done, unlink the request.
*/
- s = splsoftclock();
mtx_lock(&nfs_reqq_mtx);
/*
* nfs_timer() may be in the process of re-transmitting this request.
@@ -1086,10 +1155,11 @@ tryagain:
*/
if (rep->r_flags & R_SENT) {
rep->r_flags &= ~R_SENT; /* paranoia */
+ mtx_lock(&nmp->nm_mtx);
nmp->nm_sent -= NFS_CWNDSCALE;
+ mtx_unlock(&nmp->nm_mtx);
}
mtx_unlock(&nfs_reqq_mtx);
- splx(s);
/*
* If there was a successful reply and a tprintf msg.
@@ -1113,6 +1183,7 @@ tryagain:
if (rep->r_mrep != NULL)
m_freem(rep->r_mrep);
m_freem(rep->r_mreq);
+ mtx_destroy(&rep->r_mtx);
free((caddr_t)rep, M_NFSREQ);
return (error);
}
@@ -1131,6 +1202,7 @@ tryagain:
error = EACCES;
m_freem(mrep);
m_freem(rep->r_mreq);
+ mtx_destroy(&rep->r_mtx);
free((caddr_t)rep, M_NFSREQ);
return (error);
}
@@ -1153,12 +1225,16 @@ tryagain:
m_freem(mrep);
error = 0;
waituntil = time_second + nfs3_jukebox_delay;
- while (time_second < waituntil)
- (void) tsleep(&lbolt,
- PSOCK, "nqnfstry", 0);
+ while (time_second < waituntil) {
+ mtx_lock(&Giant);
+ (void) tsleep(&lbolt, PSOCK, "nqnfstry", 0);
+ mtx_unlock(&Giant);
+ }
+ mtx_lock(&nfs_reqq_mtx);
if (++nfs_xid == 0)
nfs_xid++;
rep->r_xid = *xidp = txdr_unsigned(nfs_xid);
+ mtx_unlock(&nfs_reqq_mtx);
goto tryagain;
}
@@ -1176,6 +1252,7 @@ tryagain:
} else
m_freem(mrep);
m_freem(rep->r_mreq);
+ mtx_destroy(&rep->r_mtx);
free((caddr_t)rep, M_NFSREQ);
return (error);
}
@@ -1184,6 +1261,7 @@ tryagain:
*mdp = md;
*dposp = dpos;
m_freem(rep->r_mreq);
+ mtx_destroy(&rep->r_mtx);
FREE((caddr_t)rep, M_NFSREQ);
return (0);
}
@@ -1191,6 +1269,7 @@ tryagain:
error = EPROTONOSUPPORT;
nfsmout:
m_freem(rep->r_mreq);
+ mtx_destroy(&rep->r_mtx);
free((caddr_t)rep, M_NFSREQ);
return (error);
}
@@ -1215,34 +1294,34 @@ nfs_timer(void *arg)
struct socket *so;
struct nfsmount *nmp;
int timeo;
- int s, error;
+ int error;
struct timeval now;
getmicrouptime(&now);
- s = splnet();
mtx_lock(&Giant); /* nfs_down -> tprintf */
mtx_lock(&nfs_reqq_mtx);
TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
nmp = rep->r_nmp;
- if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
+ mtx_lock(&rep->r_mtx);
+ if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
+ mtx_unlock(&rep->r_mtx);
continue;
+ } else
+ mtx_unlock(&rep->r_mtx);
if (nfs_sigintr(nmp, rep, rep->r_td))
continue;
+ mtx_lock(&nmp->nm_mtx);
+ mtx_lock(&rep->r_mtx);
if (nmp->nm_tprintf_initial_delay != 0 &&
(rep->r_rexmit > 2 || (rep->r_flags & R_RESENDERR)) &&
rep->r_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) {
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&nmp->nm_mtx);
rep->r_lastmsg = now.tv_sec;
nfs_down(rep, nmp, rep->r_td, "not responding",
- 0, NFSSTA_TIMEO);
-#if 0
- if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
- /* we're not yet completely mounted and */
- /* we can't complete an RPC, so we fail */
- nfsstats.rpctimeouts++;
- nfs_softterm(rep);
- continue;
- }
-#endif
+ 0, NFSSTA_TIMEO);
+ mtx_lock(&nmp->nm_mtx);
+ mtx_lock(&rep->r_mtx);
}
if (rep->r_rtt >= 0) {
rep->r_rtt++;
@@ -1252,14 +1331,19 @@ nfs_timer(void *arg)
timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
if (nmp->nm_timeouts > 0)
timeo *= nfs_backoff[nmp->nm_timeouts - 1];
- if (rep->r_rtt <= timeo)
+ if (rep->r_rtt <= timeo) {
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&nmp->nm_mtx);
continue;
+ }
if (nmp->nm_timeouts < NFS_NBACKOFF)
nmp->nm_timeouts++;
}
if (rep->r_rexmit >= rep->r_retry) { /* too many */
nfsstats.rpctimeouts++;
nfs_softterm(rep);
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&nmp->nm_mtx);
continue;
}
if (nmp->nm_sotype != SOCK_DGRAM) {
@@ -1272,12 +1356,17 @@ nfs_timer(void *arg)
* if necessary.
*/
rep->r_flags |= R_MUSTRESEND;
- wakeup_nfsreq(rep);
+ wakeup((caddr_t)rep);
rep->r_rtt = 0;
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&nmp->nm_mtx);
continue;
}
- if ((so = nmp->nm_so) == NULL)
+ if ((so = nmp->nm_so) == NULL) {
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&nmp->nm_mtx);
continue;
+ }
/*
* If there is enough space and the window allows..
* Resend it
@@ -1285,57 +1374,69 @@ nfs_timer(void *arg)
*/
rep->r_rtt = -1;
if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
- ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
- (rep->r_flags & R_SENT) ||
- nmp->nm_sent < nmp->nm_cwnd) &&
- (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) {
- /*
- * Mark the request to indicate that a XMIT is in progress
- * to prevent the req structure being removed in nfs_request().
- */
- rep->r_flags |= R_REXMIT_INPROG;
- mtx_unlock(&nfs_reqq_mtx);
- NET_LOCK_GIANT();
- if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
- error = (*so->so_proto->pr_usrreqs->pru_send)
- (so, 0, m, NULL, NULL, curthread);
- else
- error = (*so->so_proto->pr_usrreqs->pru_send)
- (so, 0, m, nmp->nm_nam, NULL, curthread);
- NET_UNLOCK_GIANT();
- mtx_lock(&nfs_reqq_mtx);
- rep->r_flags &= ~R_REXMIT_INPROG;
- wakeup((caddr_t)&rep->r_flags);
- if (error) {
- if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
- so->so_error = 0;
- rep->r_flags |= R_RESENDERR;
- } else {
+ ((nmp->nm_flag & NFSMNT_DUMBTIMR) || (rep->r_flags & R_SENT) ||
+ nmp->nm_sent < nmp->nm_cwnd)) {
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&nmp->nm_mtx);
+ if ((m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) {
/*
- * Iff first send, start timing
- * else turn timing off, backoff timer
- * and divide congestion window by 2.
+ * Mark the request to indicate that a XMIT is in
+ * progress to prevent the req structure being
+ * removed in nfs_request().
*/
- rep->r_flags &= ~R_RESENDERR;
- if (rep->r_flags & R_SENT) {
- rep->r_flags &= ~R_TIMING;
- if (++rep->r_rexmit > NFS_MAXREXMIT)
- rep->r_rexmit = NFS_MAXREXMIT;
- nmp->nm_cwnd >>= 1;
- if (nmp->nm_cwnd < NFS_CWNDSCALE)
- nmp->nm_cwnd = NFS_CWNDSCALE;
- nfsstats.rpcretries++;
+ mtx_lock(&rep->r_mtx);
+ rep->r_flags |= R_REXMIT_INPROG;
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&nfs_reqq_mtx);
+ NET_LOCK_GIANT();
+ if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
+ error = (*so->so_proto->pr_usrreqs->pru_send)
+ (so, 0, m, NULL, NULL, curthread);
+ else
+ error = (*so->so_proto->pr_usrreqs->pru_send)
+ (so, 0, m, nmp->nm_nam, NULL,
+ curthread);
+ NET_UNLOCK_GIANT();
+ mtx_lock(&nfs_reqq_mtx);
+ mtx_lock(&nmp->nm_mtx);
+ mtx_lock(&rep->r_mtx);
+ rep->r_flags &= ~R_REXMIT_INPROG;
+ wakeup((caddr_t)&rep->r_flags);
+ if (error) {
+ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
+ so->so_error = 0;
+ rep->r_flags |= R_RESENDERR;
} else {
- rep->r_flags |= R_SENT;
- nmp->nm_sent += NFS_CWNDSCALE;
+ /*
+ * Iff first send, start timing
+ * else turn timing off, backoff timer
+ * and divide congestion window by 2.
+ */
+ rep->r_flags &= ~R_RESENDERR;
+ if (rep->r_flags & R_SENT) {
+ rep->r_flags &= ~R_TIMING;
+ if (++rep->r_rexmit > NFS_MAXREXMIT)
+ rep->r_rexmit = NFS_MAXREXMIT;
+ nmp->nm_cwnd >>= 1;
+ if (nmp->nm_cwnd < NFS_CWNDSCALE)
+ nmp->nm_cwnd = NFS_CWNDSCALE;
+ nfsstats.rpcretries++;
+ } else {
+ rep->r_flags |= R_SENT;
+ nmp->nm_sent += NFS_CWNDSCALE;
+ }
+ rep->r_rtt = 0;
}
- rep->r_rtt = 0;
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&nmp->nm_mtx);
}
+ } else {
+ mtx_unlock(&rep->r_mtx);
+ mtx_unlock(&nmp->nm_mtx);
}
}
mtx_unlock(&nfs_reqq_mtx);
mtx_unlock(&Giant); /* nfs_down -> tprintf */
- splx(s);
callout_reset(&nfs_callout, nfs_ticks, nfs_timer, NULL);
}
@@ -1349,31 +1450,33 @@ nfs_nmcancelreqs(nmp)
struct nfsmount *nmp;
{
struct nfsreq *req;
- int i, s;
+ int i;
- s = splnet();
mtx_lock(&nfs_reqq_mtx);
TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
+ mtx_lock(&req->r_mtx);
if (nmp != req->r_nmp || req->r_mrep != NULL ||
- (req->r_flags & R_SOFTTERM))
+ (req->r_flags & R_SOFTTERM)) {
+ mtx_unlock(&req->r_mtx);
continue;
+ }
nfs_softterm(req);
+ mtx_unlock(&req->r_mtx);
}
mtx_unlock(&nfs_reqq_mtx);
- splx(s);
for (i = 0; i < 30; i++) {
- s = splnet();
mtx_lock(&nfs_reqq_mtx);
TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
if (nmp == req->r_nmp)
break;
}
mtx_unlock(&nfs_reqq_mtx);
- splx(s);
if (req == NULL)
return (0);
+ mtx_lock(&Giant);
tsleep(&lbolt, PSOCK, "nfscancel", 0);
+ mtx_unlock(&Giant);
}
return (EBUSY);
}
@@ -1387,7 +1490,7 @@ nfs_nmcancelreqs(nmp)
static void
nfs_softterm(struct nfsreq *rep)
{
-
+ KASSERT(mtx_owned(&rep->r_mtx), ("NFS req lock not owned!"));
rep->r_flags |= R_SOFTTERM;
if (rep->r_flags & R_SENT) {
rep->r_nmp->nm_sent -= NFS_CWNDSCALE;
@@ -1397,7 +1500,7 @@ nfs_softterm(struct nfsreq *rep)
* Request terminated, wakeup the blocked process, so that we
* can return EINTR back.
*/
- wakeup_nfsreq(rep);
+ wakeup((caddr_t)rep);
}
/*
@@ -1494,28 +1597,6 @@ nfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *
}
/*
- * NFS wrapper to tsleep(), that shoves a new p_sigmask and restores the
- * old one after tsleep() returns.
- */
-int
-nfs_tsleep(struct thread *td, void *ident, int priority, char *wmesg, int timo)
-{
- sigset_t oldset;
- int error;
- struct proc *p;
-
- if ((priority & PCATCH) == 0)
- return tsleep(ident, priority, wmesg, timo);
- if (td == NULL)
- td = curthread; /* XXX */
- nfs_set_sigmask(td, &oldset);
- error = tsleep(ident, priority, wmesg, timo);
- nfs_restore_sigmask(td, &oldset);
- p = td->td_proc;
- return (error);
-}
-
-/*
* Test for a termination condition pending on the process.
* This is used for NFSMNT_INT mounts.
*/
@@ -1524,19 +1605,28 @@ nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct thread *td)
{
struct proc *p;
sigset_t tmpset;
-
+ int error = 0;
+
if ((nmp->nm_flag & NFSMNT_NFSV4) != 0)
return nfs4_sigintr(nmp, rep, td);
- if (rep && (rep->r_flags & R_SOFTTERM))
- return (EIO);
+ if (rep) {
+ mtx_lock(&rep->r_mtx);
+ if (rep->r_flags & R_SOFTTERM) {
+ mtx_unlock(&rep->r_mtx);
+ error = EIO;
+ goto out;
+ } else
+ mtx_unlock(&rep->r_mtx);
+ }
/* Terminate all requests while attempting a forced unmount. */
- if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)
- return (EIO);
+ if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) {
+ error = EIO;
+ goto out;
+ }
if (!(nmp->nm_flag & NFSMNT_INT))
- return (0);
+ goto out;
if (td == NULL)
return (0);
-
p = td->td_proc;
PROC_LOCK(p);
tmpset = p->p_siglist;
@@ -1551,6 +1641,8 @@ nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct thread *td)
PROC_UNLOCK(p);
return (0);
+out:
+ return(error);
}
/*
@@ -1567,21 +1659,25 @@ nfs_sndlock(struct nfsreq *rep)
int error, slpflag = 0, slptimeo = 0;
td = rep->r_td;
+ mtx_lock(&rep->r_nmp->nm_mtx);
if (rep->r_nmp->nm_flag & NFSMNT_INT)
slpflag = PCATCH;
while (*statep & NFSSTA_SNDLOCK) {
error = nfs_sigintr(rep->r_nmp, rep, td);
- if (error)
+ if (error) {
+ mtx_unlock(&rep->r_nmp->nm_mtx);
return (error);
+ }
*statep |= NFSSTA_WANTSND;
- (void) tsleep(statep, slpflag | (PZERO - 1),
- "nfsndlck", slptimeo);
+ (void) msleep(statep, &rep->r_nmp->nm_mtx,
+ slpflag | (PZERO - 1), "nfsndlck", slptimeo);
if (slpflag == PCATCH) {
slpflag = 0;
slptimeo = 2 * hz;
}
}
*statep |= NFSSTA_SNDLOCK;
+ mtx_unlock(&rep->r_nmp->nm_mtx);
return (0);
}
@@ -1593,6 +1689,7 @@ nfs_sndunlock(struct nfsreq *rep)
{
int *statep = &rep->r_nmp->nm_state;
+ mtx_lock(&rep->r_nmp->nm_mtx);
if ((*statep & NFSSTA_SNDLOCK) == 0)
panic("nfs sndunlock");
*statep &= ~NFSSTA_SNDLOCK;
@@ -1600,6 +1697,7 @@ nfs_sndunlock(struct nfsreq *rep)
*statep &= ~NFSSTA_WANTSND;
wakeup(statep);
}
+ mtx_unlock(&rep->r_nmp->nm_mtx);
}
/*
@@ -1703,8 +1801,10 @@ nfs_down(rep, nmp, td, msg, error, flags)
nmp->nm_state |= NFSSTA_LOCKTIMEO;
}
#endif
+ if (rep != NULL) {
+ mtx_lock(&rep->r_mtx);
+ rep->r_flags |= R_TPRINTFMSG;
+ mtx_unlock(&rep->r_mtx);
+ }
nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
}
@@ -1721,8 +1821,10 @@ nfs_up(rep, nmp, td, msg, flags)
if (nmp == NULL)
return;
+ if (rep == NULL)
+ nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
+ else {
+ mtx_lock(&rep->r_mtx);
+ if ((rep->r_flags & R_TPRINTFMSG) != 0)
+ nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
+ mtx_unlock(&rep->r_mtx);
+ }
if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
nmp->nm_state &= ~NFSSTA_TIMEO;
vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
@@ -1736,4 +1838,3 @@ nfs_up(rep, nmp, td, msg, flags)
}
#endif
}
-
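
With nfs_reply_mtx and wakeup_nfsreq() gone, each request sleeps on itself under its own r_mtx, and every waker (the socket upcall, nfs_timer(), nfs_mark_for_reconnect(), nfs_softterm()) calls wakeup() while holding that same mutex, so a wakeup can no longer slip between the sleeper's test of r_mrep and its msleep(). Both halves of the handshake, reduced to a sketch (helper names invented):

	/* Waiter side, as in nfs_reply() above (sketch only): */
	static int
	nfs_wait_reply(struct nfsreq *rep)
	{
		int error = 0;

		mtx_lock(&rep->r_mtx);
		while (rep->r_mrep == NULL && error == 0 &&
		    (rep->r_flags & R_SOFTTERM) == 0)
			error = msleep(rep, &rep->r_mtx, PZERO - 1,
			    "nfsreq", 0);
		mtx_unlock(&rep->r_mtx);
		return (error);
	}

	/* Waker side, as in nfs_clnt_match_xid() above (sketch only): */
	static void
	nfs_post_reply(struct nfsreq *rep, struct mbuf *mrep)
	{
		mtx_lock(&rep->r_mtx);
		rep->r_mrep = mrep;	/* publish the reply ... */
		wakeup(rep);		/* ... and wake under the same lock */
		mtx_unlock(&rep->r_mtx);
	}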
diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c
index fb67091..8556b3c 100644
--- a/sys/nfsclient/nfs_subs.c
+++ b/sys/nfsclient/nfs_subs.c
@@ -76,6 +76,12 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
/*
+ * Note that stdarg.h and the ANSI style va_start macro is used for both
+ * ANSI and traditional C compilers.
+ */
+#include <machine/stdarg.h>
+
+/*
* Data items converted to xdr at startup, since they are constant
* This is kinda hokey, but may save a little time doing byte swaps
*/
@@ -95,7 +101,6 @@ int nfs_pbuf_freecnt = -1; /* start out unlimited */
struct nfs_reqq nfs_reqq;
struct mtx nfs_reqq_mtx;
-struct mtx nfs_reply_mtx;
struct nfs_bufq nfs_bufq;
/*
@@ -182,6 +187,7 @@ nfsm_rpchead(struct ucred *cr, int nmflag, int procid, int auth_type,
*/
tl = nfsm_build(u_int32_t *, 8 * NFSX_UNSIGNED);
+ mtx_lock(&nfs_reqq_mtx);
/* Get a pretty random xid to start with */
if (!nfs_xid)
nfs_xid = random();
@@ -193,6 +199,7 @@ nfsm_rpchead(struct ucred *cr, int nmflag, int procid, int auth_type,
*xidpp = tl;
*tl++ = txdr_unsigned(nfs_xid);
+ mtx_unlock(&nfs_reqq_mtx);
*tl++ = rpc_call;
*tl++ = rpc_vers;
*tl++ = txdr_unsigned(NFS_PROG);
@@ -416,7 +423,7 @@ nfs_init(struct vfsconf *vfsp)
TAILQ_INIT(&nfs_reqq);
callout_init(&nfs_callout, CALLOUT_MPSAFE);
mtx_init(&nfs_reqq_mtx, "NFS reqq lock", NULL, MTX_DEF);
- mtx_init(&nfs_reply_mtx, "Synch NFS reply posting", NULL, MTX_DEF);
+ mtx_init(&nfs_iod_mtx, "NFS iod lock", NULL, MTX_DEF);
nfs_pbuf_freecnt = nswbuf / 2 + 1;
@@ -437,19 +444,80 @@ nfs_uninit(struct vfsconf *vfsp)
* Tell all nfsiod processes to exit. Clear nfs_iodmax, and wakeup
* any sleeping nfsiods so they check nfs_iodmax and exit.
*/
+ mtx_lock(&nfs_iod_mtx);
nfs_iodmax = 0;
for (i = 0; i < nfs_numasync; i++)
if (nfs_iodwant[i])
wakeup(&nfs_iodwant[i]);
/* The last nfsiod to exit will wake us up when nfs_numasync hits 0 */
while (nfs_numasync)
- tsleep(&nfs_numasync, PWAIT, "ioddie", 0);
-
+ msleep(&nfs_numasync, &nfs_iod_mtx, PWAIT, "ioddie", 0);
+ mtx_unlock(&nfs_iod_mtx);
nfs_nhuninit();
uma_zdestroy(nfsmount_zone);
return (0);
}
+void
+nfs_dircookie_lock(struct nfsnode *np)
+{
+ mtx_lock(&np->n_mtx);
+ while (np->n_flag & NDIRCOOKIELK)
+ (void) msleep(&np->n_flag, &np->n_mtx, PZERO, "nfsdirlk", 0);
+ np->n_flag |= NDIRCOOKIELK;
+ mtx_unlock(&np->n_mtx);
+}
+
+void
+nfs_dircookie_unlock(struct nfsnode *np)
+{
+ mtx_lock(&np->n_mtx);
+ np->n_flag &= ~NDIRCOOKIELK;
+ wakeup(&np->n_flag);
+ mtx_unlock(&np->n_mtx);
+}
+
+int
+nfs_upgrade_vnlock(struct vnode *vp, struct thread *td)
+{
+ int old_lock;
+
+ if ((old_lock = VOP_ISLOCKED(vp, td)) != LK_EXCLUSIVE) {
+ if (old_lock == LK_SHARED) {
+ /* Upgrade to exclusive lock, this might block */
+ vn_lock(vp, LK_UPGRADE | LK_RETRY, td);
+ } else {
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
+ }
+ }
+ return old_lock;
+}
+
+void
+nfs_downgrade_vnlock(struct vnode *vp, struct thread *td, int old_lock)
+{
+ if (old_lock != LK_EXCLUSIVE) {
+ if (old_lock == LK_SHARED) {
+ /* Downgrade from exclusive lock, this might block */
+ vn_lock(vp, LK_DOWNGRADE, td);
+ } else {
+ VOP_UNLOCK(vp, 0, td);
+ }
+ }
+}
+
+void
+nfs_printf(const char *fmt, ...)
+{
+ va_list ap;
+
+ mtx_lock(&Giant);
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+ mtx_unlock(&Giant);
+}
+
/*
* Attribute cache routines.
* nfs_loadattrcache() - loads or updates the cache contents from attributes
@@ -466,7 +534,7 @@ nfs_uninit(struct vfsconf *vfsp)
*/
int
nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp,
- struct vattr *vaper, int dontshrink)
+ struct vattr *vaper, int dontshrink)
{
struct vnode *vp = *vpp;
struct vattr *vap;
@@ -535,6 +603,7 @@ nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp,
* information.
*/
np = VTONFS(vp);
+ mtx_lock(&np->n_mtx);
if (vp->v_type != vtyp) {
vp->v_type = vtyp;
if (vp->v_type == VFIFO)
@@ -617,6 +686,7 @@ nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp,
vaper->va_mtime = np->n_mtim;
}
}
+ mtx_unlock(&np->n_mtx);
return (0);
}
@@ -639,16 +709,20 @@ nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
struct vattr *vap;
struct nfsmount *nmp;
int timeo;
-
+
np = VTONFS(vp);
vap = &np->n_vattr;
nmp = VFSTONFS(vp->v_mount);
+#ifdef NFS_ACDEBUG
+ mtx_lock(&Giant); /* nfs_printf() */
+#endif
+ mtx_lock(&np->n_mtx);
/* XXX n_mtime doesn't seem to be updated on a miss-and-reload */
timeo = (time_second - np->n_mtime.tv_sec) / 10;
#ifdef NFS_ACDEBUG
if (nfs_acdebug>1)
- printf("nfs_getattrcache: initial timeo = %d\n", timeo);
+ nfs_printf("nfs_getattrcache: initial timeo = %d\n", timeo);
#endif
if (vap->va_type == VDIR) {
@@ -665,18 +739,19 @@ nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
#ifdef NFS_ACDEBUG
if (nfs_acdebug > 2)
- printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
- nmp->nm_acregmin, nmp->nm_acregmax,
- nmp->nm_acdirmin, nmp->nm_acdirmax);
+ nfs_printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
+ nmp->nm_acregmin, nmp->nm_acregmax,
+ nmp->nm_acdirmin, nmp->nm_acdirmax);
if (nfs_acdebug)
- printf("nfs_getattrcache: age = %d; final timeo = %d\n",
- (time_second - np->n_attrstamp), timeo);
+ nfs_printf("nfs_getattrcache: age = %d; final timeo = %d\n",
+ (time_second - np->n_attrstamp), timeo);
#endif
if ((time_second - np->n_attrstamp) >= timeo) {
nfsstats.attrcache_misses++;
- return (ENOENT);
+ mtx_unlock(&np->n_mtx);
+ return (ENOENT);
}
nfsstats.attrcache_hits++;
if (vap->va_size != np->n_size) {
@@ -701,6 +776,10 @@ nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
if (np->n_flag & NUPD)
vaper->va_mtime = np->n_mtim;
}
+ mtx_unlock(&np->n_mtx);
+#ifdef NFS_ACDEBUG
+ mtx_unlock(&Giant); /* nfs_printf() */
+#endif
return (0);
}
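
The debug output above reflects how the attribute-cache timeout is derived: a fraction of the time since the file last changed, clamped to the mount's configured bounds (acregmin/acregmax for files, acdirmin/acdirmax for directories). A sketch of that clamp, assuming the divide-by-ten heuristic the initial-timeo printf suggests:

#include <time.h>

/*
 * Attribute-cache timeout heuristic (sketch): files that changed
 * recently get a short timeout, stable files a longer one, bounded
 * by the mount's acmin/acmax settings.
 */
static int
attrcache_timeo(time_t now, time_t mtime, int acmin, int acmax)
{
	int timeo = (int)((now - mtime) / 10);

	if (timeo < acmin)
		timeo = acmin;
	else if (timeo > acmax)
		timeo = acmax;
	return (timeo);
}

The cache entry is then considered stale once its age (time_second - n_attrstamp) reaches timeo, which is exactly the comparison the hunk above wraps in n_mtx.
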
@@ -714,7 +793,8 @@ nfs_getcookie(struct nfsnode *np, off_t off, int add)
{
struct nfsdmap *dp, *dp2;
int pos;
-
+ nfsuint64 *retval = NULL;
+
pos = (uoff_t)off / NFS_DIRBLKSIZ;
if (pos == 0 || off < 0) {
#ifdef DIAGNOSTIC
@@ -732,14 +812,14 @@ nfs_getcookie(struct nfsnode *np, off_t off, int add)
dp->ndm_eocookie = 0;
LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list);
} else
- return (NULL);
+ goto out;
}
while (pos >= NFSNUMCOOKIES) {
pos -= NFSNUMCOOKIES;
if (LIST_NEXT(dp, ndm_list)) {
if (!add && dp->ndm_eocookie < NFSNUMCOOKIES &&
- pos >= dp->ndm_eocookie)
- return (NULL);
+ pos >= dp->ndm_eocookie)
+ goto out;
dp = LIST_NEXT(dp, ndm_list);
} else if (add) {
MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap),
@@ -748,15 +828,17 @@ nfs_getcookie(struct nfsnode *np, off_t off, int add)
LIST_INSERT_AFTER(dp, dp2, ndm_list);
dp = dp2;
} else
- return (NULL);
+ goto out;
}
if (pos >= dp->ndm_eocookie) {
if (add)
dp->ndm_eocookie = pos + 1;
else
- return (NULL);
+ goto out;
}
- return (&dp->ndm_cookies[pos]);
+ retval = &dp->ndm_cookies[pos];
+out:
+ return (retval);
}
/*
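
Stripped of the list maintenance, nfs_getcookie() above is index arithmetic: the directory offset selects one cookie slot, NFSNUMCOOKIES slots per nfsdmap block. A pure-arithmetic sketch (constants illustrative; the real values live in the nfs headers):

#define NFS_DIRBLKSIZ	512	/* illustrative */
#define NFSNUMCOOKIES	31	/* illustrative */

/*
 * Map a directory offset to (block index, slot within block).
 * Offset 0 and negative offsets need no cookie and are rejected
 * by the caller, as in the real function.
 */
static void
cookie_slot(long long off, int *blkp, int *slotp)
{
	long long pos = off / NFS_DIRBLKSIZ;

	*blkp = (int)(pos / NFSNUMCOOKIES);	/* ndm_list hops */
	*slotp = (int)(pos % NFSNUMCOOKIES);	/* index into ndm_cookies[] */
}

The single-exit rewrite (goto out) in the hunk keeps the function easy to bracket with the new dircookie lock that its callers now hold.
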
@@ -773,11 +855,13 @@ nfs_invaldir(struct vnode *vp)
if (vp->v_type != VDIR)
panic("nfs: invaldir not dir");
#endif
+ nfs_dircookie_lock(np);
np->n_direofoffset = 0;
np->n_cookieverf.nfsuquad[0] = 0;
np->n_cookieverf.nfsuquad[1] = 0;
if (LIST_FIRST(&np->n_cookies))
LIST_FIRST(&np->n_cookies)->ndm_eocookie = 0;
+ nfs_dircookie_unlock(np);
}
/*
@@ -797,8 +881,6 @@ nfs_clearcommit(struct mount *mp)
struct buf *bp, *nbp;
int s;
- GIANT_REQUIRED;
-
s = splbio();
MNT_ILOCK(mp);
MNT_VNODE_FOREACH(vp, mp, nvp) {
@@ -896,7 +978,7 @@ nfsm_getfh_xx(nfsfh_t **f, int *s, int v3, struct mbuf **md, caddr_t *dpos)
int
nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md,
- caddr_t *dpos)
+ caddr_t *dpos)
{
int t1;
@@ -910,7 +992,7 @@ nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md,
int
nfsm_postop_attr_xx(struct vnode **v, int *f, struct mbuf **md,
- caddr_t *dpos)
+ caddr_t *dpos)
{
u_int32_t *tl;
int t1;
@@ -945,9 +1027,11 @@ nfsm_wcc_data_xx(struct vnode **v, int *f, struct mbuf **md, caddr_t *dpos)
tl = nfsm_dissect_xx(6 * NFSX_UNSIGNED, md, dpos);
if (tl == NULL)
return EBADRPC;
+ mtx_lock(&(VTONFS(*v))->n_mtx);
if (*f)
ttretf = (VTONFS(*v)->n_mtime.tv_sec == fxdr_unsigned(u_int32_t, *(tl + 2)) &&
VTONFS(*v)->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3)));
+ mtx_unlock(&(VTONFS(*v))->n_mtx);
}
t1 = nfsm_postop_attr_xx(v, &ttattrf, md, dpos);
if (t1)
diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c
index 2c74bee..17a009c 100644
--- a/sys/nfsclient/nfs_vfsops.c
+++ b/sys/nfsclient/nfs_vfsops.c
@@ -35,6 +35,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+
#include "opt_bootp.h"
#include "opt_nfsroot.h"
@@ -84,6 +85,7 @@ MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write st
uma_zone_t nfsmount_zone;
struct nfsstats nfsstats;
+
SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RD,
&nfsstats, nfsstats, "S,nfsstats");
@@ -183,7 +185,8 @@ nfs_iosize(struct nfsmount *nmp)
* space.
*/
iosize = max(nmp->nm_rsize, nmp->nm_wsize);
- if (iosize < PAGE_SIZE) iosize = PAGE_SIZE;
+ if (iosize < PAGE_SIZE)
+ iosize = PAGE_SIZE;
return iosize;
}
@@ -257,8 +260,12 @@ nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
return (error);
}
vp = NFSTOV(np);
- if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ mtx_lock(&nmp->nm_mtx);
+ if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
+ mtx_unlock(&nmp->nm_mtx);
(void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
+ } else
+ mtx_unlock(&nmp->nm_mtx);
nfsstats.rpccnt[NFSPROC_FSSTAT]++;
mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3));
mb = mreq;
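
The statfs hunk above shows the idiom used for NFSSTA_GOTFSINFO throughout this commit: test the flag under nm_mtx, but drop the mutex before calling nfs_fsinfo(), which performs an RPC and may sleep. A compact sketch of that test-then-drop shape (pthreads and all names purely illustrative):

#include <pthread.h>

#define GOTFSINFO	0x01

struct mount_state {
	pthread_mutex_t	mtx;	/* the nm_mtx role */
	int		state;	/* the nm_state role */
};

static void
fetch_fsinfo(struct mount_state *ms)
{
	/* stand-in for the FSINFO RPC: sleeps, then publishes the result */
	pthread_mutex_lock(&ms->mtx);
	ms->state |= GOTFSINFO;
	pthread_mutex_unlock(&ms->mtx);
}

static void
maybe_fetch_fsinfo(struct mount_state *ms)
{
	pthread_mutex_lock(&ms->mtx);
	if ((ms->state & GOTFSINFO) == 0) {
		pthread_mutex_unlock(&ms->mtx);	/* never sleep holding mtx */
		fetch_fsinfo(ms);
	} else
		pthread_mutex_unlock(&ms->mtx);
}

The harmless race -- two threads both finding the flag clear and both issuing the RPC -- is tolerated, since fetching fsinfo twice is merely redundant.
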
@@ -273,7 +280,9 @@ nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
goto nfsmout;
}
sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
+ mtx_lock(&nmp->nm_mtx);
sbp->f_iosize = nfs_iosize(nmp);
+ mtx_unlock(&nmp->nm_mtx);
if (v3) {
sbp->f_bsize = NFS_FABLKSIZE;
tquad = fxdr_hyper(&sfp->sf_tbytes);
@@ -314,7 +323,7 @@ nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
int error = 0, retattr;
struct mbuf *mreq, *mrep, *md, *mb;
u_int64_t maxfsize;
-
+
nfsstats.rpccnt[NFSPROC_FSINFO]++;
mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1));
mb = mreq;
@@ -323,6 +332,7 @@ nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
nfsm_request(vp, NFSPROC_FSINFO, td, cred);
nfsm_postop_attr(vp, retattr);
if (!error) {
+ mtx_lock(&nmp->nm_mtx);
fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
@@ -358,6 +368,7 @@ nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
nmp->nm_maxfilesize = maxfsize;
nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
nmp->nm_state |= NFSSTA_GOTFSINFO;
+ mtx_unlock(&nmp->nm_mtx);
}
m_freem(mrep);
nfsmout:
@@ -664,8 +675,7 @@ nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp)
if (nmp->nm_sotype == SOCK_DGRAM)
while (nfs_connect(nmp, NULL)) {
printf("nfs_args: retrying connect\n");
- (void) tsleep((caddr_t)&lbolt,
- PSOCK, "nfscon", 0);
+ (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
}
}
}
@@ -693,24 +703,31 @@ nfs_mount(struct mount *mp, struct thread *td)
size_t len;
u_char nfh[NFSX_V3FHMAX];
- if (vfs_filteropt(mp->mnt_optnew, nfs_opts))
- return (EINVAL);
+ if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
+ error = EINVAL;
+ goto out;
+ }
- if (mp->mnt_flag & MNT_ROOTFS)
- return (nfs_mountroot(mp, td));
+ if (mp->mnt_flag & MNT_ROOTFS) {
+ error = nfs_mountroot(mp, td);
+ goto out;
+ }
error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof args);
if (error)
- return (error);
+ goto out;
if (args.version != NFS_ARGSVERSION) {
- return (EPROGMISMATCH);
+ error = EPROGMISMATCH;
+ goto out;
}
if (mp->mnt_flag & MNT_UPDATE) {
struct nfsmount *nmp = VFSTONFS(mp);
- if (nmp == NULL)
- return (EIO);
+ if (nmp == NULL) {
+ error = EIO;
+ goto out;
+ }
/*
* When doing an update, we can't change from or to
* v3, switch lockd strategies or change cookie translation
@@ -720,7 +737,7 @@ nfs_mount(struct mount *mp, struct thread *td)
(nmp->nm_flag &
(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
nfs_decode_args(mp, nmp, &args);
- return (0);
+ goto out;
}
/*
@@ -734,21 +751,25 @@ nfs_mount(struct mount *mp, struct thread *td)
*/
if (nfs_ip_paranoia == 0)
args.flags |= NFSMNT_NOCONN;
- if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX)
- return (EINVAL);
+ if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
+ error = EINVAL;
+ goto out;
+ }
error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize);
if (error)
- return (error);
+ goto out;
error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
if (error)
- return (error);
+ goto out;
bzero(&hst[len], MNAMELEN - len);
/* sockargs() call must be after above copyin() calls */
error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen);
if (error)
- return (error);
+ goto out;
args.fh = nfh;
error = mountnfs(&args, mp, nam, hst, &vp, td->td_ucred);
+ mp->mnt_kern_flag |= MNTK_MPSAFE;
+out:
return (error);
}
@@ -771,12 +792,11 @@ nfs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
error = copyin(data, &args, sizeof (struct nfs_args));
if (error)
- return (error);
+ return error;
ma = mount_arg(ma, "nfs_args", &args, sizeof args);
error = kernel_mount(ma, flags);
-
return (error);
}
@@ -805,6 +825,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
}
vfs_getnewfsid(mp);
nmp->nm_mountp = mp;
+ mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF);
/*
* V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
@@ -851,10 +872,6 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
nfs_decode_args(mp, nmp, argp);
- if (nmp->nm_sotype == SOCK_STREAM)
- mtx_init(&nmp->nm_nfstcpstate.mtx, "NFS/TCP state lock",
- NULL, MTX_DEF);
-
/*
* For Connection based sockets (TCP,...) defer the connect until
* the first request, in case the server is not responding.
@@ -869,7 +886,9 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
* stuck on a dead server and we are holding a lock on the mount
* point.
*/
+ mtx_lock(&nmp->nm_mtx);
mp->mnt_stat.f_iosize = nfs_iosize(nmp);
+ mtx_unlock(&nmp->nm_mtx);
/*
* A reference count is needed on the nfsnode representing the
* remote root. If this object is not persistent, then backward
@@ -900,8 +919,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
return (0);
bad:
- if (nmp->nm_sotype == SOCK_STREAM)
- mtx_destroy(&nmp->nm_nfstcpstate.mtx);
+ mtx_destroy(&nmp->nm_mtx);
nfs_disconnect(nmp);
uma_zfree(nfsmount_zone, nmp);
FREE(nam, M_SONAME);
@@ -930,12 +948,12 @@ nfs_unmount(struct mount *mp, int mntflags, struct thread *td)
if (flags & FORCECLOSE) {
error = nfs_nmcancelreqs(nmp);
if (error)
- return (error);
+ goto out;
}
/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
error = vflush(mp, 1, flags, td);
if (error)
- return (error);
+ goto out;
/*
* We are now committed to the unmount.
@@ -943,11 +961,10 @@ nfs_unmount(struct mount *mp, int mntflags, struct thread *td)
nfs_disconnect(nmp);
FREE(nmp->nm_nam, M_SONAME);
- if (nmp->nm_sotype == SOCK_STREAM)
- mtx_destroy(&nmp->nm_nfstcpstate.mtx);
-
+ mtx_destroy(&nmp->nm_mtx);
uma_zfree(nfsmount_zone, nmp);
- return (0);
+out:
+ return (error);
}
/*
@@ -964,15 +981,18 @@ nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
nmp = VFSTONFS(mp);
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
if (error)
- return (error);
+ return error;
vp = NFSTOV(np);
/*
* Get transfer parameters and attributes for root vnode once.
*/
+ mtx_lock(&nmp->nm_mtx);
if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 &&
(nmp->nm_flag & NFSMNT_NFSV3)) {
+ mtx_unlock(&nmp->nm_mtx);
nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread);
- }
+ } else
+ mtx_unlock(&nmp->nm_mtx);
if (vp->v_type == VNON)
vp->v_type = VDIR;
vp->v_vflag |= VV_ROOT;
@@ -1051,8 +1071,10 @@ nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
break;
#endif
case VFS_CTL_QUERY:
+ mtx_lock(&nmp->nm_mtx);
if (nmp->nm_state & NFSSTA_TIMEO)
vq.vq_flags |= VQ_NOTRESP;
+ mtx_unlock(&nmp->nm_mtx);
#if 0
if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
(nmp->nm_state & NFSSTA_LOCKTIMEO))
diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c
index ccaa150..f8124ff 100644
--- a/sys/nfsclient/nfs_vnops.c
+++ b/sys/nfsclient/nfs_vnops.c
@@ -192,6 +192,7 @@ static int nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
/*
* Global variables
*/
+struct mtx nfs_iod_mtx;
struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
int nfs_numasync = 0;
@@ -241,6 +242,23 @@ SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
#define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY \
| NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \
| NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
+
+/*
+ * SMP Locking Note :
+ * Listed below each lock's description are the other locks that may be
+ * acquired while that lock is held (i.e. the required lock ordering).
+ * np->n_mtx : Protects the fields in the nfsnode.
+ *     VM Object Lock
+ *     VI_MTX (acquired indirectly)
+ * nmp->nm_mtx : Protects the fields in the nfsmount.
+ *     rep->r_mtx
+ * nfs_iod_mtx : Global lock, protects shared nfsiod state.
+ * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
+ *     nmp->nm_mtx
+ *     rep->r_mtx
+ * rep->r_mtx : Protects the fields in an nfsreq.
+ */
+
static int
nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
struct ucred *cred)
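
The note above defines a partial order, and every multi-lock path in the client must acquire in that order to stay deadlock-free. A sketch of a compliant path, assuming exactly the ordering documented (nfs_reqq_mtx, then nmp->nm_mtx, then rep->r_mtx) and nothing more, with pthread mutexes standing in for the kernel locks:

#include <pthread.h>

static pthread_mutex_t reqq_mtx = PTHREAD_MUTEX_INITIALIZER;	/* nfs_reqq_mtx */
static pthread_mutex_t nm_mtx   = PTHREAD_MUTEX_INITIALIZER;	/* nmp->nm_mtx  */
static pthread_mutex_t r_mtx    = PTHREAD_MUTEX_INITIALIZER;	/* rep->r_mtx   */

static void
walk_request_list(void)
{
	pthread_mutex_lock(&reqq_mtx);		/* the list lock first */
	pthread_mutex_lock(&nm_mtx);		/* then the mount */
	pthread_mutex_lock(&r_mtx);		/* then the request */
	/* ... examine or retransmit the request ... */
	pthread_mutex_unlock(&r_mtx);
	pthread_mutex_unlock(&nm_mtx);
	pthread_mutex_unlock(&reqq_mtx);
}

Release order is the reverse by convention; what matters for deadlock avoidance is only that no thread ever acquires upward against the documented order.
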
@@ -266,9 +284,11 @@ nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
if (!error) {
tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
rmode = fxdr_unsigned(u_int32_t, *tl);
+ mtx_lock(&np->n_mtx);
np->n_mode = rmode;
np->n_modeuid = cred->cr_uid;
np->n_modestamp = time_second;
+ mtx_unlock(&np->n_mtx);
}
m_freem(mrep);
nfsmout:
@@ -343,6 +363,7 @@ nfs_access(struct vop_access_args *ap)
* Does our cached result allow us to give a definite yes to
* this request?
*/
+ mtx_lock(&np->n_mtx);
if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
(ap->a_cred->cr_uid == np->n_modeuid) &&
((np->n_mode & mode) == mode)) {
@@ -352,18 +373,21 @@ nfs_access(struct vop_access_args *ap)
* Either a no, or a don't know. Go to the wire.
*/
nfsstats.accesscache_misses++;
+ mtx_unlock(&np->n_mtx);
error = nfs3_access_otw(vp, wmode, ap->a_td,ap->a_cred);
+ mtx_lock(&np->n_mtx);
if (!error) {
if ((np->n_mode & mode) != mode) {
error = EACCES;
}
}
}
+ mtx_unlock(&np->n_mtx);
return (error);
} else {
- if ((error = nfsspec_access(ap)) != 0)
+ if ((error = nfsspec_access(ap)) != 0) {
return (error);
-
+ }
/*
* Attempt to prevent a mapped root from accessing a file
* which it shouldn't. We try to read a byte from the file
@@ -371,12 +395,14 @@ nfs_access(struct vop_access_args *ap)
* After calling nfsspec_access, we should have the correct
* file size cached.
*/
+ mtx_lock(&np->n_mtx);
if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD)
&& VTONFS(vp)->n_size > 0) {
struct iovec aiov;
struct uio auio;
char buf[1];
+ mtx_unlock(&np->n_mtx);
aiov.iov_base = buf;
aiov.iov_len = 1;
auio.uio_iov = &aiov;
@@ -400,7 +426,8 @@ nfs_access(struct vop_access_args *ap)
error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
else
error = EACCES;
- }
+ } else
+ mtx_unlock(&np->n_mtx);
return (error);
}
}
@@ -428,7 +455,9 @@ nfs_open(struct vop_open_args *ap)
/*
* Get a valid lease. If cached data is stale, flush it.
*/
+ mtx_lock(&np->n_mtx);
if (np->n_flag & NMODIFIED) {
+ mtx_unlock(&np->n_mtx);
error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
if (error == EINTR || error == EIO)
return (error);
@@ -438,20 +467,28 @@ nfs_open(struct vop_open_args *ap)
error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td);
if (error)
return (error);
+ mtx_lock(&np->n_mtx);
np->n_mtime = vattr.va_mtime;
+ mtx_unlock(&np->n_mtx);
} else {
np->n_attrstamp = 0;
+ mtx_unlock(&np->n_mtx);
error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td);
if (error)
return (error);
+ mtx_lock(&np->n_mtx);
if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
if (vp->v_type == VDIR)
np->n_direofoffset = 0;
+ mtx_unlock(&np->n_mtx);
error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
- if (error == EINTR || error == EIO)
+ if (error == EINTR || error == EIO) {
return (error);
+ }
+ mtx_lock(&np->n_mtx);
np->n_mtime = vattr.va_mtime;
}
+ mtx_unlock(&np->n_mtx);
}
/*
* If the object has >= 1 O_DIRECT active opens, we disable caching.
@@ -461,11 +498,12 @@ nfs_open(struct vop_open_args *ap)
error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
if (error)
return (error);
+ mtx_lock(&np->n_mtx);
np->n_flag |= NNONCACHE;
+ mtx_unlock(&np->n_mtx);
}
np->n_directio_opens++;
}
- np->ra_expect_lbn = 0;
vnode_create_vobject(vp, vattr.va_size, ap->a_td);
return (0);
}
@@ -519,7 +557,9 @@ nfs_close(struct vop_close_args *ap)
vm_object_page_clean(vp->v_object, 0, 0, 0);
VM_OBJECT_UNLOCK(vp->v_object);
}
+ mtx_lock(&np->n_mtx);
if (np->n_flag & NMODIFIED) {
+ mtx_unlock(&np->n_mtx);
if (NFS_ISV3(vp)) {
/*
* Under NFSv3 we have dirty buffers to dispose of. We
@@ -539,6 +579,7 @@ nfs_close(struct vop_close_args *ap)
/* np->n_flag &= ~NMODIFIED; */
} else
error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
+ mtx_lock(&np->n_mtx);
}
/*
* Invalidate the attribute cache in all cases.
@@ -551,13 +592,16 @@ nfs_close(struct vop_close_args *ap)
np->n_flag &= ~NWRITEERR;
error = np->n_error;
}
+ mtx_unlock(&np->n_mtx);
}
if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
+ mtx_lock(&np->n_mtx);
KASSERT((np->n_directio_opens > 0),
- ("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
+ ("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
np->n_directio_opens--;
if (np->n_directio_opens == 0)
np->n_flag &= ~NNONCACHE;
+ mtx_unlock(&np->n_mtx);
}
return (error);
}
@@ -578,21 +622,21 @@ nfs_getattr(struct vop_getattr_args *ap)
/*
* Update local times for special files.
*/
+ mtx_lock(&np->n_mtx);
if (np->n_flag & (NACC | NUPD))
np->n_flag |= NCHG;
+ mtx_unlock(&np->n_mtx);
/*
* First look in the cache.
*/
if (nfs_getattrcache(vp, ap->a_vap) == 0)
- return (0);
-
+ goto nfsmout;
if (v3 && nfsaccess_cache_timeout > 0) {
nfsstats.accesscache_misses++;
nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_td, ap->a_cred);
if (nfs_getattrcache(vp, ap->a_vap) == 0)
- return (0);
+ goto nfsmout;
}
-
nfsstats.rpccnt[NFSPROC_GETATTR]++;
mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
mb = mreq;
@@ -635,8 +679,10 @@ nfs_setattr(struct vop_setattr_args *ap)
if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
- (vp->v_mount->mnt_flag & MNT_RDONLY))
- return (EROFS);
+ (vp->v_mount->mnt_flag & MNT_RDONLY)) {
+ error = EROFS;
+ goto out;
+ }
if (vap->va_size != VNOVAL) {
switch (vp->v_type) {
case VDIR:
@@ -650,7 +696,7 @@ nfs_setattr(struct vop_setattr_args *ap)
vap->va_mode == (mode_t)VNOVAL &&
vap->va_uid == (uid_t)VNOVAL &&
vap->va_gid == (gid_t)VNOVAL)
- return (0);
+ return (0);
vap->va_size = VNOVAL;
break;
default:
@@ -660,47 +706,60 @@ nfs_setattr(struct vop_setattr_args *ap)
*/
if (vp->v_mount->mnt_flag & MNT_RDONLY)
return (EROFS);
-
/*
* We run vnode_pager_setsize() early (why?),
* we must set np->n_size now to avoid vinvalbuf
* V_SAVE races that might setsize a lower
* value.
*/
-
+ mtx_lock(&np->n_mtx);
tsize = np->n_size;
+ mtx_unlock(&np->n_mtx);
error = nfs_meta_setsize(vp, ap->a_cred,
- ap->a_td, vap->va_size);
-
+ ap->a_td, vap->va_size);
+ mtx_lock(&np->n_mtx);
if (np->n_flag & NMODIFIED) {
+ tsize = np->n_size;
+ mtx_unlock(&np->n_mtx);
if (vap->va_size == 0)
error = nfs_vinvalbuf(vp, 0, ap->a_td, 1);
else
error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
if (error) {
- vnode_pager_setsize(vp, np->n_size);
- return (error);
+ vnode_pager_setsize(vp, tsize);
+ goto out;
}
- }
+ } else
+ mtx_unlock(&np->n_mtx);
/*
* np->n_size has already been set to vap->va_size
* in nfs_meta_setsize(). We must set it again since
* nfs_loadattrcache() could be called through
* nfs_meta_setsize() and could modify np->n_size.
*/
+ mtx_lock(&np->n_mtx);
np->n_vattr.va_size = np->n_size = vap->va_size;
+ mtx_unlock(&np->n_mtx);
};
- } else if ((vap->va_mtime.tv_sec != VNOVAL ||
- vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) &&
- vp->v_type == VREG &&
- (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1)) != 0 &&
- (error == EINTR || error == EIO))
- return (error);
+ } else {
+ mtx_lock(&np->n_mtx);
+ if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) &&
+ (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
+ mtx_unlock(&np->n_mtx);
+ if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1)) != 0 &&
+ (error == EINTR || error == EIO))
+ return error;
+ } else
+ mtx_unlock(&np->n_mtx);
+ }
error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_td);
if (error && vap->va_size != VNOVAL) {
+ mtx_lock(&np->n_mtx);
np->n_size = np->n_vattr.va_size = tsize;
- vnode_pager_setsize(vp, np->n_size);
+ vnode_pager_setsize(vp, tsize);
+ mtx_unlock(&np->n_mtx);
}
+out:
return (error);
}
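
The size-change path above snapshots the old size into tsize under n_mtx before truncating, so a failed SETATTR can restore both n_size and the pager size. The save/attempt/rollback shape, reduced to a sketch (illustrative names; try_set_size() is a stand-in for the RPC):

#include <pthread.h>

struct node {
	pthread_mutex_t		mtx;	/* the np->n_mtx role */
	unsigned long long	size;	/* the np->n_size role */
};

static int
try_set_size(struct node *n, unsigned long long newsz)
{
	(void)n;
	(void)newsz;
	return (0);	/* stand-in for the SETATTR RPC; may sleep, may fail */
}

static int
set_size_with_rollback(struct node *n, unsigned long long newsz)
{
	unsigned long long tsize;
	int error;

	pthread_mutex_lock(&n->mtx);
	tsize = n->size;		/* snapshot for rollback */
	n->size = newsz;
	pthread_mutex_unlock(&n->mtx);

	error = try_set_size(n, newsz);	/* never hold the mutex here */
	if (error) {
		pthread_mutex_lock(&n->mtx);
		n->size = tsize;	/* undo on failure */
		pthread_mutex_unlock(&n->mtx);
	}
	return (error);
}
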
@@ -779,7 +838,7 @@ nfs_lookup(struct vop_lookup_args *ap)
int error = 0, attrflag, fhsize;
int v3 = NFS_ISV3(dvp);
struct thread *td = cnp->cn_thread;
-
+
*vpp = NULLVP;
if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
@@ -964,8 +1023,10 @@ nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
nfsm_strsiz(len, NFS_MAXPATHLEN);
if (len == NFS_MAXPATHLEN) {
struct nfsnode *np = VTONFS(vp);
+ mtx_lock(&np->n_mtx);
if (np->n_size && np->n_size < NFS_MAXPATHLEN)
len = np->n_size;
+ mtx_unlock(&np->n_mtx);
}
nfsm_mtouio(uiop, len);
}
@@ -987,17 +1048,23 @@ nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
struct nfsmount *nmp;
int error = 0, len, retlen, tsiz, eof, attrflag;
int v3 = NFS_ISV3(vp);
+ int rsize;
#ifndef nolint
eof = 0;
#endif
nmp = VFSTONFS(vp->v_mount);
tsiz = uiop->uio_resid;
- if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
+ mtx_lock(&nmp->nm_mtx);
+ if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
+ mtx_unlock(&nmp->nm_mtx);
return (EFBIG);
+ }
+ rsize = nmp->nm_rsize;
+ mtx_unlock(&nmp->nm_mtx);
while (tsiz > 0) {
nfsstats.rpccnt[NFSPROC_READ]++;
- len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
+ len = (tsiz > rsize) ? rsize : tsiz;
mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
mb = mreq;
bpos = mtod(mb, caddr_t);
@@ -1020,9 +1087,10 @@ nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
}
tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
eof = fxdr_unsigned(int, *(tl + 1));
- } else
+ } else {
nfsm_loadattr(vp, NULL);
- nfsm_strsiz(retlen, nmp->nm_rsize);
+ }
+ nfsm_strsiz(retlen, rsize);
nfsm_mtouio(uiop, retlen);
m_freem(mrep);
tsiz -= retlen;
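
nfs_readrpc now copies nm_rsize to a local under nm_mtx and chunks the whole transfer against that copy; nfs_writerpc (next hunk) does the same with nm_wsize. This buys two things: the mutex is never held across an RPC, and one consistent chunk size is used even if the mount is retuned mid-transfer. The idiom in miniature (illustrative names, pthread mutex for nm_mtx):

#include <pthread.h>
#include <stddef.h>

struct mnt {
	pthread_mutex_t	mtx;	/* the nm_mtx role */
	size_t		rsize;	/* may be retuned concurrently */
};

static size_t
do_read_rpc(size_t len)
{
	return (len);	/* stand-in for one READ RPC; returns bytes moved */
}

static void
read_loop(struct mnt *m, size_t resid)
{
	size_t rsize, len;

	pthread_mutex_lock(&m->mtx);
	rsize = m->rsize;	/* one consistent chunk size (assumed > 0) */
	pthread_mutex_unlock(&m->mtx);

	while (resid > 0) {
		len = (resid > rsize) ? rsize : resid;
		resid -= do_read_rpc(len);
	}
}
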
@@ -1043,7 +1111,7 @@ nfsmout:
*/
int
nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
- int *iomode, int *must_commit)
+ int *iomode, int *must_commit)
{
u_int32_t *tl;
int32_t backup;
@@ -1052,18 +1120,24 @@ nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
-
+ int wsize;
+
#ifndef DIAGNOSTIC
if (uiop->uio_iovcnt != 1)
panic("nfs: writerpc iovcnt > 1");
#endif
*must_commit = 0;
tsiz = uiop->uio_resid;
- if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
+ mtx_lock(&nmp->nm_mtx);
+ if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
+ mtx_unlock(&nmp->nm_mtx);
return (EFBIG);
+ }
+ wsize = nmp->nm_wsize;
+ mtx_unlock(&nmp->nm_mtx);
while (tsiz > 0) {
nfsstats.rpccnt[NFSPROC_WRITE]++;
- len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
+ len = (tsiz > wsize) ? wsize : tsiz;
mreq = nfsm_reqhead(vp, NFSPROC_WRITE,
NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
mb = mreq;
@@ -1122,6 +1196,7 @@ nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
else if (committed == NFSV3WRITE_DATASYNC &&
commit == NFSV3WRITE_UNSTABLE)
committed = commit;
+ mtx_lock(&nmp->nm_mtx);
if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
NFSX_V3WRITEVERF);
@@ -1132,11 +1207,16 @@ nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
NFSX_V3WRITEVERF);
}
+ mtx_unlock(&nmp->nm_mtx);
}
- } else
- nfsm_loadattr(vp, NULL);
- if (wccflag)
- VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime;
+ } else {
+ nfsm_loadattr(vp, NULL);
+ }
+ if (wccflag) {
+ mtx_lock(&(VTONFS(vp))->n_mtx);
+ VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime;
+ mtx_unlock(&(VTONFS(vp))->n_mtx);
+ }
m_freem(mrep);
if (error)
break;
@@ -1232,9 +1312,11 @@ nfsmout:
cache_enter(dvp, newvp, cnp);
*vpp = newvp;
}
+ mtx_lock(&(VTONFS(dvp))->n_mtx);
VTONFS(dvp)->n_flag |= NMODIFIED;
if (!wccflag)
VTONFS(dvp)->n_attrstamp = 0;
+ mtx_unlock(&(VTONFS(dvp))->n_mtx);
return (error);
}
@@ -1246,7 +1328,6 @@ nfsmout:
static int
nfs_mknod(struct vop_mknod_args *ap)
{
-
return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
}
@@ -1359,9 +1440,11 @@ nfsmout:
cache_enter(dvp, newvp, cnp);
*ap->a_vpp = newvp;
}
+ mtx_lock(&(VTONFS(dvp))->n_mtx);
VTONFS(dvp)->n_flag |= NMODIFIED;
if (!wccflag)
VTONFS(dvp)->n_attrstamp = 0;
+ mtx_unlock(&(VTONFS(dvp))->n_mtx);
return (error);
}
@@ -1434,7 +1517,6 @@ nfs_remove(struct vop_remove_args *ap)
int
nfs_removeit(struct sillyrename *sp)
{
-
/*
* Make sure that the directory vnode is still valid.
* XXX we should lock sp->s_dvp here.
@@ -1469,9 +1551,11 @@ nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
nfsm_wcc_data(dvp, wccflag);
m_freem(mrep);
nfsmout:
+ mtx_lock(&(VTONFS(dvp))->n_mtx);
VTONFS(dvp)->n_flag |= NMODIFIED;
if (!wccflag)
VTONFS(dvp)->n_attrstamp = 0;
+ mtx_unlock(&(VTONFS(dvp))->n_mtx);
return (error);
}
@@ -1502,7 +1586,7 @@ nfs_rename(struct vop_rename_args *ap)
}
if (fvp == tvp) {
- printf("nfs_rename: fvp == tvp (can't happen)\n");
+ nfs_printf("nfs_rename: fvp == tvp (can't happen)\n");
error = 0;
goto out;
}
@@ -1609,8 +1693,12 @@ nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen,
}
m_freem(mrep);
nfsmout:
+ mtx_lock(&(VTONFS(fdvp))->n_mtx);
VTONFS(fdvp)->n_flag |= NMODIFIED;
+ mtx_unlock(&(VTONFS(fdvp))->n_mtx);
+ mtx_lock(&(VTONFS(tdvp))->n_mtx);
VTONFS(tdvp)->n_flag |= NMODIFIED;
+ mtx_unlock(&(VTONFS(tdvp))->n_mtx);
if (!fwccflag)
VTONFS(fdvp)->n_attrstamp = 0;
if (!twccflag)
@@ -1659,7 +1747,9 @@ nfs_link(struct vop_link_args *ap)
}
m_freem(mrep);
nfsmout:
+ mtx_lock(&(VTONFS(tdvp))->n_mtx);
VTONFS(tdvp)->n_flag |= NMODIFIED;
+ mtx_unlock(&(VTONFS(tdvp))->n_mtx);
if (!attrflag)
VTONFS(vp)->n_attrstamp = 0;
if (!wccflag)
@@ -1758,7 +1848,9 @@ nfsmout:
} else {
*ap->a_vpp = newvp;
}
+ mtx_lock(&(VTONFS(dvp))->n_mtx);
VTONFS(dvp)->n_flag |= NMODIFIED;
+ mtx_unlock(&(VTONFS(dvp))->n_mtx);
if (!wccflag)
VTONFS(dvp)->n_attrstamp = 0;
return (error);
@@ -1813,7 +1905,9 @@ nfs_mkdir(struct vop_mkdir_args *ap)
nfsm_wcc_data(dvp, wccflag);
m_freem(mrep);
nfsmout:
+ mtx_lock(&(VTONFS(dvp))->n_mtx);
VTONFS(dvp)->n_flag |= NMODIFIED;
+ mtx_unlock(&(VTONFS(dvp))->n_mtx);
if (!wccflag)
VTONFS(dvp)->n_attrstamp = 0;
/*
@@ -1869,7 +1963,9 @@ nfs_rmdir(struct vop_rmdir_args *ap)
nfsm_wcc_data(dvp, wccflag);
m_freem(mrep);
nfsmout:
+ mtx_lock(&(VTONFS(dvp))->n_mtx);
VTONFS(dvp)->n_flag |= NMODIFIED;
+ mtx_unlock(&(VTONFS(dvp))->n_mtx);
if (!wccflag)
VTONFS(dvp)->n_attrstamp = 0;
cache_purge(dvp);
@@ -1891,20 +1987,25 @@ nfs_readdir(struct vop_readdir_args *ap)
struct vnode *vp = ap->a_vp;
struct nfsnode *np = VTONFS(vp);
struct uio *uio = ap->a_uio;
- int tresid, error;
+ int tresid, error = 0;
struct vattr vattr;
+
+ if (vp->v_type != VDIR)
+ return (EPERM);
- if (vp->v_type != VDIR)
- return (EPERM);
/*
* First, check for hit on the EOF offset cache
*/
if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
(np->n_flag & NMODIFIED) == 0) {
- if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_td) == 0 &&
- !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
- nfsstats.direofcache_hits++;
- return (0);
+ if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_td) == 0) {
+ mtx_lock(&np->n_mtx);
+ if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
+ mtx_unlock(&np->n_mtx);
+ nfsstats.direofcache_hits++;
+ goto out;
+ } else
+ mtx_unlock(&np->n_mtx);
}
}
@@ -1914,8 +2015,10 @@ nfs_readdir(struct vop_readdir_args *ap)
tresid = uio->uio_resid;
error = nfs_bioread(vp, uio, 0, ap->a_cred);
- if (!error && uio->uio_resid == tresid)
+ if (!error && uio->uio_resid == tresid) {
nfsstats.direofcache_misses++;
+ }
+out:
return (error);
}
@@ -1950,11 +2053,16 @@ nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
/*
* If there is no cookie, assume directory was stale.
*/
+ nfs_dircookie_lock(dnp);
cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
- if (cookiep)
+ if (cookiep) {
cookie = *cookiep;
- else
+ nfs_dircookie_unlock(dnp);
+ } else {
+ nfs_dircookie_unlock(dnp);
return (NFSERR_BAD_COOKIE);
+ }
+
/*
* Loop around doing readdir rpc's of size nm_readdirsize
* truncated to a multiple of DIRBLKSIZ.
@@ -1971,8 +2079,10 @@ nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
*tl++ = cookie.nfsuquad[0];
*tl++ = cookie.nfsuquad[1];
+ mtx_lock(&dnp->n_mtx);
*tl++ = dnp->n_cookieverf.nfsuquad[0];
*tl++ = dnp->n_cookieverf.nfsuquad[1];
+ mtx_unlock(&dnp->n_mtx);
} else {
tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
*tl++ = cookie.nfsuquad[0];
@@ -1984,8 +2094,10 @@ nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
if (!error) {
tl = nfsm_dissect(u_int32_t *,
2 * NFSX_UNSIGNED);
+ mtx_lock(&dnp->n_mtx);
dnp->n_cookieverf.nfsuquad[0] = *tl++;
dnp->n_cookieverf.nfsuquad[1] = *tl;
+ mtx_unlock(&dnp->n_mtx);
} else {
m_freem(mrep);
goto nfsmout;
@@ -2100,9 +2212,11 @@ nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
dnp->n_direofoffset = uiop->uio_offset;
else {
if (uiop->uio_resid > 0)
- printf("EEK! readdirrpc resid > 0\n");
+ nfs_printf("EEK! readdirrpc resid > 0\n");
+ nfs_dircookie_lock(dnp);
cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
*cookiep = cookie;
+ nfs_dircookie_unlock(dnp);
}
nfsmout:
return (error);
@@ -2146,11 +2260,15 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
/*
* If there is no cookie, assume directory was stale.
*/
+ nfs_dircookie_lock(dnp);
cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
- if (cookiep)
+ if (cookiep) {
cookie = *cookiep;
- else
+ nfs_dircookie_unlock(dnp);
+ } else {
+ nfs_dircookie_unlock(dnp);
return (NFSERR_BAD_COOKIE);
+ }
/*
* Loop around doing readdir rpc's of size nm_readdirsize
* truncated to a multiple of DIRBLKSIZ.
@@ -2166,8 +2284,10 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED);
*tl++ = cookie.nfsuquad[0];
*tl++ = cookie.nfsuquad[1];
+ mtx_lock(&dnp->n_mtx);
*tl++ = dnp->n_cookieverf.nfsuquad[0];
*tl++ = dnp->n_cookieverf.nfsuquad[1];
+ mtx_unlock(&dnp->n_mtx);
*tl++ = txdr_unsigned(nmp->nm_readdirsize);
*tl = txdr_unsigned(nmp->nm_rsize);
nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred);
@@ -2177,8 +2297,10 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
goto nfsmout;
}
tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
+ mtx_lock(&dnp->n_mtx);
dnp->n_cookieverf.nfsuquad[0] = *tl++;
dnp->n_cookieverf.nfsuquad[1] = *tl++;
+ mtx_unlock(&dnp->n_mtx);
more_dirs = fxdr_unsigned(int, *tl);
/* loop thru the dir entries, doctoring them to 4bsd form */
@@ -2313,9 +2435,9 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
i = fxdr_unsigned(int, *tl);
if (i) {
- tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
- fhsize = fxdr_unsigned(int, *tl);
- nfsm_adv(nfsm_rndup(fhsize));
+ tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
+ fhsize = fxdr_unsigned(int, *tl);
+ nfsm_adv(nfsm_rndup(fhsize));
}
}
if (newvp != NULLVP) {
@@ -2359,9 +2481,11 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
dnp->n_direofoffset = uiop->uio_offset;
else {
if (uiop->uio_resid > 0)
- printf("EEK! readdirplusrpc resid > 0\n");
+ nfs_printf("EEK! readdirplusrpc resid > 0\n");
+ nfs_dircookie_lock(dnp);
cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
*cookiep = cookie;
+ nfs_dircookie_unlock(dnp);
}
nfsmout:
if (newvp != NULLVP) {
@@ -2521,7 +2645,7 @@ nfsmout:
*/
int
nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
- struct thread *td)
+ struct thread *td)
{
u_int32_t *tl;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
@@ -2529,8 +2653,12 @@ nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
int error = 0, wccflag = NFSV3_WCCRATTR;
struct mbuf *mreq, *mrep, *md, *mb;
- if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0)
+ mtx_lock(&nmp->nm_mtx);
+ if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
+ mtx_unlock(&nmp->nm_mtx);
return (0);
+ }
+ mtx_unlock(&nmp->nm_mtx);
nfsstats.rpccnt[NFSPROC_COMMIT]++;
mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
mb = mreq;
@@ -2600,7 +2728,6 @@ nfs_strategy(struct vop_strategy_args *ap)
static int
nfs_fsync(struct vop_fsync_args *ap)
{
-
return (nfs_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1));
}
@@ -2821,8 +2948,10 @@ loop:
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
VI_MTX(vp), "nfsfsync", slpflag, slptimeo);
splx(s);
- if (error == 0)
- panic("nfs_fsync: inconsistent lock");
+ if (error == 0) {
+ BUF_UNLOCK(bp);
+ goto loop;
+ }
if (error == ENOLCK)
goto loop;
if (nfs_sigintr(nmp, NULL, td)) {
@@ -2880,23 +3009,28 @@ loop:
VI_UNLOCK(vp);
goto loop;
}
- /*
- * Wait for all the async IO requests to drain
+ /*
+ * Wait for all the async IO requests to drain
*/
+ VI_UNLOCK(vp);
+ mtx_lock(&np->n_mtx);
while (np->n_directio_asyncwr > 0) {
np->n_flag |= NFSYNCWAIT;
- error = nfs_tsleep(td, (caddr_t)&np->n_directio_asyncwr,
- slpflag | (PRIBIO + 1), "nfsfsync", 0);
+ error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr,
+ &np->n_mtx, slpflag | (PRIBIO + 1),
+ "nfsfsync", 0);
if (error) {
if (nfs_sigintr(nmp, (struct nfsreq *)0, td)) {
- error = EINTR;
+ mtx_unlock(&np->n_mtx);
+ error = EINTR;
goto done;
}
}
}
-
- }
- VI_UNLOCK(vp);
+ mtx_unlock(&np->n_mtx);
+ } else
+ VI_UNLOCK(vp);
+ mtx_lock(&np->n_mtx);
if (np->n_flag & NWRITEERR) {
error = np->n_error;
np->n_flag &= ~NWRITEERR;
@@ -2904,6 +3038,7 @@ loop:
if (commit && vp->v_bufobj.bo_dirty.bv_cnt == 0 &&
vp->v_bufobj.bo_numoutput == 0 && np->n_directio_asyncwr == 0)
np->n_flag &= ~NMODIFIED;
+ mtx_unlock(&np->n_mtx);
done:
if (bvec != NULL && bvec != bvec_on_stack)
free(bvec, M_TEMP);
@@ -2916,13 +3051,19 @@ done:
static int
nfs_advlock(struct vop_advlock_args *ap)
{
-
+ int error;
+
+ mtx_lock(&Giant);
if ((VFSTONFS(ap->a_vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
struct nfsnode *np = VTONFS(ap->a_vp);
- return (lf_advlock(ap, &(np->n_lockf), np->n_size));
+ error = lf_advlock(ap, &(np->n_lockf), np->n_size);
+ goto out;
}
- return (nfs_dolock(ap));
+ error = nfs_dolock(ap);
+out:
+ mtx_unlock(&Giant);
+ return (error);
}
/*
@@ -2934,7 +3075,7 @@ nfs_print(struct vop_print_args *ap)
struct vnode *vp = ap->a_vp;
struct nfsnode *np = VTONFS(vp);
- printf("\tfileid %ld fsid 0x%x",
+ nfs_printf("\tfileid %ld fsid 0x%x",
np->n_vattr.va_fileid, np->n_vattr.va_fsid);
if (vp->v_type == VFIFO)
fifo_printinfo(vp);
@@ -2998,7 +3139,6 @@ nfs_writebp(struct buf *bp, int force __unused, struct thread *td)
reassignbuf(bp);
splx(s);
}
-
brelse(bp);
return (rtval);
}
@@ -3039,9 +3179,11 @@ nfsspec_access(struct vop_access_args *ap)
vap = &vattr;
error = VOP_GETATTR(vp, vap, cred, ap->a_td);
if (error)
- return (error);
- return (vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
- mode, cred, NULL));
+ goto out;
+ error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
+ mode, cred, NULL);
+out:
+ return error;
}
/*
@@ -3051,13 +3193,17 @@ static int
nfsfifo_read(struct vop_read_args *ap)
{
struct nfsnode *np = VTONFS(ap->a_vp);
+ int error;
/*
* Set access flag.
*/
+ mtx_lock(&np->n_mtx);
np->n_flag |= NACC;
getnanotime(&np->n_atim);
- return (fifo_specops.vop_read(ap));
+ mtx_unlock(&np->n_mtx);
+ error = fifo_specops.vop_read(ap);
+ return error;
}
/*
@@ -3071,9 +3217,11 @@ nfsfifo_write(struct vop_write_args *ap)
/*
* Set update flag.
*/
+ mtx_lock(&np->n_mtx);
np->n_flag |= NUPD;
getnanotime(&np->n_mtim);
- return (fifo_specops.vop_write(ap));
+ mtx_unlock(&np->n_mtx);
+ return (fifo_specops.vop_write(ap));
}
/*
@@ -3089,6 +3237,7 @@ nfsfifo_close(struct vop_close_args *ap)
struct vattr vattr;
struct timespec ts;
+ mtx_lock(&np->n_mtx);
if (np->n_flag & (NACC | NUPD)) {
getnanotime(&ts);
if (np->n_flag & NACC)
@@ -3103,9 +3252,13 @@ nfsfifo_close(struct vop_close_args *ap)
vattr.va_atime = np->n_atim;
if (np->n_flag & NUPD)
vattr.va_mtime = np->n_mtim;
+ mtx_unlock(&np->n_mtx);
(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_td);
+ goto out;
}
}
+ mtx_unlock(&np->n_mtx);
+out:
return (fifo_specops.vop_close(ap));
}
diff --git a/sys/nfsclient/nfsmount.h b/sys/nfsclient/nfsmount.h
index a8bcc14..e7b9d0f 100644
--- a/sys/nfsclient/nfsmount.h
+++ b/sys/nfsclient/nfsmount.h
@@ -41,7 +41,6 @@ struct nfs_tcp_mountstate {
#define NFS_TCP_EXPECT_RPCMARKER 0x0001 /* Expect to see a RPC/TCP marker next */
#define NFS_TCP_FORCE_RECONNECT 0x0002 /* Force a TCP reconnect */
int flags;
- struct mtx mtx;
};
/*
@@ -50,6 +49,7 @@ struct nfs_tcp_mountstate {
* Holds NFS specific information for mount.
*/
struct nfsmount {
+ struct mtx nm_mtx;
int nm_flag; /* Flags for soft/hard... */
int nm_state; /* Internal state flags */
struct mount *nm_mountp; /* Vfs structure for this filesystem */
diff --git a/sys/nfsclient/nfsnode.h b/sys/nfsclient/nfsnode.h
index a0a2a88..e03e97c 100644
--- a/sys/nfsclient/nfsnode.h
+++ b/sys/nfsclient/nfsnode.h
@@ -88,6 +88,7 @@ struct nfsdmap {
* be well aligned and, therefore, tightly packed.
*/
struct nfsnode {
+ struct mtx n_mtx; /* Protects all of these members */
u_quad_t n_size; /* Current size of file */
u_quad_t n_brev; /* Modify rev when cached */
u_quad_t n_lrev; /* Modify rev for lease */
@@ -124,9 +125,8 @@ struct nfsnode {
struct nfs4_fctx n_wfc;
u_char *n_name; /* leaf name, for v4 OPEN op */
uint32_t n_namelen;
- daddr_t ra_expect_lbn;
int n_directio_opens;
- int n_directio_asyncwr;
+ int n_directio_asyncwr;
};
#define n_atim n_un1.nf_atim
@@ -140,6 +140,8 @@ struct nfsnode {
/*
* Flags for n_flag
*/
+#define NFSYNCWAIT 0x0002 /* fsync waiting for all directio async writes
+ to drain */
#define NMODIFIED 0x0004 /* Might have a modified buffer in bio */
#define NWRITEERR 0x0008 /* Flag write errors so close will know */
/* 0x20, 0x40, 0x80 free */
@@ -150,8 +152,7 @@ struct nfsnode {
#define NTRUNCATE 0x1000 /* Opened by nfs_setattr() */
#define NSIZECHANGED 0x2000 /* File size has changed: need cache inval */
#define NNONCACHE 0x4000 /* Node marked as noncacheable */
-#define NFSYNCWAIT 0x8000 /* fsync waiting for all directio async writes
- to drain */
+#define NDIRCOOKIELK 0x8000 /* Lock to serialize access to directory cookies */
/*
* Convert between nfsnode pointers and vnode pointers
@@ -193,6 +194,12 @@ nfsuint64 *nfs_getcookie(struct nfsnode *, off_t, int);
uint64_t *nfs4_getcookie(struct nfsnode *, off_t, int);
void nfs_invaldir(struct vnode *);
void nfs4_invaldir(struct vnode *);
+int nfs_upgrade_vnlock(struct vnode *vp, struct thread *td);
+void nfs_downgrade_vnlock(struct vnode *vp, struct thread *td, int old_lock);
+void nfs_printf(const char *fmt, ...);
+
+void nfs_dircookie_lock(struct nfsnode *np);
+void nfs_dircookie_unlock(struct nfsnode *np);
#endif /* _KERNEL */