summary refs log tree commit diff stats
path: root/sys/fs
diff options
context:
space:
mode:
author	rmacklem <rmacklem@FreeBSD.org>	2010-04-22 23:51:01 +0000
committer	rmacklem <rmacklem@FreeBSD.org>	2010-04-22 23:51:01 +0000
commit	2b8346aabd349cd3ecc65ef9df6d44d6bf48a2aa (patch)
tree	0c8d854f9075c4cbdb0c569f23e53fa52c2178f0 /sys/fs
parent	42eb898c75b992137e13abf8bfc18b021635f943 (diff)
download	FreeBSD-src-2b8346aabd349cd3ecc65ef9df6d44d6bf48a2aa.zip
	FreeBSD-src-2b8346aabd349cd3ecc65ef9df6d44d6bf48a2aa.tar.gz
When the experimental NFS client is handling an NFSv4 server reboot
with delegations enabled, the recovery could fail if the renew thread is trying to return a delegation, since it will not do the recovery. This patch fixes the above by having nfscl_recalldeleg() fail with the I/O operations returning EIO, so that they will be attempted later. Most of the patch consists of adding an argument to various functions to indicate the delegation recall case where this needs to be done.

MFC after:	1 week
Diffstat (limited to 'sys/fs')
-rw-r--r--sys/fs/nfs/nfs_var.h4
-rw-r--r--sys/fs/nfsclient/nfs.h6
-rw-r--r--sys/fs/nfsclient/nfs_clbio.c31
-rw-r--r--sys/fs/nfsclient/nfs_clnfsiod.c6
-rw-r--r--sys/fs/nfsclient/nfs_clnode.c2
-rw-r--r--sys/fs/nfsclient/nfs_clrpcops.c16
-rw-r--r--sys/fs/nfsclient/nfs_clstate.c25
-rw-r--r--sys/fs/nfsclient/nfs_clvnops.c37
8 files changed, 91 insertions, 36 deletions
diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h
index 498511c..5388e13 100644
--- a/sys/fs/nfs/nfs_var.h
+++ b/sys/fs/nfs/nfs_var.h
@@ -369,7 +369,7 @@ int nfsrpc_readlink(vnode_t, struct uio *, struct ucred *,
int nfsrpc_read(vnode_t, struct uio *, struct ucred *, NFSPROC_T *,
struct nfsvattr *, int *, void *);
int nfsrpc_write(vnode_t, struct uio *, int *, u_char *,
- struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
+ struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *, int);
int nfsrpc_mknod(vnode_t, char *, int, struct vattr *, u_int32_t,
enum vtype, struct ucred *, NFSPROC_T *, struct nfsvattr *,
struct nfsvattr *, struct nfsfh **, int *, int *, void *);
@@ -502,7 +502,7 @@ int nfscl_maperr(NFSPROC_T *, int, uid_t, gid_t);
void nfscl_init(void);
/* nfs_clbio.c */
-int ncl_flush(vnode_t, int, struct ucred *, NFSPROC_T *, int);
+int ncl_flush(vnode_t, int, struct ucred *, NFSPROC_T *, int, int);
/* nfs_clnode.c */
void ncl_invalcaches(vnode_t);
diff --git a/sys/fs/nfsclient/nfs.h b/sys/fs/nfsclient/nfs.h
index 4b54286..c6071af 100644
--- a/sys/fs/nfsclient/nfs.h
+++ b/sys/fs/nfsclient/nfs.h
@@ -79,14 +79,16 @@ int ncl_biowrite(struct vnode *, struct uio *, int, struct ucred *);
int ncl_vinvalbuf(struct vnode *, int, struct thread *, int);
int ncl_asyncio(struct nfsmount *, struct buf *, struct ucred *,
struct thread *);
-int ncl_doio(struct vnode *, struct buf *, struct ucred *, struct thread *);
+int ncl_doio(struct vnode *, struct buf *, struct ucred *, struct thread *,
+ int);
void ncl_nhinit(void);
void ncl_nhuninit(void);
void ncl_nodelock(struct nfsnode *);
void ncl_nodeunlock(struct nfsnode *);
int ncl_getattrcache(struct vnode *, struct vattr *);
int ncl_readrpc(struct vnode *, struct uio *, struct ucred *);
-int ncl_writerpc(struct vnode *, struct uio *, struct ucred *, int *, int *);
+int ncl_writerpc(struct vnode *, struct uio *, struct ucred *, int *, int *,
+ int);
int ncl_readlinkrpc(struct vnode *, struct uio *, struct ucred *);
int ncl_readdirrpc(struct vnode *, struct uio *, struct ucred *,
struct thread *);
diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c
index d0dd2cc..2401c88 100644
--- a/sys/fs/nfsclient/nfs_clbio.c
+++ b/sys/fs/nfsclient/nfs_clbio.c
@@ -336,7 +336,7 @@ ncl_putpages(struct vop_putpages_args *ap)
else
iomode = NFSWRITE_FILESYNC;
- error = ncl_writerpc(vp, &uio, cred, &iomode, &must_commit);
+ error = ncl_writerpc(vp, &uio, cred, &iomode, &must_commit, 0);
pmap_qremove(kva, npages);
relpbuf(bp, &ncl_pbuf_freecnt);
@@ -554,7 +554,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
if ((bp->b_flags & B_CACHE) == 0) {
bp->b_iocmd = BIO_READ;
vfs_busy_pages(bp, 0);
- error = ncl_doio(vp, bp, cred, td);
+ error = ncl_doio(vp, bp, cred, td, 0);
if (error) {
brelse(bp);
return (error);
@@ -583,7 +583,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
if ((bp->b_flags & B_CACHE) == 0) {
bp->b_iocmd = BIO_READ;
vfs_busy_pages(bp, 0);
- error = ncl_doio(vp, bp, cred, td);
+ error = ncl_doio(vp, bp, cred, td, 0);
if (error) {
bp->b_ioflags |= BIO_ERROR;
brelse(bp);
@@ -609,7 +609,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
if ((bp->b_flags & B_CACHE) == 0) {
bp->b_iocmd = BIO_READ;
vfs_busy_pages(bp, 0);
- error = ncl_doio(vp, bp, cred, td);
+ error = ncl_doio(vp, bp, cred, td, 0);
if (error) {
brelse(bp);
}
@@ -638,7 +638,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
if ((bp->b_flags & B_CACHE) == 0) {
bp->b_iocmd = BIO_READ;
vfs_busy_pages(bp, 0);
- error = ncl_doio(vp, bp, cred, td);
+ error = ncl_doio(vp, bp, cred, td, 0);
/*
* no error + B_INVAL == directory EOF,
* use the block.
@@ -771,7 +771,7 @@ do_sync:
uio.uio_td = td;
iomode = NFSWRITE_FILESYNC;
error = ncl_writerpc(vp, &uio, cred, &iomode,
- &must_commit);
+ &must_commit, 0);
KASSERT((must_commit == 0),
("ncl_directio_write: Did not commit write"));
if (error)
@@ -1122,7 +1122,7 @@ again:
if ((bp->b_flags & B_CACHE) == 0) {
bp->b_iocmd = BIO_READ;
vfs_busy_pages(bp, 0);
- error = ncl_doio(vp, bp, cred, td);
+ error = ncl_doio(vp, bp, cred, td, 0);
if (error) {
brelse(bp);
break;
@@ -1523,7 +1523,7 @@ ncl_doio_directwrite(struct buf *bp)
iomode = NFSWRITE_FILESYNC;
uiop->uio_td = NULL; /* NULL since we're in nfsiod */
- ncl_writerpc(bp->b_vp, uiop, bp->b_wcred, &iomode, &must_commit);
+ ncl_writerpc(bp->b_vp, uiop, bp->b_wcred, &iomode, &must_commit, 0);
KASSERT((must_commit == 0), ("ncl_doio_directwrite: Did not commit write"));
free(iov_base, M_NFSDIRECTIO);
free(uiop->uio_iov, M_NFSDIRECTIO);
@@ -1550,7 +1550,8 @@ ncl_doio_directwrite(struct buf *bp)
* synchronously or from an nfsiod.
*/
int
-ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
+ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td,
+ int called_from_strategy)
{
struct uio *uiop;
struct nfsnode *np;
@@ -1695,7 +1696,8 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
else
iomode = NFSWRITE_FILESYNC;
- error = ncl_writerpc(vp, uiop, cr, &iomode, &must_commit);
+ error = ncl_writerpc(vp, uiop, cr, &iomode, &must_commit,
+ called_from_strategy);
/*
* When setting B_NEEDCOMMIT also set B_CLUSTEROK to try
@@ -1732,6 +1734,12 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
* the block is reused. This is indicated by setting
* the B_DELWRI and B_NEEDCOMMIT flags.
*
+ * EIO is returned by ncl_writerpc() to indicate a recoverable
+ * write error and is handled as above, except that
+ * B_EINTR isn't set. One cause of this is a stale stateid
+ * error for the RPC that indicates recovery is required,
+ * when called with called_from_strategy != 0.
+ *
* If the buffer is marked B_PAGING, it does not reside on
* the vp's paging queues so we cannot call bdirty(). The
* bp in this case is not an NFS cache block so we should
@@ -1760,7 +1768,8 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
bdirty(bp);
bp->b_flags &= ~B_DONE;
}
- if (error && (bp->b_flags & B_ASYNC) == 0)
+ if ((error == EINTR || error == ETIMEDOUT) &&
+ (bp->b_flags & B_ASYNC) == 0)
bp->b_flags |= B_EINTR;
splx(s);
} else {
diff --git a/sys/fs/nfsclient/nfs_clnfsiod.c b/sys/fs/nfsclient/nfs_clnfsiod.c
index 6649fc0..62ea4f8 100644
--- a/sys/fs/nfsclient/nfs_clnfsiod.c
+++ b/sys/fs/nfsclient/nfs_clnfsiod.c
@@ -278,9 +278,11 @@ nfssvc_iod(void *instance)
(void)ncl_doio_directwrite(bp);
} else {
if (bp->b_iocmd == BIO_READ)
- (void) ncl_doio(bp->b_vp, bp, bp->b_rcred, NULL);
+ (void) ncl_doio(bp->b_vp, bp, bp->b_rcred,
+ NULL, 0);
else
- (void) ncl_doio(bp->b_vp, bp, bp->b_wcred, NULL);
+ (void) ncl_doio(bp->b_vp, bp, bp->b_wcred,
+ NULL, 0);
}
mtx_lock(&ncl_iod_mutex);
/*
diff --git a/sys/fs/nfsclient/nfs_clnode.c b/sys/fs/nfsclient/nfs_clnode.c
index 6b2aa7a..c133742 100644
--- a/sys/fs/nfsclient/nfs_clnode.c
+++ b/sys/fs/nfsclient/nfs_clnode.c
@@ -199,7 +199,7 @@ ncl_inactive(struct vop_inactive_args *ap)
* available for the writes.
*/
if (nfscl_mustflush(vp))
- (void) ncl_flush(vp, MNT_WAIT, NULL, ap->a_td, 1);
+ (void) ncl_flush(vp, MNT_WAIT, NULL, ap->a_td, 1, 0);
(void) nfsrpc_close(vp, 1, ap->a_td);
}
diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c
index a7015f5..63c15c1 100644
--- a/sys/fs/nfsclient/nfs_clrpcops.c
+++ b/sys/fs/nfsclient/nfs_clrpcops.c
@@ -1346,11 +1346,16 @@ nfsmout:
/*
* nfs write operation
+ * When called_from_strategy != 0, it should return EIO for an error that
+ * indicates recovery is in progress, so that the buffer will be left
+ * dirty and be written back to the server later. If it loops around,
+ * the recovery thread could get stuck waiting for the buffer and recovery
+ * will then deadlock.
*/
APPLESTATIC int
nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, u_char *verfp,
struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
- void *stuff)
+ void *stuff, int called_from_strategy)
{
int error, expireret = 0, retrycnt, nostateid;
u_int32_t clidrev = 0;
@@ -1410,12 +1415,15 @@ nfscl_dumpstate(nmp, 1, 1, 0, 0);
expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
}
retrycnt++;
- } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
- error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
+ } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
+ ((error == NFSERR_STALESTATEID ||
+ error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
(error == NFSERR_OLDSTATEID && retrycnt < 20) ||
((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
expireret == 0 && clidrev != 0 && retrycnt < 4));
- if (error && retrycnt >= 4)
+ if (error != 0 && (retrycnt >= 4 ||
+ ((error == NFSERR_STALESTATEID ||
+ error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
error = EIO;
if (NFSHASNFSV4(nmp) && p == NULL)
NFSFREECRED(newcred);
diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c
index 78b5966..cf8c922 100644
--- a/sys/fs/nfsclient/nfs_clstate.c
+++ b/sys/fs/nfsclient/nfs_clstate.c
@@ -139,7 +139,7 @@ static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *);
static int nfscl_errmap(struct nfsrv_descript *);
static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *);
static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *,
- struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *);
+ struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int);
static void nfscl_freeopenowner(struct nfsclowner *, int);
static void nfscl_cleandeleg(struct nfscldeleg *);
static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *,
@@ -2469,7 +2469,7 @@ tryagain:
NFSUNLOCKCLSTATE();
newnfs_copycred(&dp->nfsdl_cred, cred);
ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp,
- NULL, cred, p);
+ NULL, cred, p, 1);
if (!ret) {
nfscl_cleandeleg(dp);
TAILQ_REMOVE(&clp->nfsc_deleg, dp,
@@ -3309,7 +3309,8 @@ nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
*/
static int
nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
- struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p)
+ struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p,
+ int called_from_renewthread)
{
struct nfsclowner *owp, *lowp, *nowp;
struct nfsclopen *op, *lop;
@@ -3343,6 +3344,7 @@ nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
* Ok, if it's a write delegation, flush data to the server, so
* that close/open consistency is retained.
*/
+ ret = 0;
NFSLOCKNODE(np);
if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) {
#ifdef APPLE
@@ -3351,7 +3353,8 @@ nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
np->n_flag |= NDELEGRECALL;
#endif
NFSUNLOCKNODE(np);
- (void) ncl_flush(vp, MNT_WAIT, cred, p, 1);
+ ret = ncl_flush(vp, MNT_WAIT, cred, p, 1,
+ called_from_renewthread);
NFSLOCKNODE(np);
#ifdef APPLE
OSBitAndAtomic((int32_t)~(NMODIFIED | NDELEGRECALL), (UInt32 *)&np->n_flag);
@@ -3360,6 +3363,16 @@ nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
#endif
}
NFSUNLOCKNODE(np);
+ if (ret == EIO && called_from_renewthread != 0) {
+ /*
+ * If the flush failed with EIO for the renew thread,
+ * return now, so that the dirty buffer will be flushed
+ * later.
+ */
+ if (gotvp != 0)
+ vrele(vp);
+ return (ret);
+ }
/*
* Now, for each openowner with opens issued locally, move them
@@ -3857,7 +3870,7 @@ nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp)
NFSUNLOCKCLSTATE();
cred = newnfs_getcred();
newnfs_copycred(&dp->nfsdl_cred, cred);
- (void) nfscl_recalldeleg(clp, nmp, dp, vp, cred, p);
+ (void) nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0);
NFSFREECRED(cred);
triedrecall = 1;
NFSLOCKCLSTATE();
@@ -3955,7 +3968,7 @@ nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp,
NFSUNLOCKCLSTATE();
cred = newnfs_getcred();
newnfs_copycred(&dp->nfsdl_cred, cred);
- (void) nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p);
+ (void) nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0);
NFSFREECRED(cred);
triedrecall = 1;
NFSLOCKCLSTATE();
diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c
index 3be823f..4e844f1 100644
--- a/sys/fs/nfsclient/nfs_clvnops.c
+++ b/sys/fs/nfsclient/nfs_clvnops.c
@@ -670,13 +670,13 @@ nfs_close(struct vop_close_args *ap)
* traditional vnode locking implemented for Vnode Ops.
*/
int cm = newnfs_commit_on_close ? 1 : 0;
- error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm);
+ error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm, 0);
/* np->n_flag &= ~NMODIFIED; */
} else if (NFS_ISV4(vp)) {
if (nfscl_mustflush(vp)) {
int cm = newnfs_commit_on_close ? 1 : 0;
error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td,
- cm);
+ cm, 0);
/*
* as above w.r.t races when clearing
* NMODIFIED.
@@ -1306,7 +1306,7 @@ ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
*/
int
ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
- int *iomode, int *must_commit)
+ int *iomode, int *must_commit, int called_from_strategy)
{
struct nfsvattr nfsva;
int error = 0, attrflag, ret;
@@ -1315,7 +1315,7 @@ ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
*must_commit = 0;
error = nfsrpc_write(vp, uiop, iomode, verf, cred,
- uiop->uio_td, &nfsva, &attrflag, NULL);
+ uiop->uio_td, &nfsva, &attrflag, NULL, called_from_strategy);
NFSLOCKMNT(nmp);
if (!error && NFSHASWRITEVERF(nmp) &&
NFSBCMP(verf, nmp->nm_verf, NFSX_VERF)) {
@@ -2473,7 +2473,7 @@ nfs_strategy(struct vop_strategy_args *ap)
*/
if ((bp->b_flags & B_ASYNC) == 0 ||
ncl_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread))
- (void)ncl_doio(ap->a_vp, bp, cr, curthread);
+ (void) ncl_doio(ap->a_vp, bp, cr, curthread, 1);
return (0);
}
@@ -2484,17 +2484,20 @@ nfs_strategy(struct vop_strategy_args *ap)
static int
nfs_fsync(struct vop_fsync_args *ap)
{
- return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1));
+ return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1, 0));
}
/*
* Flush all the blocks associated with a vnode.
* Walk through the buffer pool and push any dirty pages
* associated with the vnode.
+ * If the called_from_renewthread argument is TRUE, it has been called
+ * from the NFSv4 renew thread and, as such, cannot block indefinitely
+ * waiting for a buffer write to complete.
*/
int
ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td,
- int commit)
+ int commit, int called_from_renewthread)
{
struct nfsnode *np = VTONFS(vp);
struct buf *bp;
@@ -2513,6 +2516,8 @@ ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td,
struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
int bvecsize = 0, bveccount;
+ if (called_from_renewthread != 0)
+ slptimeo = hz;
if (nmp->nm_flag & NFSMNT_INT)
slpflag = NFS_PCATCH;
if (!commit)
@@ -2708,6 +2713,14 @@ loop:
error = 0;
goto loop;
}
+ if (called_from_renewthread != 0) {
+ /*
+ * Return EIO so the flush will be retried
+ * later.
+ */
+ error = EIO;
+ goto done;
+ }
if (newnfs_sigintr(nmp, td)) {
error = EINTR;
goto done;
@@ -2747,6 +2760,14 @@ loop:
error = bufobj_wwait(bo, slpflag, slptimeo);
if (error) {
BO_UNLOCK(bo);
+ if (called_from_renewthread != 0) {
+ /*
+ * Return EIO so that the flush will be
+ * retried later.
+ */
+ error = EIO;
+ goto done;
+ }
error = newnfs_sigintr(nmp, td);
if (error)
goto done;
@@ -2838,7 +2859,7 @@ nfs_advlock(struct vop_advlock_args *ap)
*/
if (ap->a_op == F_UNLCK &&
nfscl_checkwritelocked(vp, ap->a_fl, cred, td))
- (void) ncl_flush(vp, MNT_WAIT, cred, td, 1);
+ (void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0);
/*
* Loop around doing the lock op, while a blocking lock
OpenPOWER on IntegriCloud